# _compression.py — internal helper classes shared by the gzip, lzma and bz2 modules.
"""Internal classes used by the gzip, lzma and bz2 modules"""

import io
import sys

BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE  # Compressed data read chunk size
  5. class BaseStream(io.BufferedIOBase):
  6. """Mode-checking helper functions."""
  7. def _check_not_closed(self):
  8. if self.closed:
  9. raise ValueError("I/O operation on closed file")
  10. def _check_can_read(self):
  11. if not self.readable():
  12. raise io.UnsupportedOperation("File not open for reading")
  13. def _check_can_write(self):
  14. if not self.writable():
  15. raise io.UnsupportedOperation("File not open for writing")
  16. def _check_can_seek(self):
  17. if not self.readable():
  18. raise io.UnsupportedOperation("Seeking is only supported "
  19. "on files open for reading")
  20. if not self.seekable():
  21. raise io.UnsupportedOperation("The underlying file object "
  22. "does not support seeking")
  23. class DecompressReader(io.RawIOBase):
  24. """Adapts the decompressor API to a RawIOBase reader API"""
  25. def readable(self):
  26. return True
  27. def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args):
  28. self._fp = fp
  29. self._eof = False
  30. self._pos = 0 # Current offset in decompressed stream
  31. # Set to size of decompressed stream once it is known, for SEEK_END
  32. self._size = -1
  33. # Save the decompressor factory and arguments.
  34. # If the file contains multiple compressed streams, each
  35. # stream will need a separate decompressor object. A new decompressor
  36. # object is also needed when implementing a backwards seek().
  37. self._decomp_factory = decomp_factory
  38. self._decomp_args = decomp_args
  39. self._decompressor = self._decomp_factory(**self._decomp_args)
  40. # Exception class to catch from decompressor signifying invalid
  41. # trailing data to ignore
  42. self._trailing_error = trailing_error
  43. def close(self):
  44. self._decompressor = None
  45. return super().close()
  46. def seekable(self):
  47. return self._fp.seekable()
  48. def readinto(self, b):
  49. with memoryview(b) as view, view.cast("B") as byte_view:
  50. data = self.read(len(byte_view))
  51. byte_view[:len(data)] = data
  52. return len(data)
  53. def read(self, size=-1):
  54. if size < 0:
  55. return self.readall()
  56. if not size or self._eof:
  57. return b""
  58. data = None # Default if EOF is encountered
  59. # Depending on the input data, our call to the decompressor may not
  60. # return any data. In this case, try again after reading another block.
  61. while True:
  62. if self._decompressor.eof:
  63. rawblock = (self._decompressor.unused_data or
  64. self._fp.read(BUFFER_SIZE))
  65. if not rawblock:
  66. break
  67. # Continue to next stream.
  68. self._decompressor = self._decomp_factory(
  69. **self._decomp_args)
  70. try:
  71. data = self._decompressor.decompress(rawblock, size)
  72. except self._trailing_error:
  73. # Trailing data isn't a valid compressed stream; ignore it.
  74. break
  75. else:
  76. if self._decompressor.needs_input:
  77. rawblock = self._fp.read(BUFFER_SIZE)
  78. if not rawblock:
  79. raise EOFError("Compressed file ended before the "
  80. "end-of-stream marker was reached")
  81. else:
  82. rawblock = b""
  83. data = self._decompressor.decompress(rawblock, size)
  84. if data:
  85. break
  86. if not data:
  87. self._eof = True
  88. self._size = self._pos
  89. return b""
  90. self._pos += len(data)
  91. return data
  92. def readall(self):
  93. chunks = []
  94. # sys.maxsize means the max length of output buffer is unlimited,
  95. # so that the whole input buffer can be decompressed within one
  96. # .decompress() call.
  97. while data := self.read(sys.maxsize):
  98. chunks.append(data)
  99. return b"".join(chunks)
  100. # Rewind the file to the beginning of the data stream.
  101. def _rewind(self):
  102. self._fp.seek(0)
  103. self._eof = False
  104. self._pos = 0
  105. self._decompressor = self._decomp_factory(**self._decomp_args)
  106. def seek(self, offset, whence=io.SEEK_SET):
  107. # Recalculate offset as an absolute file position.
  108. if whence == io.SEEK_SET:
  109. pass
  110. elif whence == io.SEEK_CUR:
  111. offset = self._pos + offset
  112. elif whence == io.SEEK_END:
  113. # Seeking relative to EOF - we need to know the file's size.
  114. if self._size < 0:
  115. while self.read(io.DEFAULT_BUFFER_SIZE):
  116. pass
  117. offset = self._size + offset
  118. else:
  119. raise ValueError("Invalid value for whence: {}".format(whence))
  120. # Make it so that offset is the number of bytes to skip forward.
  121. if offset < self._pos:
  122. self._rewind()
  123. else:
  124. offset -= self._pos
  125. # Read and discard data until we reach the desired position.
  126. while offset > 0:
  127. data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset))
  128. if not data:
  129. break
  130. offset -= len(data)
  131. return self._pos
  132. def tell(self):
  133. """Return the current file position."""
  134. return self._pos