# _compression.py (5.2 KB)
  1. """Internal classes used by the gzip, lzma and bz2 modules"""
  2. import io
  3. BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size
  4. class BaseStream(io.BufferedIOBase):
  5. """Mode-checking helper functions."""
  6. def _check_not_closed(self):
  7. if self.closed:
  8. raise ValueError("I/O operation on closed file")
  9. def _check_can_read(self):
  10. if not self.readable():
  11. raise io.UnsupportedOperation("File not open for reading")
  12. def _check_can_write(self):
  13. if not self.writable():
  14. raise io.UnsupportedOperation("File not open for writing")
  15. def _check_can_seek(self):
  16. if not self.readable():
  17. raise io.UnsupportedOperation("Seeking is only supported "
  18. "on files open for reading")
  19. if not self.seekable():
  20. raise io.UnsupportedOperation("The underlying file object "
  21. "does not support seeking")
  22. class DecompressReader(io.RawIOBase):
  23. """Adapts the decompressor API to a RawIOBase reader API"""
  24. def readable(self):
  25. return True
  26. def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args):
  27. self._fp = fp
  28. self._eof = False
  29. self._pos = 0 # Current offset in decompressed stream
  30. # Set to size of decompressed stream once it is known, for SEEK_END
  31. self._size = -1
  32. # Save the decompressor factory and arguments.
  33. # If the file contains multiple compressed streams, each
  34. # stream will need a separate decompressor object. A new decompressor
  35. # object is also needed when implementing a backwards seek().
  36. self._decomp_factory = decomp_factory
  37. self._decomp_args = decomp_args
  38. self._decompressor = self._decomp_factory(**self._decomp_args)
  39. # Exception class to catch from decompressor signifying invalid
  40. # trailing data to ignore
  41. self._trailing_error = trailing_error
  42. def close(self):
  43. self._decompressor = None
  44. return super().close()
  45. def seekable(self):
  46. return self._fp.seekable()
  47. def readinto(self, b):
  48. with memoryview(b) as view, view.cast("B") as byte_view:
  49. data = self.read(len(byte_view))
  50. byte_view[:len(data)] = data
  51. return len(data)
  52. def read(self, size=-1):
  53. if size < 0:
  54. return self.readall()
  55. if not size or self._eof:
  56. return b""
  57. data = None # Default if EOF is encountered
  58. # Depending on the input data, our call to the decompressor may not
  59. # return any data. In this case, try again after reading another block.
  60. while True:
  61. if self._decompressor.eof:
  62. rawblock = (self._decompressor.unused_data or
  63. self._fp.read(BUFFER_SIZE))
  64. if not rawblock:
  65. break
  66. # Continue to next stream.
  67. self._decompressor = self._decomp_factory(
  68. **self._decomp_args)
  69. try:
  70. data = self._decompressor.decompress(rawblock, size)
  71. except self._trailing_error:
  72. # Trailing data isn't a valid compressed stream; ignore it.
  73. break
  74. else:
  75. if self._decompressor.needs_input:
  76. rawblock = self._fp.read(BUFFER_SIZE)
  77. if not rawblock:
  78. raise EOFError("Compressed file ended before the "
  79. "end-of-stream marker was reached")
  80. else:
  81. rawblock = b""
  82. data = self._decompressor.decompress(rawblock, size)
  83. if data:
  84. break
  85. if not data:
  86. self._eof = True
  87. self._size = self._pos
  88. return b""
  89. self._pos += len(data)
  90. return data
  91. # Rewind the file to the beginning of the data stream.
  92. def _rewind(self):
  93. self._fp.seek(0)
  94. self._eof = False
  95. self._pos = 0
  96. self._decompressor = self._decomp_factory(**self._decomp_args)
  97. def seek(self, offset, whence=io.SEEK_SET):
  98. # Recalculate offset as an absolute file position.
  99. if whence == io.SEEK_SET:
  100. pass
  101. elif whence == io.SEEK_CUR:
  102. offset = self._pos + offset
  103. elif whence == io.SEEK_END:
  104. # Seeking relative to EOF - we need to know the file's size.
  105. if self._size < 0:
  106. while self.read(io.DEFAULT_BUFFER_SIZE):
  107. pass
  108. offset = self._size + offset
  109. else:
  110. raise ValueError("Invalid value for whence: {}".format(whence))
  111. # Make it so that offset is the number of bytes to skip forward.
  112. if offset < self._pos:
  113. self._rewind()
  114. else:
  115. offset -= self._pos
  116. # Read and discard data until we reach the desired position.
  117. while offset > 0:
  118. data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset))
  119. if not data:
  120. break
  121. offset -= len(data)
  122. return self._pos
  123. def tell(self):
  124. """Return the current file position."""
  125. return self._pos