ImageFile.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773
  1. #
  2. # The Python Imaging Library.
  3. # $Id$
  4. #
  5. # base class for image file handlers
  6. #
  7. # history:
  8. # 1995-09-09 fl Created
  9. # 1996-03-11 fl Fixed load mechanism.
  10. # 1996-04-15 fl Added pcx/xbm decoders.
  11. # 1996-04-30 fl Added encoders.
  12. # 1996-12-14 fl Added load helpers
  13. # 1997-01-11 fl Use encode_to_file where possible
  14. # 1997-08-27 fl Flush output in _save
  15. # 1998-03-05 fl Use memory mapping for some modes
  16. # 1999-02-04 fl Use memory mapping also for "I;16" and "I;16B"
  17. # 1999-05-31 fl Added image parser
  18. # 2000-10-12 fl Set readonly flag on memory-mapped images
  19. # 2002-03-20 fl Use better messages for common decoder errors
  20. # 2003-04-21 fl Fall back on mmap/map_buffer if map is not available
  21. # 2003-10-30 fl Added StubImageFile class
  22. # 2004-02-25 fl Made incremental parser more robust
  23. #
  24. # Copyright (c) 1997-2004 by Secret Labs AB
  25. # Copyright (c) 1995-2004 by Fredrik Lundh
  26. #
  27. # See the README file for information on usage and redistribution.
  28. #
  29. import io
  30. import itertools
  31. import struct
  32. import sys
  33. from . import Image
  34. from ._util import is_path
  35. MAXBLOCK = 65536
  36. SAFEBLOCK = 1024 * 1024
  37. LOAD_TRUNCATED_IMAGES = False
  38. """Whether or not to load truncated image files. User code may change this."""
  39. ERRORS = {
  40. -1: "image buffer overrun error",
  41. -2: "decoding error",
  42. -3: "unknown error",
  43. -8: "bad configuration",
  44. -9: "out of memory error",
  45. }
  46. """
  47. Dict of known error codes returned from :meth:`.PyDecoder.decode`,
  48. :meth:`.PyEncoder.encode` :meth:`.PyEncoder.encode_to_pyfd` and
  49. :meth:`.PyEncoder.encode_to_file`.
  50. """
  51. #
  52. # --------------------------------------------------------------------
  53. # Helpers
  54. def raise_oserror(error):
  55. try:
  56. msg = Image.core.getcodecstatus(error)
  57. except AttributeError:
  58. msg = ERRORS.get(error)
  59. if not msg:
  60. msg = f"decoder error {error}"
  61. msg += " when reading image file"
  62. raise OSError(msg)
  63. def _tilesort(t):
  64. # sort on offset
  65. return t[2]
  66. #
  67. # --------------------------------------------------------------------
  68. # ImageFile base class
  69. class ImageFile(Image.Image):
  70. """Base class for image file format handlers."""
  71. def __init__(self, fp=None, filename=None):
  72. super().__init__()
  73. self._min_frame = 0
  74. self.custom_mimetype = None
  75. self.tile = None
  76. """ A list of tile descriptors, or ``None`` """
  77. self.readonly = 1 # until we know better
  78. self.decoderconfig = ()
  79. self.decodermaxblock = MAXBLOCK
  80. if is_path(fp):
  81. # filename
  82. self.fp = open(fp, "rb")
  83. self.filename = fp
  84. self._exclusive_fp = True
  85. else:
  86. # stream
  87. self.fp = fp
  88. self.filename = filename
  89. # can be overridden
  90. self._exclusive_fp = None
  91. try:
  92. try:
  93. self._open()
  94. except (
  95. IndexError, # end of data
  96. TypeError, # end of data (ord)
  97. KeyError, # unsupported mode
  98. EOFError, # got header but not the first frame
  99. struct.error,
  100. ) as v:
  101. raise SyntaxError(v) from v
  102. if not self.mode or self.size[0] <= 0 or self.size[1] <= 0:
  103. msg = "not identified by this driver"
  104. raise SyntaxError(msg)
  105. except BaseException:
  106. # close the file only if we have opened it this constructor
  107. if self._exclusive_fp:
  108. self.fp.close()
  109. raise
  110. def get_format_mimetype(self):
  111. if self.custom_mimetype:
  112. return self.custom_mimetype
  113. if self.format is not None:
  114. return Image.MIME.get(self.format.upper())
  115. def __setstate__(self, state):
  116. self.tile = []
  117. super().__setstate__(state)
  118. def verify(self):
  119. """Check file integrity"""
  120. # raise exception if something's wrong. must be called
  121. # directly after open, and closes file when finished.
  122. if self._exclusive_fp:
  123. self.fp.close()
  124. self.fp = None
  125. def load(self):
  126. """Load image data based on tile list"""
  127. if self.tile is None:
  128. msg = "cannot load this image"
  129. raise OSError(msg)
  130. pixel = Image.Image.load(self)
  131. if not self.tile:
  132. return pixel
  133. self.map = None
  134. use_mmap = self.filename and len(self.tile) == 1
  135. # As of pypy 2.1.0, memory mapping was failing here.
  136. use_mmap = use_mmap and not hasattr(sys, "pypy_version_info")
  137. readonly = 0
  138. # look for read/seek overrides
  139. try:
  140. read = self.load_read
  141. # don't use mmap if there are custom read/seek functions
  142. use_mmap = False
  143. except AttributeError:
  144. read = self.fp.read
  145. try:
  146. seek = self.load_seek
  147. use_mmap = False
  148. except AttributeError:
  149. seek = self.fp.seek
  150. if use_mmap:
  151. # try memory mapping
  152. decoder_name, extents, offset, args = self.tile[0]
  153. if (
  154. decoder_name == "raw"
  155. and len(args) >= 3
  156. and args[0] == self.mode
  157. and args[0] in Image._MAPMODES
  158. ):
  159. try:
  160. # use mmap, if possible
  161. import mmap
  162. with open(self.filename) as fp:
  163. self.map = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
  164. if offset + self.size[1] * args[1] > self.map.size():
  165. # buffer is not large enough
  166. raise OSError
  167. self.im = Image.core.map_buffer(
  168. self.map, self.size, decoder_name, offset, args
  169. )
  170. readonly = 1
  171. # After trashing self.im,
  172. # we might need to reload the palette data.
  173. if self.palette:
  174. self.palette.dirty = 1
  175. except (AttributeError, OSError, ImportError):
  176. self.map = None
  177. self.load_prepare()
  178. err_code = -3 # initialize to unknown error
  179. if not self.map:
  180. # sort tiles in file order
  181. self.tile.sort(key=_tilesort)
  182. try:
  183. # FIXME: This is a hack to handle TIFF's JpegTables tag.
  184. prefix = self.tile_prefix
  185. except AttributeError:
  186. prefix = b""
  187. # Remove consecutive duplicates that only differ by their offset
  188. self.tile = [
  189. list(tiles)[-1]
  190. for _, tiles in itertools.groupby(
  191. self.tile, lambda tile: (tile[0], tile[1], tile[3])
  192. )
  193. ]
  194. for decoder_name, extents, offset, args in self.tile:
  195. seek(offset)
  196. decoder = Image._getdecoder(
  197. self.mode, decoder_name, args, self.decoderconfig
  198. )
  199. try:
  200. decoder.setimage(self.im, extents)
  201. if decoder.pulls_fd:
  202. decoder.setfd(self.fp)
  203. err_code = decoder.decode(b"")[1]
  204. else:
  205. b = prefix
  206. while True:
  207. try:
  208. s = read(self.decodermaxblock)
  209. except (IndexError, struct.error) as e:
  210. # truncated png/gif
  211. if LOAD_TRUNCATED_IMAGES:
  212. break
  213. else:
  214. msg = "image file is truncated"
  215. raise OSError(msg) from e
  216. if not s: # truncated jpeg
  217. if LOAD_TRUNCATED_IMAGES:
  218. break
  219. else:
  220. msg = (
  221. "image file is truncated "
  222. f"({len(b)} bytes not processed)"
  223. )
  224. raise OSError(msg)
  225. b = b + s
  226. n, err_code = decoder.decode(b)
  227. if n < 0:
  228. break
  229. b = b[n:]
  230. finally:
  231. # Need to cleanup here to prevent leaks
  232. decoder.cleanup()
  233. self.tile = []
  234. self.readonly = readonly
  235. self.load_end()
  236. if self._exclusive_fp and self._close_exclusive_fp_after_loading:
  237. self.fp.close()
  238. self.fp = None
  239. if not self.map and not LOAD_TRUNCATED_IMAGES and err_code < 0:
  240. # still raised if decoder fails to return anything
  241. raise_oserror(err_code)
  242. return Image.Image.load(self)
  243. def load_prepare(self):
  244. # create image memory if necessary
  245. if not self.im or self.im.mode != self.mode or self.im.size != self.size:
  246. self.im = Image.core.new(self.mode, self.size)
  247. # create palette (optional)
  248. if self.mode == "P":
  249. Image.Image.load(self)
  250. def load_end(self):
  251. # may be overridden
  252. pass
  253. # may be defined for contained formats
  254. # def load_seek(self, pos):
  255. # pass
  256. # may be defined for blocked formats (e.g. PNG)
  257. # def load_read(self, bytes):
  258. # pass
  259. def _seek_check(self, frame):
  260. if (
  261. frame < self._min_frame
  262. # Only check upper limit on frames if additional seek operations
  263. # are not required to do so
  264. or (
  265. not (hasattr(self, "_n_frames") and self._n_frames is None)
  266. and frame >= self.n_frames + self._min_frame
  267. )
  268. ):
  269. msg = "attempt to seek outside sequence"
  270. raise EOFError(msg)
  271. return self.tell() != frame
  272. class StubImageFile(ImageFile):
  273. """
  274. Base class for stub image loaders.
  275. A stub loader is an image loader that can identify files of a
  276. certain format, but relies on external code to load the file.
  277. """
  278. def _open(self):
  279. msg = "StubImageFile subclass must implement _open"
  280. raise NotImplementedError(msg)
  281. def load(self):
  282. loader = self._load()
  283. if loader is None:
  284. msg = f"cannot find loader for this {self.format} file"
  285. raise OSError(msg)
  286. image = loader.load(self)
  287. assert image is not None
  288. # become the other object (!)
  289. self.__class__ = image.__class__
  290. self.__dict__ = image.__dict__
  291. return image.load()
  292. def _load(self):
  293. """(Hook) Find actual image loader."""
  294. msg = "StubImageFile subclass must implement _load"
  295. raise NotImplementedError(msg)
  296. class Parser:
  297. """
  298. Incremental image parser. This class implements the standard
  299. feed/close consumer interface.
  300. """
  301. incremental = None
  302. image = None
  303. data = None
  304. decoder = None
  305. offset = 0
  306. finished = 0
  307. def reset(self):
  308. """
  309. (Consumer) Reset the parser. Note that you can only call this
  310. method immediately after you've created a parser; parser
  311. instances cannot be reused.
  312. """
  313. assert self.data is None, "cannot reuse parsers"
  314. def feed(self, data):
  315. """
  316. (Consumer) Feed data to the parser.
  317. :param data: A string buffer.
  318. :exception OSError: If the parser failed to parse the image file.
  319. """
  320. # collect data
  321. if self.finished:
  322. return
  323. if self.data is None:
  324. self.data = data
  325. else:
  326. self.data = self.data + data
  327. # parse what we have
  328. if self.decoder:
  329. if self.offset > 0:
  330. # skip header
  331. skip = min(len(self.data), self.offset)
  332. self.data = self.data[skip:]
  333. self.offset = self.offset - skip
  334. if self.offset > 0 or not self.data:
  335. return
  336. n, e = self.decoder.decode(self.data)
  337. if n < 0:
  338. # end of stream
  339. self.data = None
  340. self.finished = 1
  341. if e < 0:
  342. # decoding error
  343. self.image = None
  344. raise_oserror(e)
  345. else:
  346. # end of image
  347. return
  348. self.data = self.data[n:]
  349. elif self.image:
  350. # if we end up here with no decoder, this file cannot
  351. # be incrementally parsed. wait until we've gotten all
  352. # available data
  353. pass
  354. else:
  355. # attempt to open this file
  356. try:
  357. with io.BytesIO(self.data) as fp:
  358. im = Image.open(fp)
  359. except OSError:
  360. # traceback.print_exc()
  361. pass # not enough data
  362. else:
  363. flag = hasattr(im, "load_seek") or hasattr(im, "load_read")
  364. if flag or len(im.tile) != 1:
  365. # custom load code, or multiple tiles
  366. self.decode = None
  367. else:
  368. # initialize decoder
  369. im.load_prepare()
  370. d, e, o, a = im.tile[0]
  371. im.tile = []
  372. self.decoder = Image._getdecoder(im.mode, d, a, im.decoderconfig)
  373. self.decoder.setimage(im.im, e)
  374. # calculate decoder offset
  375. self.offset = o
  376. if self.offset <= len(self.data):
  377. self.data = self.data[self.offset :]
  378. self.offset = 0
  379. self.image = im
  380. def __enter__(self):
  381. return self
  382. def __exit__(self, *args):
  383. self.close()
  384. def close(self):
  385. """
  386. (Consumer) Close the stream.
  387. :returns: An image object.
  388. :exception OSError: If the parser failed to parse the image file either
  389. because it cannot be identified or cannot be
  390. decoded.
  391. """
  392. # finish decoding
  393. if self.decoder:
  394. # get rid of what's left in the buffers
  395. self.feed(b"")
  396. self.data = self.decoder = None
  397. if not self.finished:
  398. msg = "image was incomplete"
  399. raise OSError(msg)
  400. if not self.image:
  401. msg = "cannot parse this image"
  402. raise OSError(msg)
  403. if self.data:
  404. # incremental parsing not possible; reopen the file
  405. # not that we have all data
  406. with io.BytesIO(self.data) as fp:
  407. try:
  408. self.image = Image.open(fp)
  409. finally:
  410. self.image.load()
  411. return self.image
  412. # --------------------------------------------------------------------
  413. def _save(im, fp, tile, bufsize=0):
  414. """Helper to save image based on tile list
  415. :param im: Image object.
  416. :param fp: File object.
  417. :param tile: Tile list.
  418. :param bufsize: Optional buffer size
  419. """
  420. im.load()
  421. if not hasattr(im, "encoderconfig"):
  422. im.encoderconfig = ()
  423. tile.sort(key=_tilesort)
  424. # FIXME: make MAXBLOCK a configuration parameter
  425. # It would be great if we could have the encoder specify what it needs
  426. # But, it would need at least the image size in most cases. RawEncode is
  427. # a tricky case.
  428. bufsize = max(MAXBLOCK, bufsize, im.size[0] * 4) # see RawEncode.c
  429. try:
  430. fh = fp.fileno()
  431. fp.flush()
  432. _encode_tile(im, fp, tile, bufsize, fh)
  433. except (AttributeError, io.UnsupportedOperation) as exc:
  434. _encode_tile(im, fp, tile, bufsize, None, exc)
  435. if hasattr(fp, "flush"):
  436. fp.flush()
  437. def _encode_tile(im, fp, tile, bufsize, fh, exc=None):
  438. for e, b, o, a in tile:
  439. if o > 0:
  440. fp.seek(o)
  441. encoder = Image._getencoder(im.mode, e, a, im.encoderconfig)
  442. try:
  443. encoder.setimage(im.im, b)
  444. if encoder.pushes_fd:
  445. encoder.setfd(fp)
  446. errcode = encoder.encode_to_pyfd()[1]
  447. else:
  448. if exc:
  449. # compress to Python file-compatible object
  450. while True:
  451. errcode, data = encoder.encode(bufsize)[1:]
  452. fp.write(data)
  453. if errcode:
  454. break
  455. else:
  456. # slight speedup: compress to real file object
  457. errcode = encoder.encode_to_file(fh, bufsize)
  458. if errcode < 0:
  459. msg = f"encoder error {errcode} when writing image file"
  460. raise OSError(msg) from exc
  461. finally:
  462. encoder.cleanup()
  463. def _safe_read(fp, size):
  464. """
  465. Reads large blocks in a safe way. Unlike fp.read(n), this function
  466. doesn't trust the user. If the requested size is larger than
  467. SAFEBLOCK, the file is read block by block.
  468. :param fp: File handle. Must implement a <b>read</b> method.
  469. :param size: Number of bytes to read.
  470. :returns: A string containing <i>size</i> bytes of data.
  471. Raises an OSError if the file is truncated and the read cannot be completed
  472. """
  473. if size <= 0:
  474. return b""
  475. if size <= SAFEBLOCK:
  476. data = fp.read(size)
  477. if len(data) < size:
  478. msg = "Truncated File Read"
  479. raise OSError(msg)
  480. return data
  481. data = []
  482. remaining_size = size
  483. while remaining_size > 0:
  484. block = fp.read(min(remaining_size, SAFEBLOCK))
  485. if not block:
  486. break
  487. data.append(block)
  488. remaining_size -= len(block)
  489. if sum(len(d) for d in data) < size:
  490. msg = "Truncated File Read"
  491. raise OSError(msg)
  492. return b"".join(data)
  493. class PyCodecState:
  494. def __init__(self):
  495. self.xsize = 0
  496. self.ysize = 0
  497. self.xoff = 0
  498. self.yoff = 0
  499. def extents(self):
  500. return self.xoff, self.yoff, self.xoff + self.xsize, self.yoff + self.ysize
  501. class PyCodec:
  502. def __init__(self, mode, *args):
  503. self.im = None
  504. self.state = PyCodecState()
  505. self.fd = None
  506. self.mode = mode
  507. self.init(args)
  508. def init(self, args):
  509. """
  510. Override to perform codec specific initialization
  511. :param args: Array of args items from the tile entry
  512. :returns: None
  513. """
  514. self.args = args
  515. def cleanup(self):
  516. """
  517. Override to perform codec specific cleanup
  518. :returns: None
  519. """
  520. pass
  521. def setfd(self, fd):
  522. """
  523. Called from ImageFile to set the Python file-like object
  524. :param fd: A Python file-like object
  525. :returns: None
  526. """
  527. self.fd = fd
  528. def setimage(self, im, extents=None):
  529. """
  530. Called from ImageFile to set the core output image for the codec
  531. :param im: A core image object
  532. :param extents: a 4 tuple of (x0, y0, x1, y1) defining the rectangle
  533. for this tile
  534. :returns: None
  535. """
  536. # following c code
  537. self.im = im
  538. if extents:
  539. (x0, y0, x1, y1) = extents
  540. else:
  541. (x0, y0, x1, y1) = (0, 0, 0, 0)
  542. if x0 == 0 and x1 == 0:
  543. self.state.xsize, self.state.ysize = self.im.size
  544. else:
  545. self.state.xoff = x0
  546. self.state.yoff = y0
  547. self.state.xsize = x1 - x0
  548. self.state.ysize = y1 - y0
  549. if self.state.xsize <= 0 or self.state.ysize <= 0:
  550. msg = "Size cannot be negative"
  551. raise ValueError(msg)
  552. if (
  553. self.state.xsize + self.state.xoff > self.im.size[0]
  554. or self.state.ysize + self.state.yoff > self.im.size[1]
  555. ):
  556. msg = "Tile cannot extend outside image"
  557. raise ValueError(msg)
  558. class PyDecoder(PyCodec):
  559. """
  560. Python implementation of a format decoder. Override this class and
  561. add the decoding logic in the :meth:`decode` method.
  562. See :ref:`Writing Your Own File Codec in Python<file-codecs-py>`
  563. """
  564. _pulls_fd = False
  565. @property
  566. def pulls_fd(self):
  567. return self._pulls_fd
  568. def decode(self, buffer):
  569. """
  570. Override to perform the decoding process.
  571. :param buffer: A bytes object with the data to be decoded.
  572. :returns: A tuple of ``(bytes consumed, errcode)``.
  573. If finished with decoding return -1 for the bytes consumed.
  574. Err codes are from :data:`.ImageFile.ERRORS`.
  575. """
  576. raise NotImplementedError()
  577. def set_as_raw(self, data, rawmode=None):
  578. """
  579. Convenience method to set the internal image from a stream of raw data
  580. :param data: Bytes to be set
  581. :param rawmode: The rawmode to be used for the decoder.
  582. If not specified, it will default to the mode of the image
  583. :returns: None
  584. """
  585. if not rawmode:
  586. rawmode = self.mode
  587. d = Image._getdecoder(self.mode, "raw", rawmode)
  588. d.setimage(self.im, self.state.extents())
  589. s = d.decode(data)
  590. if s[0] >= 0:
  591. msg = "not enough image data"
  592. raise ValueError(msg)
  593. if s[1] != 0:
  594. msg = "cannot decode image data"
  595. raise ValueError(msg)
  596. class PyEncoder(PyCodec):
  597. """
  598. Python implementation of a format encoder. Override this class and
  599. add the decoding logic in the :meth:`encode` method.
  600. See :ref:`Writing Your Own File Codec in Python<file-codecs-py>`
  601. """
  602. _pushes_fd = False
  603. @property
  604. def pushes_fd(self):
  605. return self._pushes_fd
  606. def encode(self, bufsize):
  607. """
  608. Override to perform the encoding process.
  609. :param bufsize: Buffer size.
  610. :returns: A tuple of ``(bytes encoded, errcode, bytes)``.
  611. If finished with encoding return 1 for the error code.
  612. Err codes are from :data:`.ImageFile.ERRORS`.
  613. """
  614. raise NotImplementedError()
  615. def encode_to_pyfd(self):
  616. """
  617. If ``pushes_fd`` is ``True``, then this method will be used,
  618. and ``encode()`` will only be called once.
  619. :returns: A tuple of ``(bytes consumed, errcode)``.
  620. Err codes are from :data:`.ImageFile.ERRORS`.
  621. """
  622. if not self.pushes_fd:
  623. return 0, -8 # bad configuration
  624. bytes_consumed, errcode, data = self.encode(0)
  625. if data:
  626. self.fd.write(data)
  627. return bytes_consumed, errcode
  628. def encode_to_file(self, fh, bufsize):
  629. """
  630. :param fh: File handle.
  631. :param bufsize: Buffer size.
  632. :returns: If finished successfully, return 0.
  633. Otherwise, return an error code. Err codes are from
  634. :data:`.ImageFile.ERRORS`.
  635. """
  636. errcode = 0
  637. while errcode == 0:
  638. status, errcode, buf = self.encode(bufsize)
  639. if status > 0:
  640. fh.write(buf[status:])
  641. return errcode