compare.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. """
  2. Provides a collection of utilities for comparing (image) results.
  3. """
  4. import atexit
  5. import hashlib
  6. import os
  7. from pathlib import Path
  8. import re
  9. import shutil
  10. import subprocess
  11. import sys
  12. from tempfile import TemporaryFile
  13. import numpy as np
  14. import matplotlib as mpl
  15. from matplotlib.testing.exceptions import ImageComparisonFailure
  16. from matplotlib import cbook
  17. __all__ = ['compare_images', 'comparable_formats']
  18. def make_test_filename(fname, purpose):
  19. """
  20. Make a new filename by inserting *purpose* before the file's extension.
  21. """
  22. base, ext = os.path.splitext(fname)
  23. return '%s-%s%s' % (base, purpose, ext)
  24. def get_cache_dir():
  25. cachedir = mpl.get_cachedir()
  26. if cachedir is None:
  27. raise RuntimeError('Could not find a suitable configuration directory')
  28. cache_dir = os.path.join(cachedir, 'test_cache')
  29. try:
  30. Path(cache_dir).mkdir(parents=True, exist_ok=True)
  31. except IOError:
  32. return None
  33. if not os.access(cache_dir, os.W_OK):
  34. return None
  35. return cache_dir
  36. def get_file_hash(path, block_size=2 ** 20):
  37. md5 = hashlib.md5()
  38. with open(path, 'rb') as fd:
  39. while True:
  40. data = fd.read(block_size)
  41. if not data:
  42. break
  43. md5.update(data)
  44. if path.endswith('.pdf'):
  45. md5.update(str(mpl._get_executable_info("gs").version)
  46. .encode('utf-8'))
  47. elif path.endswith('.svg'):
  48. md5.update(str(mpl._get_executable_info("inkscape").version)
  49. .encode('utf-8'))
  50. return md5.hexdigest()
  51. def make_external_conversion_command(cmd):
  52. def convert(old, new):
  53. cmdline = cmd(old, new)
  54. pipe = subprocess.Popen(cmdline, universal_newlines=True,
  55. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  56. stdout, stderr = pipe.communicate()
  57. errcode = pipe.wait()
  58. if not os.path.exists(new) or errcode:
  59. msg = "Conversion command failed:\n%s\n" % ' '.join(cmdline)
  60. if stdout:
  61. msg += "Standard output:\n%s\n" % stdout
  62. if stderr:
  63. msg += "Standard error:\n%s\n" % stderr
  64. raise IOError(msg)
  65. return convert
  66. # Modified from https://bugs.python.org/issue25567.
  67. _find_unsafe_bytes = re.compile(br'[^a-zA-Z0-9_@%+=:,./-]').search
  68. def _shlex_quote_bytes(b):
  69. return (b if _find_unsafe_bytes(b) is None
  70. else b"'" + b.replace(b"'", b"'\"'\"'") + b"'")
  71. class _ConverterError(Exception):
  72. pass
  73. class _Converter:
  74. def __init__(self):
  75. self._proc = None
  76. # Explicitly register deletion from an atexit handler because if we
  77. # wait until the object is GC'd (which occurs later), then some module
  78. # globals (e.g. signal.SIGKILL) has already been set to None, and
  79. # kill() doesn't work anymore...
  80. atexit.register(self.__del__)
  81. def __del__(self):
  82. if self._proc:
  83. self._proc.kill()
  84. self._proc.wait()
  85. for stream in filter(None, [self._proc.stdin,
  86. self._proc.stdout,
  87. self._proc.stderr]):
  88. stream.close()
  89. self._proc = None
  90. def _read_until(self, terminator):
  91. """Read until the prompt is reached."""
  92. buf = bytearray()
  93. while True:
  94. c = self._proc.stdout.read(1)
  95. if not c:
  96. raise _ConverterError
  97. buf.extend(c)
  98. if buf.endswith(terminator):
  99. return bytes(buf[:-len(terminator)])
  100. class _GSConverter(_Converter):
  101. def __call__(self, orig, dest):
  102. if not self._proc:
  103. self._proc = subprocess.Popen(
  104. [mpl._get_executable_info("gs").executable,
  105. "-dNOSAFER", "-dNOPAUSE", "-sDEVICE=png16m"],
  106. # As far as I can see, ghostscript never outputs to stderr.
  107. stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  108. try:
  109. self._read_until(b"\nGS")
  110. except _ConverterError:
  111. raise OSError("Failed to start Ghostscript")
  112. def encode_and_escape(name):
  113. return (os.fsencode(name)
  114. .replace(b"\\", b"\\\\")
  115. .replace(b"(", br"\(")
  116. .replace(b")", br"\)"))
  117. self._proc.stdin.write(
  118. b"<< /OutputFile ("
  119. + encode_and_escape(dest)
  120. + b") >> setpagedevice ("
  121. + encode_and_escape(orig)
  122. + b") run flush\n")
  123. self._proc.stdin.flush()
  124. # GS> if nothing left on the stack; GS<n> if n items left on the stack.
  125. err = self._read_until(b"GS")
  126. stack = self._read_until(b">")
  127. if stack or not os.path.exists(dest):
  128. stack_size = int(stack[1:]) if stack else 0
  129. self._proc.stdin.write(b"pop\n" * stack_size)
  130. # Using the systemencoding should at least get the filenames right.
  131. raise ImageComparisonFailure(
  132. (err + b"GS" + stack + b">")
  133. .decode(sys.getfilesystemencoding(), "replace"))
  134. class _SVGConverter(_Converter):
  135. def __call__(self, orig, dest):
  136. if (not self._proc # First run.
  137. or self._proc.poll() is not None): # Inkscape terminated.
  138. env = os.environ.copy()
  139. # If one passes e.g. a png file to Inkscape, it will try to
  140. # query the user for conversion options via a GUI (even with
  141. # `--without-gui`). Unsetting `DISPLAY` prevents this (and causes
  142. # GTK to crash and Inkscape to terminate, but that'll just be
  143. # reported as a regular exception below).
  144. env.pop("DISPLAY", None) # May already be unset.
  145. # Do not load any user options.
  146. env["INKSCAPE_PROFILE_DIR"] = os.devnull
  147. # Old versions of Inkscape (0.48.3.1, used on Travis as of now)
  148. # seem to sometimes deadlock when stderr is redirected to a pipe,
  149. # so we redirect it to a temporary file instead. This is not
  150. # necessary anymore as of Inkscape 0.92.1.
  151. stderr = TemporaryFile()
  152. self._proc = subprocess.Popen(
  153. ["inkscape", "--without-gui", "--shell"],
  154. stdin=subprocess.PIPE, stdout=subprocess.PIPE,
  155. stderr=stderr, env=env)
  156. # Slight abuse, but makes shutdown handling easier.
  157. self._proc.stderr = stderr
  158. try:
  159. self._read_until(b"\n>")
  160. except _ConverterError:
  161. raise OSError("Failed to start Inkscape in interactive mode")
  162. # Inkscape uses glib's `g_shell_parse_argv`, which has a consistent
  163. # behavior across platforms, so we can just use `shlex.quote`.
  164. orig_b, dest_b = map(_shlex_quote_bytes,
  165. map(os.fsencode, [orig, dest]))
  166. if b"\n" in orig_b or b"\n" in dest_b:
  167. # Who knows whether the current folder name has a newline, or if
  168. # our encoding is even ASCII compatible... Just fall back on the
  169. # slow solution (Inkscape uses `fgets` so it will always stop at a
  170. # newline).
  171. return make_external_conversion_command(lambda old, new: [
  172. 'inkscape', '-z', old, '--export-png', new])(orig, dest)
  173. self._proc.stdin.write(orig_b + b" --export-png=" + dest_b + b"\n")
  174. self._proc.stdin.flush()
  175. try:
  176. self._read_until(b"\n>")
  177. except _ConverterError:
  178. # Inkscape's output is not localized but gtk's is, so the output
  179. # stream probably has a mixed encoding. Using the filesystem
  180. # encoding should at least get the filenames right...
  181. self._stderr.seek(0)
  182. raise ImageComparisonFailure(
  183. self._stderr.read().decode(
  184. sys.getfilesystemencoding(), "replace"))
  185. def _update_converter():
  186. try:
  187. mpl._get_executable_info("gs")
  188. except mpl.ExecutableNotFoundError:
  189. pass
  190. else:
  191. converter['pdf'] = converter['eps'] = _GSConverter()
  192. try:
  193. mpl._get_executable_info("inkscape")
  194. except mpl.ExecutableNotFoundError:
  195. pass
  196. else:
  197. converter['svg'] = _SVGConverter()
  198. #: A dictionary that maps filename extensions to functions which
  199. #: themselves map arguments `old` and `new` (filenames) to a list of strings.
  200. #: The list can then be passed to Popen to convert files with that
  201. #: extension to png format.
  202. converter = {}
  203. _update_converter()
  204. def comparable_formats():
  205. """
  206. Return the list of file formats that `.compare_images` can compare
  207. on this system.
  208. Returns
  209. -------
  210. supported_formats : list of str
  211. E.g. ``['png', 'pdf', 'svg', 'eps']``.
  212. """
  213. return ['png', *converter]
  214. def convert(filename, cache):
  215. """
  216. Convert the named file to png; return the name of the created file.
  217. If *cache* is True, the result of the conversion is cached in
  218. `matplotlib.get_cachedir() + '/test_cache/'`. The caching is based on a
  219. hash of the exact contents of the input file. There is no limit on the
  220. size of the cache, so it may need to be manually cleared periodically.
  221. """
  222. base, extension = os.fspath(filename).rsplit('.', 1)
  223. if extension not in converter:
  224. import pytest
  225. pytest.skip(f"Don't know how to convert {extension} files to png")
  226. newname = base + '_' + extension + '.png'
  227. if not os.path.exists(filename):
  228. raise IOError("'%s' does not exist" % filename)
  229. # Only convert the file if the destination doesn't already exist or
  230. # is out of date.
  231. if (not os.path.exists(newname) or
  232. os.stat(newname).st_mtime < os.stat(filename).st_mtime):
  233. if cache:
  234. cache_dir = get_cache_dir()
  235. else:
  236. cache_dir = None
  237. if cache_dir is not None:
  238. hash_value = get_file_hash(filename)
  239. new_ext = os.path.splitext(newname)[1]
  240. cached_file = os.path.join(cache_dir, hash_value + new_ext)
  241. if os.path.exists(cached_file):
  242. shutil.copyfile(cached_file, newname)
  243. return newname
  244. converter[extension](filename, newname)
  245. if cache_dir is not None:
  246. shutil.copyfile(newname, cached_file)
  247. return newname
  248. def crop_to_same(actual_path, actual_image, expected_path, expected_image):
  249. # clip the images to the same size -- this is useful only when
  250. # comparing eps to pdf
  251. if actual_path[-7:-4] == 'eps' and expected_path[-7:-4] == 'pdf':
  252. aw, ah, ad = actual_image.shape
  253. ew, eh, ed = expected_image.shape
  254. actual_image = actual_image[int(aw / 2 - ew / 2):int(
  255. aw / 2 + ew / 2), int(ah / 2 - eh / 2):int(ah / 2 + eh / 2)]
  256. return actual_image, expected_image
  257. def calculate_rms(expected_image, actual_image):
  258. "Calculate the per-pixel errors, then compute the root mean square error."
  259. if expected_image.shape != actual_image.shape:
  260. raise ImageComparisonFailure(
  261. "Image sizes do not match expected size: {} "
  262. "actual size {}".format(expected_image.shape, actual_image.shape))
  263. # Convert to float to avoid overflowing finite integer types.
  264. return np.sqrt(((expected_image - actual_image).astype(float) ** 2).mean())
  265. def compare_images(expected, actual, tol, in_decorator=False):
  266. """
  267. Compare two "image" files checking differences within a tolerance.
  268. The two given filenames may point to files which are convertible to
  269. PNG via the `.converter` dictionary. The underlying RMS is calculated
  270. with the `.calculate_rms` function.
  271. Parameters
  272. ----------
  273. expected : str
  274. The filename of the expected image.
  275. actual : str
  276. The filename of the actual image.
  277. tol : float
  278. The tolerance (a color value difference, where 255 is the
  279. maximal difference). The test fails if the average pixel
  280. difference is greater than this value.
  281. in_decorator : bool
  282. Determines the output format. If called from image_comparison
  283. decorator, this should be True. (default=False)
  284. Returns
  285. -------
  286. comparison_result : None or dict or str
  287. Return *None* if the images are equal within the given tolerance.
  288. If the images differ, the return value depends on *in_decorator*.
  289. If *in_decorator* is true, a dict with the following entries is
  290. returned:
  291. - *rms*: The RMS of the image difference.
  292. - *expected*: The filename of the expected image.
  293. - *actual*: The filename of the actual image.
  294. - *diff_image*: The filename of the difference image.
  295. - *tol*: The comparison tolerance.
  296. Otherwise, a human-readable multi-line string representation of this
  297. information is returned.
  298. Examples
  299. --------
  300. ::
  301. img1 = "./baseline/plot.png"
  302. img2 = "./output/plot.png"
  303. compare_images(img1, img2, 0.001)
  304. """
  305. from matplotlib import _png
  306. actual = os.fspath(actual)
  307. if not os.path.exists(actual):
  308. raise Exception("Output image %s does not exist." % actual)
  309. if os.stat(actual).st_size == 0:
  310. raise Exception("Output image file %s is empty." % actual)
  311. # Convert the image to png
  312. expected = os.fspath(expected)
  313. if not os.path.exists(expected):
  314. raise IOError('Baseline image %r does not exist.' % expected)
  315. extension = expected.split('.')[-1]
  316. if extension != 'png':
  317. actual = convert(actual, False)
  318. expected = convert(expected, True)
  319. # open the image files and remove the alpha channel (if it exists)
  320. with open(expected, "rb") as expected_file:
  321. expected_image = _png.read_png_int(expected_file)[:, :, :3]
  322. with open(actual, "rb") as actual_file:
  323. actual_image = _png.read_png_int(actual_file)[:, :, :3]
  324. actual_image, expected_image = crop_to_same(
  325. actual, actual_image, expected, expected_image)
  326. diff_image = make_test_filename(actual, 'failed-diff')
  327. if tol <= 0:
  328. if np.array_equal(expected_image, actual_image):
  329. return None
  330. # convert to signed integers, so that the images can be subtracted without
  331. # overflow
  332. expected_image = expected_image.astype(np.int16)
  333. actual_image = actual_image.astype(np.int16)
  334. rms = calculate_rms(expected_image, actual_image)
  335. if rms <= tol:
  336. return None
  337. save_diff_image(expected, actual, diff_image)
  338. results = dict(rms=rms, expected=str(expected),
  339. actual=str(actual), diff=str(diff_image), tol=tol)
  340. if not in_decorator:
  341. # Then the results should be a string suitable for stdout.
  342. template = ['Error: Image files did not match.',
  343. 'RMS Value: {rms}',
  344. 'Expected: \n {expected}',
  345. 'Actual: \n {actual}',
  346. 'Difference:\n {diff}',
  347. 'Tolerance: \n {tol}', ]
  348. results = '\n '.join([line.format(**results) for line in template])
  349. return results
  350. def save_diff_image(expected, actual, output):
  351. '''
  352. Parameters
  353. ----------
  354. expected : str
  355. File path of expected image.
  356. actual : str
  357. File path of actual image.
  358. output : str
  359. File path to save difference image to.
  360. '''
  361. # Drop alpha channels, similarly to compare_images.
  362. from matplotlib import _png
  363. with open(expected, "rb") as expected_file:
  364. expected_image = _png.read_png(expected_file)[..., :3]
  365. with open(actual, "rb") as actual_file:
  366. actual_image = _png.read_png(actual_file)[..., :3]
  367. actual_image, expected_image = crop_to_same(
  368. actual, actual_image, expected, expected_image)
  369. expected_image = np.array(expected_image).astype(float)
  370. actual_image = np.array(actual_image).astype(float)
  371. if expected_image.shape != actual_image.shape:
  372. raise ImageComparisonFailure(
  373. "Image sizes do not match expected size: {} "
  374. "actual size {}".format(expected_image.shape, actual_image.shape))
  375. abs_diff_image = np.abs(expected_image - actual_image)
  376. # expand differences in luminance domain
  377. abs_diff_image *= 255 * 10
  378. save_image_np = np.clip(abs_diff_image, 0, 255).astype(np.uint8)
  379. height, width, depth = save_image_np.shape
  380. # The PDF renderer doesn't produce an alpha channel, but the
  381. # matplotlib PNG writer requires one, so expand the array
  382. if depth == 3:
  383. with_alpha = np.empty((height, width, 4), dtype=np.uint8)
  384. with_alpha[:, :, 0:3] = save_image_np
  385. save_image_np = with_alpha
  386. # Hard-code the alpha channel to fully solid
  387. save_image_np[:, :, 3] = 255
  388. with open(output, "wb") as output_file:
  389. _png.write_png(save_image_np, output_file)