# pprint.py
#  Author:      Fred L. Drake, Jr.
#               fdrake@acm.org
#
#  This is a simple little module I wrote to make life easier.  I didn't
#  see anything quite like it in the library, though I may have overlooked
#  something.  I wrote this when I was trying to read some heavily nested
#  tuples with fairly non-descriptive content.  This is modeled very much
#  after Lisp/Scheme - style pretty-printing of lists.  If you find it
#  useful, thank small children who sleep at night.
"""Support to pretty-print lists, tuples, & dictionaries recursively.

Very simple, but useful, especially in debugging data structures.

Classes
-------

PrettyPrinter()
    Handle pretty-printing operations onto a stream using a configured
    set of formatting parameters.

Functions
---------

pformat()
    Format a Python object into a pretty-printed representation.

pprint()
    Pretty-print a Python object to a stream [default is sys.stdout].

saferepr()
    Generate a 'standard' repr()-like value, but protect against recursive
    data structures.

"""
import collections as _collections
import re
import sys as _sys
import types as _types
from io import StringIO as _StringIO
  32. __all__ = ["pprint","pformat","isreadable","isrecursive","saferepr",
  33. "PrettyPrinter", "pp"]
  34. def pprint(object, stream=None, indent=1, width=80, depth=None, *,
  35. compact=False, sort_dicts=True):
  36. """Pretty-print a Python object to a stream [default is sys.stdout]."""
  37. printer = PrettyPrinter(
  38. stream=stream, indent=indent, width=width, depth=depth,
  39. compact=compact, sort_dicts=sort_dicts)
  40. printer.pprint(object)
  41. def pformat(object, indent=1, width=80, depth=None, *,
  42. compact=False, sort_dicts=True):
  43. """Format a Python object into a pretty-printed representation."""
  44. return PrettyPrinter(indent=indent, width=width, depth=depth,
  45. compact=compact, sort_dicts=sort_dicts).pformat(object)
  46. def pp(object, *args, sort_dicts=False, **kwargs):
  47. """Pretty-print a Python object"""
  48. pprint(object, *args, sort_dicts=sort_dicts, **kwargs)
  49. def saferepr(object):
  50. """Version of repr() which can handle recursive data structures."""
  51. return _safe_repr(object, {}, None, 0, True)[0]
  52. def isreadable(object):
  53. """Determine if saferepr(object) is readable by eval()."""
  54. return _safe_repr(object, {}, None, 0, True)[1]
  55. def isrecursive(object):
  56. """Determine if object requires a recursive representation."""
  57. return _safe_repr(object, {}, None, 0, True)[2]
  58. class _safe_key:
  59. """Helper function for key functions when sorting unorderable objects.
  60. The wrapped-object will fallback to a Py2.x style comparison for
  61. unorderable types (sorting first comparing the type name and then by
  62. the obj ids). Does not work recursively, so dict.items() must have
  63. _safe_key applied to both the key and the value.
  64. """
  65. __slots__ = ['obj']
  66. def __init__(self, obj):
  67. self.obj = obj
  68. def __lt__(self, other):
  69. try:
  70. return self.obj < other.obj
  71. except TypeError:
  72. return ((str(type(self.obj)), id(self.obj)) < \
  73. (str(type(other.obj)), id(other.obj)))
  74. def _safe_tuple(t):
  75. "Helper function for comparing 2-tuples"
  76. return _safe_key(t[0]), _safe_key(t[1])
  77. class PrettyPrinter:
  78. def __init__(self, indent=1, width=80, depth=None, stream=None, *,
  79. compact=False, sort_dicts=True):
  80. """Handle pretty printing operations onto a stream using a set of
  81. configured parameters.
  82. indent
  83. Number of spaces to indent for each level of nesting.
  84. width
  85. Attempted maximum number of columns in the output.
  86. depth
  87. The maximum depth to print out nested structures.
  88. stream
  89. The desired output stream. If omitted (or false), the standard
  90. output stream available at construction will be used.
  91. compact
  92. If true, several items will be combined in one line.
  93. sort_dicts
  94. If true, dict keys are sorted.
  95. """
  96. indent = int(indent)
  97. width = int(width)
  98. if indent < 0:
  99. raise ValueError('indent must be >= 0')
  100. if depth is not None and depth <= 0:
  101. raise ValueError('depth must be > 0')
  102. if not width:
  103. raise ValueError('width must be != 0')
  104. self._depth = depth
  105. self._indent_per_level = indent
  106. self._width = width
  107. if stream is not None:
  108. self._stream = stream
  109. else:
  110. self._stream = _sys.stdout
  111. self._compact = bool(compact)
  112. self._sort_dicts = sort_dicts
  113. def pprint(self, object):
  114. self._format(object, self._stream, 0, 0, {}, 0)
  115. self._stream.write("\n")
  116. def pformat(self, object):
  117. sio = _StringIO()
  118. self._format(object, sio, 0, 0, {}, 0)
  119. return sio.getvalue()
  120. def isrecursive(self, object):
  121. return self.format(object, {}, 0, 0)[2]
  122. def isreadable(self, object):
  123. s, readable, recursive = self.format(object, {}, 0, 0)
  124. return readable and not recursive
  125. def _format(self, object, stream, indent, allowance, context, level):
  126. objid = id(object)
  127. if objid in context:
  128. stream.write(_recursion(object))
  129. self._recursive = True
  130. self._readable = False
  131. return
  132. rep = self._repr(object, context, level)
  133. max_width = self._width - indent - allowance
  134. if len(rep) > max_width:
  135. p = self._dispatch.get(type(object).__repr__, None)
  136. if p is not None:
  137. context[objid] = 1
  138. p(self, object, stream, indent, allowance, context, level + 1)
  139. del context[objid]
  140. return
  141. elif isinstance(object, dict):
  142. context[objid] = 1
  143. self._pprint_dict(object, stream, indent, allowance,
  144. context, level + 1)
  145. del context[objid]
  146. return
  147. stream.write(rep)
  148. _dispatch = {}
  149. def _pprint_dict(self, object, stream, indent, allowance, context, level):
  150. write = stream.write
  151. write('{')
  152. if self._indent_per_level > 1:
  153. write((self._indent_per_level - 1) * ' ')
  154. length = len(object)
  155. if length:
  156. if self._sort_dicts:
  157. items = sorted(object.items(), key=_safe_tuple)
  158. else:
  159. items = object.items()
  160. self._format_dict_items(items, stream, indent, allowance + 1,
  161. context, level)
  162. write('}')
  163. _dispatch[dict.__repr__] = _pprint_dict
  164. def _pprint_ordered_dict(self, object, stream, indent, allowance, context, level):
  165. if not len(object):
  166. stream.write(repr(object))
  167. return
  168. cls = object.__class__
  169. stream.write(cls.__name__ + '(')
  170. self._format(list(object.items()), stream,
  171. indent + len(cls.__name__) + 1, allowance + 1,
  172. context, level)
  173. stream.write(')')
  174. _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict
  175. def _pprint_list(self, object, stream, indent, allowance, context, level):
  176. stream.write('[')
  177. self._format_items(object, stream, indent, allowance + 1,
  178. context, level)
  179. stream.write(']')
  180. _dispatch[list.__repr__] = _pprint_list
  181. def _pprint_tuple(self, object, stream, indent, allowance, context, level):
  182. stream.write('(')
  183. endchar = ',)' if len(object) == 1 else ')'
  184. self._format_items(object, stream, indent, allowance + len(endchar),
  185. context, level)
  186. stream.write(endchar)
  187. _dispatch[tuple.__repr__] = _pprint_tuple
  188. def _pprint_set(self, object, stream, indent, allowance, context, level):
  189. if not len(object):
  190. stream.write(repr(object))
  191. return
  192. typ = object.__class__
  193. if typ is set:
  194. stream.write('{')
  195. endchar = '}'
  196. else:
  197. stream.write(typ.__name__ + '({')
  198. endchar = '})'
  199. indent += len(typ.__name__) + 1
  200. object = sorted(object, key=_safe_key)
  201. self._format_items(object, stream, indent, allowance + len(endchar),
  202. context, level)
  203. stream.write(endchar)
  204. _dispatch[set.__repr__] = _pprint_set
  205. _dispatch[frozenset.__repr__] = _pprint_set
  206. def _pprint_str(self, object, stream, indent, allowance, context, level):
  207. write = stream.write
  208. if not len(object):
  209. write(repr(object))
  210. return
  211. chunks = []
  212. lines = object.splitlines(True)
  213. if level == 1:
  214. indent += 1
  215. allowance += 1
  216. max_width1 = max_width = self._width - indent
  217. for i, line in enumerate(lines):
  218. rep = repr(line)
  219. if i == len(lines) - 1:
  220. max_width1 -= allowance
  221. if len(rep) <= max_width1:
  222. chunks.append(rep)
  223. else:
  224. # A list of alternating (non-space, space) strings
  225. parts = re.findall(r'\S*\s*', line)
  226. assert parts
  227. assert not parts[-1]
  228. parts.pop() # drop empty last part
  229. max_width2 = max_width
  230. current = ''
  231. for j, part in enumerate(parts):
  232. candidate = current + part
  233. if j == len(parts) - 1 and i == len(lines) - 1:
  234. max_width2 -= allowance
  235. if len(repr(candidate)) > max_width2:
  236. if current:
  237. chunks.append(repr(current))
  238. current = part
  239. else:
  240. current = candidate
  241. if current:
  242. chunks.append(repr(current))
  243. if len(chunks) == 1:
  244. write(rep)
  245. return
  246. if level == 1:
  247. write('(')
  248. for i, rep in enumerate(chunks):
  249. if i > 0:
  250. write('\n' + ' '*indent)
  251. write(rep)
  252. if level == 1:
  253. write(')')
  254. _dispatch[str.__repr__] = _pprint_str
  255. def _pprint_bytes(self, object, stream, indent, allowance, context, level):
  256. write = stream.write
  257. if len(object) <= 4:
  258. write(repr(object))
  259. return
  260. parens = level == 1
  261. if parens:
  262. indent += 1
  263. allowance += 1
  264. write('(')
  265. delim = ''
  266. for rep in _wrap_bytes_repr(object, self._width - indent, allowance):
  267. write(delim)
  268. write(rep)
  269. if not delim:
  270. delim = '\n' + ' '*indent
  271. if parens:
  272. write(')')
  273. _dispatch[bytes.__repr__] = _pprint_bytes
  274. def _pprint_bytearray(self, object, stream, indent, allowance, context, level):
  275. write = stream.write
  276. write('bytearray(')
  277. self._pprint_bytes(bytes(object), stream, indent + 10,
  278. allowance + 1, context, level + 1)
  279. write(')')
  280. _dispatch[bytearray.__repr__] = _pprint_bytearray
  281. def _pprint_mappingproxy(self, object, stream, indent, allowance, context, level):
  282. stream.write('mappingproxy(')
  283. self._format(object.copy(), stream, indent + 13, allowance + 1,
  284. context, level)
  285. stream.write(')')
  286. _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy
  287. def _pprint_simplenamespace(self, object, stream, indent, allowance, context, level):
  288. if type(object) is _types.SimpleNamespace:
  289. # The SimpleNamespace repr is "namespace" instead of the class
  290. # name, so we do the same here. For subclasses; use the class name.
  291. cls_name = 'namespace'
  292. else:
  293. cls_name = object.__class__.__name__
  294. indent += len(cls_name) + 1
  295. delimnl = ',\n' + ' ' * indent
  296. items = object.__dict__.items()
  297. last_index = len(items) - 1
  298. stream.write(cls_name + '(')
  299. for i, (key, ent) in enumerate(items):
  300. stream.write(key)
  301. stream.write('=')
  302. last = i == last_index
  303. self._format(ent, stream, indent + len(key) + 1,
  304. allowance if last else 1,
  305. context, level)
  306. if not last:
  307. stream.write(delimnl)
  308. stream.write(')')
  309. _dispatch[_types.SimpleNamespace.__repr__] = _pprint_simplenamespace
  310. def _format_dict_items(self, items, stream, indent, allowance, context,
  311. level):
  312. write = stream.write
  313. indent += self._indent_per_level
  314. delimnl = ',\n' + ' ' * indent
  315. last_index = len(items) - 1
  316. for i, (key, ent) in enumerate(items):
  317. last = i == last_index
  318. rep = self._repr(key, context, level)
  319. write(rep)
  320. write(': ')
  321. self._format(ent, stream, indent + len(rep) + 2,
  322. allowance if last else 1,
  323. context, level)
  324. if not last:
  325. write(delimnl)
  326. def _format_items(self, items, stream, indent, allowance, context, level):
  327. write = stream.write
  328. indent += self._indent_per_level
  329. if self._indent_per_level > 1:
  330. write((self._indent_per_level - 1) * ' ')
  331. delimnl = ',\n' + ' ' * indent
  332. delim = ''
  333. width = max_width = self._width - indent + 1
  334. it = iter(items)
  335. try:
  336. next_ent = next(it)
  337. except StopIteration:
  338. return
  339. last = False
  340. while not last:
  341. ent = next_ent
  342. try:
  343. next_ent = next(it)
  344. except StopIteration:
  345. last = True
  346. max_width -= allowance
  347. width -= allowance
  348. if self._compact:
  349. rep = self._repr(ent, context, level)
  350. w = len(rep) + 2
  351. if width < w:
  352. width = max_width
  353. if delim:
  354. delim = delimnl
  355. if width >= w:
  356. width -= w
  357. write(delim)
  358. delim = ', '
  359. write(rep)
  360. continue
  361. write(delim)
  362. delim = delimnl
  363. self._format(ent, stream, indent,
  364. allowance if last else 1,
  365. context, level)
  366. def _repr(self, object, context, level):
  367. repr, readable, recursive = self.format(object, context.copy(),
  368. self._depth, level)
  369. if not readable:
  370. self._readable = False
  371. if recursive:
  372. self._recursive = True
  373. return repr
  374. def format(self, object, context, maxlevels, level):
  375. """Format object for a specific context, returning a string
  376. and flags indicating whether the representation is 'readable'
  377. and whether the object represents a recursive construct.
  378. """
  379. return _safe_repr(object, context, maxlevels, level, self._sort_dicts)
  380. def _pprint_default_dict(self, object, stream, indent, allowance, context, level):
  381. if not len(object):
  382. stream.write(repr(object))
  383. return
  384. rdf = self._repr(object.default_factory, context, level)
  385. cls = object.__class__
  386. indent += len(cls.__name__) + 1
  387. stream.write('%s(%s,\n%s' % (cls.__name__, rdf, ' ' * indent))
  388. self._pprint_dict(object, stream, indent, allowance + 1, context, level)
  389. stream.write(')')
  390. _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict
  391. def _pprint_counter(self, object, stream, indent, allowance, context, level):
  392. if not len(object):
  393. stream.write(repr(object))
  394. return
  395. cls = object.__class__
  396. stream.write(cls.__name__ + '({')
  397. if self._indent_per_level > 1:
  398. stream.write((self._indent_per_level - 1) * ' ')
  399. items = object.most_common()
  400. self._format_dict_items(items, stream,
  401. indent + len(cls.__name__) + 1, allowance + 2,
  402. context, level)
  403. stream.write('})')
  404. _dispatch[_collections.Counter.__repr__] = _pprint_counter
  405. def _pprint_chain_map(self, object, stream, indent, allowance, context, level):
  406. if not len(object.maps):
  407. stream.write(repr(object))
  408. return
  409. cls = object.__class__
  410. stream.write(cls.__name__ + '(')
  411. indent += len(cls.__name__) + 1
  412. for i, m in enumerate(object.maps):
  413. if i == len(object.maps) - 1:
  414. self._format(m, stream, indent, allowance + 1, context, level)
  415. stream.write(')')
  416. else:
  417. self._format(m, stream, indent, 1, context, level)
  418. stream.write(',\n' + ' ' * indent)
  419. _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map
  420. def _pprint_deque(self, object, stream, indent, allowance, context, level):
  421. if not len(object):
  422. stream.write(repr(object))
  423. return
  424. cls = object.__class__
  425. stream.write(cls.__name__ + '(')
  426. indent += len(cls.__name__) + 1
  427. stream.write('[')
  428. if object.maxlen is None:
  429. self._format_items(object, stream, indent, allowance + 2,
  430. context, level)
  431. stream.write('])')
  432. else:
  433. self._format_items(object, stream, indent, 2,
  434. context, level)
  435. rml = self._repr(object.maxlen, context, level)
  436. stream.write('],\n%smaxlen=%s)' % (' ' * indent, rml))
  437. _dispatch[_collections.deque.__repr__] = _pprint_deque
  438. def _pprint_user_dict(self, object, stream, indent, allowance, context, level):
  439. self._format(object.data, stream, indent, allowance, context, level - 1)
  440. _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict
  441. def _pprint_user_list(self, object, stream, indent, allowance, context, level):
  442. self._format(object.data, stream, indent, allowance, context, level - 1)
  443. _dispatch[_collections.UserList.__repr__] = _pprint_user_list
  444. def _pprint_user_string(self, object, stream, indent, allowance, context, level):
  445. self._format(object.data, stream, indent, allowance, context, level - 1)
  446. _dispatch[_collections.UserString.__repr__] = _pprint_user_string
  447. # Return triple (repr_string, isreadable, isrecursive).
  448. def _safe_repr(object, context, maxlevels, level, sort_dicts):
  449. typ = type(object)
  450. if typ in _builtin_scalars:
  451. return repr(object), True, False
  452. r = getattr(typ, "__repr__", None)
  453. if issubclass(typ, dict) and r is dict.__repr__:
  454. if not object:
  455. return "{}", True, False
  456. objid = id(object)
  457. if maxlevels and level >= maxlevels:
  458. return "{...}", False, objid in context
  459. if objid in context:
  460. return _recursion(object), False, True
  461. context[objid] = 1
  462. readable = True
  463. recursive = False
  464. components = []
  465. append = components.append
  466. level += 1
  467. if sort_dicts:
  468. items = sorted(object.items(), key=_safe_tuple)
  469. else:
  470. items = object.items()
  471. for k, v in items:
  472. krepr, kreadable, krecur = _safe_repr(k, context, maxlevels, level, sort_dicts)
  473. vrepr, vreadable, vrecur = _safe_repr(v, context, maxlevels, level, sort_dicts)
  474. append("%s: %s" % (krepr, vrepr))
  475. readable = readable and kreadable and vreadable
  476. if krecur or vrecur:
  477. recursive = True
  478. del context[objid]
  479. return "{%s}" % ", ".join(components), readable, recursive
  480. if (issubclass(typ, list) and r is list.__repr__) or \
  481. (issubclass(typ, tuple) and r is tuple.__repr__):
  482. if issubclass(typ, list):
  483. if not object:
  484. return "[]", True, False
  485. format = "[%s]"
  486. elif len(object) == 1:
  487. format = "(%s,)"
  488. else:
  489. if not object:
  490. return "()", True, False
  491. format = "(%s)"
  492. objid = id(object)
  493. if maxlevels and level >= maxlevels:
  494. return format % "...", False, objid in context
  495. if objid in context:
  496. return _recursion(object), False, True
  497. context[objid] = 1
  498. readable = True
  499. recursive = False
  500. components = []
  501. append = components.append
  502. level += 1
  503. for o in object:
  504. orepr, oreadable, orecur = _safe_repr(o, context, maxlevels, level, sort_dicts)
  505. append(orepr)
  506. if not oreadable:
  507. readable = False
  508. if orecur:
  509. recursive = True
  510. del context[objid]
  511. return format % ", ".join(components), readable, recursive
  512. rep = repr(object)
  513. return rep, (rep and not rep.startswith('<')), False
  514. _builtin_scalars = frozenset({str, bytes, bytearray, int, float, complex,
  515. bool, type(None)})
  516. def _recursion(object):
  517. return ("<Recursion on %s with id=%s>"
  518. % (type(object).__name__, id(object)))
  519. def _perfcheck(object=None):
  520. import time
  521. if object is None:
  522. object = [("string", (1, 2), [3, 4], {5: 6, 7: 8})] * 100000
  523. p = PrettyPrinter()
  524. t1 = time.perf_counter()
  525. _safe_repr(object, {}, None, 0, True)
  526. t2 = time.perf_counter()
  527. p.pformat(object)
  528. t3 = time.perf_counter()
  529. print("_safe_repr:", t2 - t1)
  530. print("pformat:", t3 - t2)
  531. def _wrap_bytes_repr(object, width, allowance):
  532. current = b''
  533. last = len(object) // 4 * 4
  534. for i in range(0, len(object), 4):
  535. part = object[i: i+4]
  536. candidate = current + part
  537. if i == last:
  538. width -= allowance
  539. if len(repr(candidate)) > width:
  540. if current:
  541. yield repr(current)
  542. current = part
  543. else:
  544. current = candidate
  545. if current:
  546. yield repr(current)
  547. if __name__ == "__main__":
  548. _perfcheck()