# formatter.py (15 KB)
"""Generic output formatting.

Formatter objects transform an abstract flow of formatting events into
specific output events on writer objects. Formatters manage several stack
structures to allow various properties of a writer object to be changed and
restored; writers need not be able to handle relative changes nor any sort
of ``change back'' operation. Specific writer properties which may be
controlled via formatter objects are horizontal alignment, font, and left
margin indentations. A mechanism is provided which supports providing
arbitrary, non-exclusive style settings to a writer as well. Additional
interfaces facilitate formatting events which are not reversible, such as
paragraph separation.

Writer objects encapsulate device interfaces. Abstract devices, such as
file formats, are supported as well as physical devices. The provided
implementations all work with abstract devices. The interface makes
available mechanisms for setting the properties which formatter objects
manage and inserting data into the output.
"""

import sys
import warnings
# Importing this module emits a DeprecationWarning; stacklevel=2 attributes
# the warning to the importing code rather than to this file.
warnings.warn('the formatter module is deprecated', DeprecationWarning,
              stacklevel=2)


# Placeholder for a font component meaning "keep the current value":
# AbstractFormatter.push_font() replaces every component that is AS_IS with
# the matching component of the font on top of its font stack.
AS_IS = None
  23. class NullFormatter:
  24. """A formatter which does nothing.
  25. If the writer parameter is omitted, a NullWriter instance is created.
  26. No methods of the writer are called by NullFormatter instances.
  27. Implementations should inherit from this class if implementing a writer
  28. interface but don't need to inherit any implementation.
  29. """
  30. def __init__(self, writer=None):
  31. if writer is None:
  32. writer = NullWriter()
  33. self.writer = writer
  34. def end_paragraph(self, blankline): pass
  35. def add_line_break(self): pass
  36. def add_hor_rule(self, *args, **kw): pass
  37. def add_label_data(self, format, counter, blankline=None): pass
  38. def add_flowing_data(self, data): pass
  39. def add_literal_data(self, data): pass
  40. def flush_softspace(self): pass
  41. def push_alignment(self, align): pass
  42. def pop_alignment(self): pass
  43. def push_font(self, x): pass
  44. def pop_font(self): pass
  45. def push_margin(self, margin): pass
  46. def pop_margin(self): pass
  47. def set_spacing(self, spacing): pass
  48. def push_style(self, *styles): pass
  49. def pop_style(self, n=1): pass
  50. def assert_line_data(self, flag=1): pass
  51. class AbstractFormatter:
  52. """The standard formatter.
  53. This implementation has demonstrated wide applicability to many writers,
  54. and may be used directly in most circumstances. It has been used to
  55. implement a full-featured World Wide Web browser.
  56. """
  57. # Space handling policy: blank spaces at the boundary between elements
  58. # are handled by the outermost context. "Literal" data is not checked
  59. # to determine context, so spaces in literal data are handled directly
  60. # in all circumstances.
  61. def __init__(self, writer):
  62. self.writer = writer # Output device
  63. self.align = None # Current alignment
  64. self.align_stack = [] # Alignment stack
  65. self.font_stack = [] # Font state
  66. self.margin_stack = [] # Margin state
  67. self.spacing = None # Vertical spacing state
  68. self.style_stack = [] # Other state, e.g. color
  69. self.nospace = 1 # Should leading space be suppressed
  70. self.softspace = 0 # Should a space be inserted
  71. self.para_end = 1 # Just ended a paragraph
  72. self.parskip = 0 # Skipped space between paragraphs?
  73. self.hard_break = 1 # Have a hard break
  74. self.have_label = 0
  75. def end_paragraph(self, blankline):
  76. if not self.hard_break:
  77. self.writer.send_line_break()
  78. self.have_label = 0
  79. if self.parskip < blankline and not self.have_label:
  80. self.writer.send_paragraph(blankline - self.parskip)
  81. self.parskip = blankline
  82. self.have_label = 0
  83. self.hard_break = self.nospace = self.para_end = 1
  84. self.softspace = 0
  85. def add_line_break(self):
  86. if not (self.hard_break or self.para_end):
  87. self.writer.send_line_break()
  88. self.have_label = self.parskip = 0
  89. self.hard_break = self.nospace = 1
  90. self.softspace = 0
  91. def add_hor_rule(self, *args, **kw):
  92. if not self.hard_break:
  93. self.writer.send_line_break()
  94. self.writer.send_hor_rule(*args, **kw)
  95. self.hard_break = self.nospace = 1
  96. self.have_label = self.para_end = self.softspace = self.parskip = 0
  97. def add_label_data(self, format, counter, blankline = None):
  98. if self.have_label or not self.hard_break:
  99. self.writer.send_line_break()
  100. if not self.para_end:
  101. self.writer.send_paragraph((blankline and 1) or 0)
  102. if isinstance(format, str):
  103. self.writer.send_label_data(self.format_counter(format, counter))
  104. else:
  105. self.writer.send_label_data(format)
  106. self.nospace = self.have_label = self.hard_break = self.para_end = 1
  107. self.softspace = self.parskip = 0
  108. def format_counter(self, format, counter):
  109. label = ''
  110. for c in format:
  111. if c == '1':
  112. label = label + ('%d' % counter)
  113. elif c in 'aA':
  114. if counter > 0:
  115. label = label + self.format_letter(c, counter)
  116. elif c in 'iI':
  117. if counter > 0:
  118. label = label + self.format_roman(c, counter)
  119. else:
  120. label = label + c
  121. return label
  122. def format_letter(self, case, counter):
  123. label = ''
  124. while counter > 0:
  125. counter, x = divmod(counter-1, 26)
  126. # This makes a strong assumption that lowercase letters
  127. # and uppercase letters form two contiguous blocks, with
  128. # letters in order!
  129. s = chr(ord(case) + x)
  130. label = s + label
  131. return label
  132. def format_roman(self, case, counter):
  133. ones = ['i', 'x', 'c', 'm']
  134. fives = ['v', 'l', 'd']
  135. label, index = '', 0
  136. # This will die of IndexError when counter is too big
  137. while counter > 0:
  138. counter, x = divmod(counter, 10)
  139. if x == 9:
  140. label = ones[index] + ones[index+1] + label
  141. elif x == 4:
  142. label = ones[index] + fives[index] + label
  143. else:
  144. if x >= 5:
  145. s = fives[index]
  146. x = x-5
  147. else:
  148. s = ''
  149. s = s + ones[index]*x
  150. label = s + label
  151. index = index + 1
  152. if case == 'I':
  153. return label.upper()
  154. return label
  155. def add_flowing_data(self, data):
  156. if not data: return
  157. prespace = data[:1].isspace()
  158. postspace = data[-1:].isspace()
  159. data = " ".join(data.split())
  160. if self.nospace and not data:
  161. return
  162. elif prespace or self.softspace:
  163. if not data:
  164. if not self.nospace:
  165. self.softspace = 1
  166. self.parskip = 0
  167. return
  168. if not self.nospace:
  169. data = ' ' + data
  170. self.hard_break = self.nospace = self.para_end = \
  171. self.parskip = self.have_label = 0
  172. self.softspace = postspace
  173. self.writer.send_flowing_data(data)
  174. def add_literal_data(self, data):
  175. if not data: return
  176. if self.softspace:
  177. self.writer.send_flowing_data(" ")
  178. self.hard_break = data[-1:] == '\n'
  179. self.nospace = self.para_end = self.softspace = \
  180. self.parskip = self.have_label = 0
  181. self.writer.send_literal_data(data)
  182. def flush_softspace(self):
  183. if self.softspace:
  184. self.hard_break = self.para_end = self.parskip = \
  185. self.have_label = self.softspace = 0
  186. self.nospace = 1
  187. self.writer.send_flowing_data(' ')
  188. def push_alignment(self, align):
  189. if align and align != self.align:
  190. self.writer.new_alignment(align)
  191. self.align = align
  192. self.align_stack.append(align)
  193. else:
  194. self.align_stack.append(self.align)
  195. def pop_alignment(self):
  196. if self.align_stack:
  197. del self.align_stack[-1]
  198. if self.align_stack:
  199. self.align = align = self.align_stack[-1]
  200. self.writer.new_alignment(align)
  201. else:
  202. self.align = None
  203. self.writer.new_alignment(None)
  204. def push_font(self, font):
  205. size, i, b, tt = font
  206. if self.softspace:
  207. self.hard_break = self.para_end = self.softspace = 0
  208. self.nospace = 1
  209. self.writer.send_flowing_data(' ')
  210. if self.font_stack:
  211. csize, ci, cb, ctt = self.font_stack[-1]
  212. if size is AS_IS: size = csize
  213. if i is AS_IS: i = ci
  214. if b is AS_IS: b = cb
  215. if tt is AS_IS: tt = ctt
  216. font = (size, i, b, tt)
  217. self.font_stack.append(font)
  218. self.writer.new_font(font)
  219. def pop_font(self):
  220. if self.font_stack:
  221. del self.font_stack[-1]
  222. if self.font_stack:
  223. font = self.font_stack[-1]
  224. else:
  225. font = None
  226. self.writer.new_font(font)
  227. def push_margin(self, margin):
  228. self.margin_stack.append(margin)
  229. fstack = [m for m in self.margin_stack if m]
  230. if not margin and fstack:
  231. margin = fstack[-1]
  232. self.writer.new_margin(margin, len(fstack))
  233. def pop_margin(self):
  234. if self.margin_stack:
  235. del self.margin_stack[-1]
  236. fstack = [m for m in self.margin_stack if m]
  237. if fstack:
  238. margin = fstack[-1]
  239. else:
  240. margin = None
  241. self.writer.new_margin(margin, len(fstack))
  242. def set_spacing(self, spacing):
  243. self.spacing = spacing
  244. self.writer.new_spacing(spacing)
  245. def push_style(self, *styles):
  246. if self.softspace:
  247. self.hard_break = self.para_end = self.softspace = 0
  248. self.nospace = 1
  249. self.writer.send_flowing_data(' ')
  250. for style in styles:
  251. self.style_stack.append(style)
  252. self.writer.new_styles(tuple(self.style_stack))
  253. def pop_style(self, n=1):
  254. del self.style_stack[-n:]
  255. self.writer.new_styles(tuple(self.style_stack))
  256. def assert_line_data(self, flag=1):
  257. self.nospace = self.hard_break = not flag
  258. self.para_end = self.parskip = self.have_label = 0
  259. class NullWriter:
  260. """Minimal writer interface to use in testing & inheritance.
  261. A writer which only provides the interface definition; no actions are
  262. taken on any methods. This should be the base class for all writers
  263. which do not need to inherit any implementation methods.
  264. """
  265. def __init__(self): pass
  266. def flush(self): pass
  267. def new_alignment(self, align): pass
  268. def new_font(self, font): pass
  269. def new_margin(self, margin, level): pass
  270. def new_spacing(self, spacing): pass
  271. def new_styles(self, styles): pass
  272. def send_paragraph(self, blankline): pass
  273. def send_line_break(self): pass
  274. def send_hor_rule(self, *args, **kw): pass
  275. def send_label_data(self, data): pass
  276. def send_flowing_data(self, data): pass
  277. def send_literal_data(self, data): pass
  278. class AbstractWriter(NullWriter):
  279. """A writer which can be used in debugging formatters, but not much else.
  280. Each method simply announces itself by printing its name and
  281. arguments on standard output.
  282. """
  283. def new_alignment(self, align):
  284. print("new_alignment(%r)" % (align,))
  285. def new_font(self, font):
  286. print("new_font(%r)" % (font,))
  287. def new_margin(self, margin, level):
  288. print("new_margin(%r, %d)" % (margin, level))
  289. def new_spacing(self, spacing):
  290. print("new_spacing(%r)" % (spacing,))
  291. def new_styles(self, styles):
  292. print("new_styles(%r)" % (styles,))
  293. def send_paragraph(self, blankline):
  294. print("send_paragraph(%r)" % (blankline,))
  295. def send_line_break(self):
  296. print("send_line_break()")
  297. def send_hor_rule(self, *args, **kw):
  298. print("send_hor_rule()")
  299. def send_label_data(self, data):
  300. print("send_label_data(%r)" % (data,))
  301. def send_flowing_data(self, data):
  302. print("send_flowing_data(%r)" % (data,))
  303. def send_literal_data(self, data):
  304. print("send_literal_data(%r)" % (data,))
  305. class DumbWriter(NullWriter):
  306. """Simple writer class which writes output on the file object passed in
  307. as the file parameter or, if file is omitted, on standard output. The
  308. output is simply word-wrapped to the number of columns specified by
  309. the maxcol parameter. This class is suitable for reflowing a sequence
  310. of paragraphs.
  311. """
  312. def __init__(self, file=None, maxcol=72):
  313. self.file = file or sys.stdout
  314. self.maxcol = maxcol
  315. NullWriter.__init__(self)
  316. self.reset()
  317. def reset(self):
  318. self.col = 0
  319. self.atbreak = 0
  320. def send_paragraph(self, blankline):
  321. self.file.write('\n'*blankline)
  322. self.col = 0
  323. self.atbreak = 0
  324. def send_line_break(self):
  325. self.file.write('\n')
  326. self.col = 0
  327. self.atbreak = 0
  328. def send_hor_rule(self, *args, **kw):
  329. self.file.write('\n')
  330. self.file.write('-'*self.maxcol)
  331. self.file.write('\n')
  332. self.col = 0
  333. self.atbreak = 0
  334. def send_literal_data(self, data):
  335. self.file.write(data)
  336. i = data.rfind('\n')
  337. if i >= 0:
  338. self.col = 0
  339. data = data[i+1:]
  340. data = data.expandtabs()
  341. self.col = self.col + len(data)
  342. self.atbreak = 0
  343. def send_flowing_data(self, data):
  344. if not data: return
  345. atbreak = self.atbreak or data[0].isspace()
  346. col = self.col
  347. maxcol = self.maxcol
  348. write = self.file.write
  349. for word in data.split():
  350. if atbreak:
  351. if col + len(word) >= maxcol:
  352. write('\n')
  353. col = 0
  354. else:
  355. write(' ')
  356. col = col + 1
  357. write(word)
  358. col = col + len(word)
  359. atbreak = 1
  360. self.col = col
  361. self.atbreak = data[-1].isspace()
  362. def test(file = None):
  363. w = DumbWriter()
  364. f = AbstractFormatter(w)
  365. if file is not None:
  366. fp = open(file)
  367. elif sys.argv[1:]:
  368. fp = open(sys.argv[1])
  369. else:
  370. fp = sys.stdin
  371. try:
  372. for line in fp:
  373. if line == '\n':
  374. f.end_paragraph(1)
  375. else:
  376. f.add_flowing_data(line)
  377. finally:
  378. if fp is not sys.stdin:
  379. fp.close()
  380. f.end_paragraph(0)
# Run the reflow demo in test() when this file is executed as a script.
if __name__ == '__main__':
    test()