  1. """
  2. pygments.lexers.tnt
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexer for Typographic Number Theory.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
  11. Punctuation, Error
  12. __all__ = ['TNTLexer']
  13. class TNTLexer(Lexer):
  14. """
  15. Lexer for Typographic Number Theory, as described in the book
  16. Gödel, Escher, Bach, by Douglas R. Hofstadter,
  17. or as summarized here:
  18. https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt
  19. .. versionadded:: 2.7
  20. """
    name = 'Typographic Number Theory'
    aliases = ['tnt']
    filenames = ['*.tnt']

    cur = []

    LOGIC = set('⊃→]&∧^|∨Vv')
    OPERATORS = set('+.⋅*')
    VARIABLES = set('abcde')
    PRIMES = set("'′")
    NEGATORS = set('~!')
    QUANTIFIERS = set('AE∀∃')
    NUMBERS = set('0123456789')
    WHITESPACE = set('\t \v\n')

    RULES = re.compile('''(?xi)
        joining | separation | double-tilde | fantasy\\ rule
        | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
        | contrapositive | De\\ Morgan | switcheroo
        | specification | generalization | interchange
        | existence | symmetry | transitivity
        | add\\ S | drop\\ S | induction
        | axiom\\ ([1-5]) | premise | push | pop
    ''')
    LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
    COMMENT = re.compile(r'\[[^\n\]]+\]')
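
    # Illustrative examples (assumed, not from the original source) of
    # strings the patterns above accept:
    #   RULES:   "axiom 1", "fantasy rule", "carry over line 12"
    #   LINENOS: "3", "3, 4", "3 and 4"
    #   COMMENT: "[premise of the fantasy]"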

    def __init__(self, *args, **kwargs):
        Lexer.__init__(self, *args, **kwargs)
        self.cur = []

    def whitespace(self, start, text, required=False):
        """Tokenize whitespace."""
        end = start
        try:
            while text[end] in self.WHITESPACE:
                end += 1
        except IndexError:
            end = len(text)
        if required:
            assert end != start
        if end != start:
            self.cur.append((start, Text, text[start:end]))
        return end

    def variable(self, start, text):
        """Tokenize a variable."""
        assert text[start] in self.VARIABLES
        end = start+1
        while text[end] in self.PRIMES:
            end += 1
        self.cur.append((start, Name.Variable, text[start:end]))
        return end

    def term(self, start, text):
        """Tokenize a term."""
        if text[start] == 'S':  # S...S(...) or S...0
            end = start+1
            while text[end] == 'S':
                end += 1
            self.cur.append((start, Number.Integer, text[start:end]))
            return self.term(end, text)
        if text[start] == '0':  # the singleton 0
            self.cur.append((start, Number.Integer, text[start]))
            return start+1
        if text[start] in self.VARIABLES:  # a''...
            return self.variable(start, text)
        if text[start] == '(':  # (...+...)
            self.cur.append((start, Punctuation, text[start]))
            start = self.term(start+1, text)
            assert text[start] in self.OPERATORS
            self.cur.append((start, Operator, text[start]))
            start = self.term(start+1, text)
            assert text[start] == ')'
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        raise AssertionError  # no matches

    def formula(self, start, text):
        """Tokenize a formula."""
        if text[start] in self.NEGATORS:  # ~<...>
            end = start+1
            while text[end] in self.NEGATORS:
                end += 1
            self.cur.append((start, Operator, text[start:end]))
            return self.formula(end, text)
        if text[start] in self.QUANTIFIERS:  # Aa:<...>
            self.cur.append((start, Keyword.Declaration, text[start]))
            start = self.variable(start+1, text)
            assert text[start] == ':'
            self.cur.append((start, Punctuation, text[start]))
            return self.formula(start+1, text)
        if text[start] == '<':  # <...&...>
            self.cur.append((start, Punctuation, text[start]))
            start = self.formula(start+1, text)
            assert text[start] in self.LOGIC
            self.cur.append((start, Operator, text[start]))
            start = self.formula(start+1, text)
            assert text[start] == '>'
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        # ...=...
        start = self.term(start, text)
        assert text[start] == '='
        self.cur.append((start, Operator, text[start]))
        start = self.term(start+1, text)
        return start

    def rule(self, start, text):
        """Tokenize a rule."""
        match = self.RULES.match(text, start)
        assert match is not None
        groups = sorted(match.regs[1:])  # exclude whole match
        for group in groups:
            if group[0] >= 0:  # this group matched
                self.cur.append((start, Keyword, text[start:group[0]]))
                self.cur.append((group[0], Number.Integer,
                                 text[group[0]:group[1]]))
                if group[1] != match.end():
                    self.cur.append((group[1], Keyword,
                                     text[group[1]:match.end()]))
                break
        else:
            self.cur.append((start, Keyword, text[start:match.end()]))
        return match.end()

    def lineno(self, start, text):
        """Tokenize a line referral."""
        end = start
        while text[end] not in self.NUMBERS:
            end += 1
        self.cur.append((start, Punctuation, text[start]))
        self.cur.append((start+1, Text, text[start+1:end]))
        start = end
        match = self.LINENOS.match(text, start)
        assert match is not None
        assert text[match.end()] == ')'
        self.cur.append((match.start(), Number.Integer, match.group(0)))
        self.cur.append((match.end(), Punctuation, text[match.end()]))
        return match.end() + 1

    def error_till_line_end(self, start, text):
        """Mark everything from ``start`` to the end of the line as Error."""
        end = start
        try:
            while text[end] != '\n':  # there's whitespace in rules
                end += 1
        except IndexError:
            end = len(text)
        if end != start:
            self.cur.append((start, Error, text[start:end]))
        end = self.whitespace(end, text)
        return end

    def get_tokens_unprocessed(self, text):
        """Returns a list of TNT tokens."""
        self.cur = []
        start = end = self.whitespace(0, text)
        while start <= end < len(text):
            try:
                # try line number
                while text[end] in self.NUMBERS:
                    end += 1
                if end != start:  # actual number present
                    self.cur.append((start, Number.Integer, text[start:end]))
                    # whitespace is required after a line number
                    orig = len(self.cur)
                    try:
                        start = end = self.whitespace(end, text, True)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(end, text)
                        continue
                # at this point it could be a comment
                match = self.COMMENT.match(text, start)
                if match is not None:
                    self.cur.append((start, Comment, text[start:match.end()]))
                    start = end = match.end()
                    # anything after the closing bracket is invalid
                    start = end = self.error_till_line_end(start, text)
                    # do not attempt to process the rest
                    continue
                del match
                if text[start] in '[]':  # fantasy push or pop
                    self.cur.append((start, Keyword, text[start]))
                    start += 1
                    end += 1
                else:
                    # one formula, possibly containing subformulae
                    orig = len(self.cur)
                    try:
                        start = end = self.formula(start, text)
                    except AssertionError:  # not well-formed
                        del self.cur[orig:]
                        while text[end] not in self.WHITESPACE:
                            end += 1
                        self.cur.append((start, Error, text[start:end]))
                        start = end
                # skip whitespace after formula
                orig = len(self.cur)
                try:
                    start = end = self.whitespace(end, text, True)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # rule proving this formula a theorem
                orig = len(self.cur)
                try:
                    start = end = self.rule(start, text)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # skip whitespace after rule
                start = end = self.whitespace(end, text)
                # line marker
                if text[start] == '(':
                    orig = len(self.cur)
                    try:
                        start = end = self.lineno(start, text)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(start, text)
                        continue
                    start = end = self.whitespace(start, text)
            except IndexError:
                try:
                    del self.cur[orig:]
                except NameError:
                    pass  # if orig was never defined, fine
                self.error_till_line_end(start, text)
        return self.cur
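

# A minimal usage sketch (an assumption, not part of the original module):
# highlight a small two-line TNT derivation with this lexer and Pygments'
# built-in TerminalFormatter.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    sample = (
        '1 Aa:~Sa=0 axiom 1\n'
        '2 ~S0=0 specification (1)\n'
    )
    print(highlight(sample, TNTLexer(), TerminalFormatter()))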