123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- """
- pygments.lexers.tnt
- ~~~~~~~~~~~~~~~~~~~
- Lexer for Typographic Number Theory.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import Lexer
- from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
- Punctuation, Error
- __all__ = ['TNTLexer']
class TNTLexer(Lexer):
    """
    Lexer for Typographic Number Theory, as described in the book
    Gödel, Escher, Bach, by Douglas R. Hofstadter,
    or as summarized here:
    https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt

    The lexer is a small hand-written recursive-descent tokenizer: each
    helper method consumes one construct starting at a given index,
    appends ``(index, tokentype, value)`` tuples to ``self.cur`` and
    returns the index just past what it consumed.  Helpers signal
    malformed input by raising ``AssertionError``;
    ``get_tokens_unprocessed`` catches that, rolls back the partial
    tokens and emits ``Error`` tokens instead.

    .. versionadded:: 2.7
    """

    name = 'Typographic Number Theory'
    aliases = ['tnt']
    filenames = ['*.tnt']

    # Token accumulator; replaced by a fresh per-instance list in __init__
    # and again at the start of every get_tokens_unprocessed() call.
    cur = []

    # Characters accepted as the connective between the two sub-formulas
    # of a ``<...>`` compound formula.
    LOGIC = set('⊃→]&∧^|∨Vv')
    # Characters accepted as the operator between the two terms of a
    # parenthesised ``(...)`` term.
    OPERATORS = set('+.⋅*')
    # Letters that may start a variable.
    VARIABLES = set('abcde')
    # Prime marks that may follow a variable letter.
    PRIMES = set("'′")
    # Negation signs that may prefix a formula.
    NEGATORS = set('~!')
    # Quantifier symbols that may prefix ``variable:formula``.
    QUANTIFIERS = set('AE∀∃')
    # Digits of a line number.
    NUMBERS = set('0123456789')
    WHITESPACE = set('\t \v\n')

    # Names of the derivation rules (verbose, case-insensitive).  The two
    # capture groups are the numbers embedded in "carry over ... N" and
    # "axiom N"; rule() highlights them separately.
    RULES = re.compile('''(?xi)
        joining | separation | double-tilde | fantasy\\ rule
        | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
        | contrapositive | De\\ Morgan | switcheroo
        | specification | generalization | interchange
        | existence | symmetry | transitivity
        | add\\ S | drop\\ S | induction
        | axiom\\ ([1-5]) | premise | push | pop
    ''')
    # One or more numbers separated by ",", ", ", " and " or ", and ".
    LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
    # A non-empty bracketed comment that stays on one line.
    COMMENT = re.compile(r'\[[^\n\]]+\]')

    def __init__(self, *args, **kwargs):
        """Initialise the base lexer and give the instance its own token list."""
        Lexer.__init__(self, *args, **kwargs)
        self.cur = []

    def whitespace(self, start, text, required=False):
        """Tokenize whitespace.

        Consumes the run of ``WHITESPACE`` characters beginning at
        ``start`` (possibly empty, possibly reaching the end of ``text``)
        and appends one ``Text`` token for it when it is non-empty.

        :param start: index in ``text`` at which to begin.
        :param text: the full source text.
        :param required: when true, an empty run is an error.
        :return: index of the first character after the whitespace.
        :raises AssertionError: if ``required`` and no whitespace was found.
        """
        end = start
        try:
            while text[end] in self.WHITESPACE:
                end += 1
        except IndexError:
            # ran off the end of the text: everything up to it was whitespace
            end = len(text)
        # Raised explicitly (not via ``assert``) so the check survives -O.
        if required and end == start:
            raise AssertionError
        if end != start:
            self.cur.append((start, Text, text[start:end]))
        return end

    def variable(self, start, text):
        """Tokenize a variable.

        A variable is one of the ``VARIABLES`` letters followed by any
        number of ``PRIMES`` characters; it is emitted as one
        ``Name.Variable`` token.

        :return: index of the first character after the variable.
        :raises AssertionError: if ``text[start]`` is not a variable letter.
        :raises IndexError: if the text ends while the variable is scanned.
        """
        if text[start] not in self.VARIABLES:
            raise AssertionError
        end = start+1
        while text[end] in self.PRIMES:
            end += 1
        self.cur.append((start, Name.Variable, text[start:end]))
        return end

    def term(self, start, text):
        """Tokenize a term.

        Recognised forms: a run of ``S`` followed by another term, the
        single digit ``0``, a variable, or a parenthesised pair of terms
        joined by one of the ``OPERATORS``.

        :return: index of the first character after the term.
        :raises AssertionError: if no form matches at ``start``.
        :raises IndexError: if the text ends in the middle of the term.
        """
        if text[start] == 'S':  # S...S(...) or S...0
            end = start+1
            while text[end] == 'S':
                end += 1
            self.cur.append((start, Number.Integer, text[start:end]))
            return self.term(end, text)
        if text[start] == '0':  # the singleton 0
            self.cur.append((start, Number.Integer, text[start]))
            return start+1
        if text[start] in self.VARIABLES:  # a''...
            return self.variable(start, text)
        if text[start] == '(':  # (...+...)
            self.cur.append((start, Punctuation, text[start]))
            start = self.term(start+1, text)
            if text[start] not in self.OPERATORS:
                raise AssertionError
            self.cur.append((start, Operator, text[start]))
            start = self.term(start+1, text)
            if text[start] != ')':
                raise AssertionError
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        raise AssertionError  # no matches

    def formula(self, start, text):
        """Tokenize a formula.

        Recognised forms: a run of ``NEGATORS`` followed by a formula, a
        quantifier followed by ``variable:formula``, a ``<...>`` pair of
        formulas joined by one of the ``LOGIC`` characters, or
        ``term=term``.

        :return: index of the first character after the formula.
        :raises AssertionError: if the text at ``start`` is not well-formed.
        :raises IndexError: if the text ends in the middle of the formula.
        """
        if text[start] in self.NEGATORS:  # ~<...>
            end = start+1
            while text[end] in self.NEGATORS:
                end += 1
            self.cur.append((start, Operator, text[start:end]))
            return self.formula(end, text)
        if text[start] in self.QUANTIFIERS:  # Aa:<...>
            self.cur.append((start, Keyword.Declaration, text[start]))
            start = self.variable(start+1, text)
            if text[start] != ':':
                raise AssertionError
            self.cur.append((start, Punctuation, text[start]))
            return self.formula(start+1, text)
        if text[start] == '<':  # <...&...>
            self.cur.append((start, Punctuation, text[start]))
            start = self.formula(start+1, text)
            if text[start] not in self.LOGIC:
                raise AssertionError
            self.cur.append((start, Operator, text[start]))
            start = self.formula(start+1, text)
            if text[start] != '>':
                raise AssertionError
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        # ...=...
        start = self.term(start, text)
        if text[start] != '=':
            raise AssertionError
        self.cur.append((start, Operator, text[start]))
        start = self.term(start+1, text)
        return start

    def rule(self, start, text):
        """Tokenize a rule.

        Matches ``RULES`` at ``start``.  A rule without an embedded
        number becomes one ``Keyword`` token; a rule with one is split
        into ``Keyword`` (text before the number), ``Number.Integer``
        (the number) and, if anything of the match follows the number,
        a trailing ``Keyword``.

        :return: index of the first character after the rule.
        :raises AssertionError: if no rule name matches at ``start``.
        """
        match = self.RULES.match(text, start)
        if match is None:
            raise AssertionError
        # (start, end) spans of the capture groups; groups that did not
        # take part in the match are (-1, -1) and therefore sort first.
        groups = sorted(match.regs[1:])  # exclude whole match
        for group in groups:
            if group[0] >= 0:  # this group matched
                self.cur.append((start, Keyword, text[start:group[0]]))
                self.cur.append((group[0], Number.Integer,
                                 text[group[0]:group[1]]))
                if group[1] != match.end():
                    self.cur.append((group[1], Keyword,
                                     text[group[1]:match.end()]))
                break
        else:
            # no numbered group matched: the whole rule is one keyword
            self.cur.append((start, Keyword, text[start:match.end()]))
        return match.end()

    def lineno(self, start, text):
        """Tokenize a line referral.

        The caller is expected to pass the index of the opening ``(``.
        Emits ``Punctuation`` for that character, ``Text`` for everything
        up to the first digit, ``Number.Integer`` for the ``LINENOS``
        match and ``Punctuation`` for the closing ``)``.

        :return: index of the first character after the closing ``)``.
        :raises AssertionError: if the number list is malformed or is not
            immediately followed by ``)``.
        :raises IndexError: if the text ends before a digit or the
            closing ``)`` is found.
        """
        end = start
        while text[end] not in self.NUMBERS:
            end += 1
        self.cur.append((start, Punctuation, text[start]))
        self.cur.append((start+1, Text, text[start+1:end]))
        start = end
        match = self.LINENOS.match(text, start)
        if match is None:
            raise AssertionError
        if text[match.end()] != ')':
            raise AssertionError
        self.cur.append((match.start(), Number.Integer, match.group(0)))
        self.cur.append((match.end(), Punctuation, text[match.end()]))
        return match.end() + 1

    def error_till_line_end(self, start, text):
        """Mark everything from ``start`` to the end of the line as Error.

        Also consumes the whitespace that follows (including the newline),
        emitting it as a ``Text`` token.

        :return: index of the first character after that whitespace.
        """
        end = start
        try:
            while text[end] != '\n':  # there's whitespace in rules
                end += 1
        except IndexError:
            end = len(text)
        if end != start:
            self.cur.append((start, Error, text[start:end]))
        end = self.whitespace(end, text)
        return end

    def get_tokens_unprocessed(self, text):
        """Returns a list of TNT tokens.

        Each loop iteration handles one proof line: an optional line
        number, then either a bracketed comment, or a fantasy marker /
        formula followed by whitespace, a rule and an optional line
        referral.  Whenever a part is malformed, the tokens produced for
        it are rolled back and the rest of the line is emitted as
        ``Error``.

        :param text: the full source text.
        :return: list of ``(index, tokentype, value)`` tuples.
        """
        self.cur = []
        start = end = self.whitespace(0, text)
        while start <= end < len(text):
            # Rollback checkpoint.  Reset for every line so that a value
            # left over from an earlier line can never cause that line's
            # (valid) tokens to be discarded by the IndexError handler.
            orig = len(self.cur)
            try:
                # try line number
                while text[end] in self.NUMBERS:
                    end += 1
                if end != start:  # actual number present
                    self.cur.append((start, Number.Integer, text[start:end]))
                    # whitespace is required after a line number
                    orig = len(self.cur)
                    try:
                        start = end = self.whitespace(end, text, True)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(end, text)
                        continue
                # at this point it could be a comment
                match = self.COMMENT.match(text, start)
                if match is not None:
                    self.cur.append((start, Comment, text[start:match.end()]))
                    start = end = match.end()
                    # anything after the closing bracket is invalid
                    start = end = self.error_till_line_end(start, text)
                    # do not attempt to process the rest
                    continue
                if start >= len(text):
                    # the text ended right after a line number; everything
                    # seen so far is already tokenized, nothing to roll back
                    break
                if text[start] in '[]':  # fantasy push or pop
                    self.cur.append((start, Keyword, text[start]))
                    start += 1
                    end += 1
                else:
                    # one formula, possibly containing subformulae
                    orig = len(self.cur)
                    try:
                        start = end = self.formula(start, text)
                    except (AssertionError, RecursionError):
                        # not well-formed (or nested too deeply to parse):
                        # flag everything up to the next whitespace
                        del self.cur[orig:]
                        while text[end] not in self.WHITESPACE:
                            end += 1
                        self.cur.append((start, Error, text[start:end]))
                        start = end
                # skip whitespace after formula
                orig = len(self.cur)
                try:
                    start = end = self.whitespace(end, text, True)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # rule proving this formula a theorem
                orig = len(self.cur)
                try:
                    start = end = self.rule(start, text)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # skip whitespace after rule
                start = end = self.whitespace(end, text)
                # line marker; the bounds check keeps a rule that ends the
                # text from raising IndexError and being rolled back
                if start < len(text) and text[start] == '(':
                    orig = len(self.cur)
                    try:
                        start = end = self.lineno(start, text)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(start, text)
                        continue
                    start = end = self.whitespace(start, text)
            except IndexError:
                # The text ended in the middle of a construct: drop its
                # partial tokens and flag the remainder.  The position
                # must be advanced past the flagged span, otherwise the
                # same characters would be tokenized a second time.
                del self.cur[orig:]
                start = end = self.error_till_line_end(start, text)
        return self.cur
|