scanner.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. """
  2. pygments.scanner
  3. ~~~~~~~~~~~~~~~~
  4. This library implements a regex based scanner. Some languages
  5. like Pascal are easy to parse but have some keywords that
  6. depend on the context. Because of this it's impossible to lex
  7. that just by using a regular expression lexer like the
  8. `RegexLexer`.
  9. Have a look at the `DelphiLexer` to get an idea of how to use
  10. this scanner.
  11. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  12. :license: BSD, see LICENSE for details.
  13. """
  14. import re
  15. class EndOfText(RuntimeError):
  16. """
  17. Raise if end of text is reached and the user
  18. tried to call a match function.
  19. """
  20. class Scanner:
  21. """
  22. Simple scanner
  23. All method patterns are regular expression strings (not
  24. compiled expressions!)
  25. """
  26. def __init__(self, text, flags=0):
  27. """
  28. :param text: The text which should be scanned
  29. :param flags: default regular expression flags
  30. """
  31. self.data = text
  32. self.data_length = len(text)
  33. self.start_pos = 0
  34. self.pos = 0
  35. self.flags = flags
  36. self.last = None
  37. self.match = None
  38. self._re_cache = {}
  39. def eos(self):
  40. """`True` if the scanner reached the end of text."""
  41. return self.pos >= self.data_length
  42. eos = property(eos, eos.__doc__)
  43. def check(self, pattern):
  44. """
  45. Apply `pattern` on the current position and return
  46. the match object. (Doesn't touch pos). Use this for
  47. lookahead.
  48. """
  49. if self.eos:
  50. raise EndOfText()
  51. if pattern not in self._re_cache:
  52. self._re_cache[pattern] = re.compile(pattern, self.flags)
  53. return self._re_cache[pattern].match(self.data, self.pos)
  54. def test(self, pattern):
  55. """Apply a pattern on the current position and check
  56. if it patches. Doesn't touch pos.
  57. """
  58. return self.check(pattern) is not None
  59. def scan(self, pattern):
  60. """
  61. Scan the text for the given pattern and update pos/match
  62. and related fields. The return value is a boolen that
  63. indicates if the pattern matched. The matched value is
  64. stored on the instance as ``match``, the last value is
  65. stored as ``last``. ``start_pos`` is the position of the
  66. pointer before the pattern was matched, ``pos`` is the
  67. end position.
  68. """
  69. if self.eos:
  70. raise EndOfText()
  71. if pattern not in self._re_cache:
  72. self._re_cache[pattern] = re.compile(pattern, self.flags)
  73. self.last = self.match
  74. m = self._re_cache[pattern].match(self.data, self.pos)
  75. if m is None:
  76. return False
  77. self.start_pos = m.start()
  78. self.pos = m.end()
  79. self.match = m.group()
  80. return True
  81. def get_char(self):
  82. """Scan exactly one char."""
  83. self.scan('.')
  84. def __repr__(self):
  85. return '<%s %d/%d>' % (
  86. self.__class__.__name__,
  87. self.pos,
  88. self.data_length
  89. )