123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- """
- pygments.scanner
- ~~~~~~~~~~~~~~~~
- This library implements a regex-based scanner. Some languages
- like Pascal are easy to parse but have some keywords that
- depend on the context. Because of this it's impossible to lex
- them just by using a regular expression lexer like the
- `RegexLexer`.
- Have a look at the `DelphiLexer` to get an idea of how to use
- this scanner.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
class EndOfText(RuntimeError):
    """
    Signals that a match function was called after the end of the
    text had already been reached.
    """

    pass
class Scanner:
    """
    Simple scanner

    All method patterns are regular expression strings (not
    compiled expressions!). Each pattern is compiled with the
    scanner's default flags the first time it is used and then
    cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        # Start offset of the most recent successful ``scan``.
        self.start_pos = 0
        # Current offset into ``data``; only ``scan`` advances it.
        self.pos = 0
        self.flags = flags
        # ``match`` holds the text of the latest successful scan,
        # ``last`` the value ``match`` had before the latest scan call.
        self.last = None
        self.match = None
        # Maps pattern string -> compiled pattern object.
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # Declared with the decorator so the property is truly read-only;
        # the former ``property(eos, eos.__doc__)`` call installed the
        # docstring as the setter.
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled form of `pattern`, compiling it at most once."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = self._re_cache[pattern] = re.compile(pattern, self.flags)
            return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object, or `None` if it does not match.
        (Doesn't touch pos). Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        If the pattern does not match, ``pos``, ``start_pos`` and
        ``match`` are left alone, but ``last`` is still overwritten
        with the current ``match``.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        compiled = self._compile(pattern)
        # Recorded before matching, so it changes even on a failed scan.
        self.last = self.match
        m = compiled.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The character is matched with the pattern ``.``, so whether a
        newline is consumed depends on the scanner's flags. The result
        of the scan is not returned; inspect ``match``/``pos`` instead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )
|