  1. """
  2. pygments.lexers.special
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Special lexers.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import ast
  9. import re
  10. from pygments.lexer import Lexer
  11. from pygments.token import Token, Error, Text
  12. from pygments.util import get_choice_opt
  13. __all__ = ['TextLexer', 'RawTokenLexer']
  14. class TextLexer(Lexer):
  15. """
  16. "Null" lexer, doesn't highlight anything.
  17. """
  18. name = 'Text only'
  19. aliases = ['text']
  20. filenames = ['*.txt']
  21. mimetypes = ['text/plain']
  22. priority = 0.01
  23. def get_tokens_unprocessed(self, text):
  24. yield 0, Text, text
  25. def analyse_text(text):
  26. return TextLexer.priority
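
# A minimal usage sketch: the "null" lexer emits the whole input as a single
# ``Text`` token, e.g.
#
#     >>> list(TextLexer().get_tokens_unprocessed('hello\n'))
#     [(0, Token.Text, 'hello\n')]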


# Cache of already-resolved token type names, shared by all RawTokenLexer
# instances.
_ttype_cache = {}

# Matches one line of input, including its trailing newline.
line_re = re.compile('.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)
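
    # A minimal sketch of the ``compress`` option, assuming the input was
    # produced by ``RawTokenFormatter(compress='gz')``:
    #
    #     >>> lx = RawTokenLexer(compress='gz')
    #     >>> tokens = list(lx.get_tokens(gzipped_raw_data))
    #
    # where ``gzipped_raw_data`` is a placeholder for real formatter output;
    # ``get_tokens`` gunzips the data before parsing it.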

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
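
    # Round-trip sketch with ``RawTokenFormatter``, which emits this format
    # (the exact token stream depends on the lexer used):
    #
    #     >>> from pygments import highlight
    #     >>> from pygments.lexers import PythonLexer
    #     >>> from pygments.formatters import RawTokenFormatter
    #     >>> raw = highlight('x = 1\n', PythonLexer(), RawTokenFormatter())
    #     >>> list(RawTokenLexer().get_tokens(raw))[0]
    #     (Token.Name, 'x')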

    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
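
# The raw format parsed by ``get_tokens_unprocessed`` above is one
# ``<token type>\t<repr of value>`` pair per line, for example:
#
#     Token.Keyword	'def'
#     Token.Text	' '
#     Token.Name.Function	'f'
#
# Lines that fail to parse (a malformed token name, or a literal that is not
# a str) are yielded as ``Error`` tokens rather than raising.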