  1. """
  2. pygments.lexers.textedit
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for languages related to text processing.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from bisect import bisect
  10. from pygments.lexer import RegexLexer, include, default, bygroups, using, this
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  12. Number, Punctuation
  13. from pygments.lexers.python import PythonLexer
  14. __all__ = ['AwkLexer', 'VimLexer']
  15. class AwkLexer(RegexLexer):
  16. """
  17. For Awk scripts.
  18. .. versionadded:: 1.5
  19. """
  20. name = 'Awk'
  21. aliases = ['awk', 'gawk', 'mawk', 'nawk']
  22. filenames = ['*.awk']
  23. mimetypes = ['application/x-awk']
  24. tokens = {
  25. 'commentsandwhitespace': [
  26. (r'\s+', Text),
  27. (r'#.*$', Comment.Single)
  28. ],
  29. 'slashstartsregex': [
  30. include('commentsandwhitespace'),
  31. (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
  32. r'\B', String.Regex, '#pop'),
  33. (r'(?=/)', Text, ('#pop', 'badregex')),
  34. default('#pop')
  35. ],
  36. 'badregex': [
  37. (r'\n', Text, '#pop')
  38. ],
  39. 'root': [
  40. (r'^(?=\s|/)', Text, 'slashstartsregex'),
  41. include('commentsandwhitespace'),
  42. (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
  43. r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
  44. (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
  45. (r'[})\].]', Punctuation),
  46. (r'(break|continue|do|while|exit|for|if|else|'
  47. r'return)\b', Keyword, 'slashstartsregex'),
  48. (r'function\b', Keyword.Declaration, 'slashstartsregex'),
  49. (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
  50. r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
  51. r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
  52. r'delete|system)\b', Keyword.Reserved),
  53. (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
  54. r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
  55. r'RSTART|RT|SUBSEP)\b', Name.Builtin),
  56. (r'[$a-zA-Z_]\w*', Name.Other),
  57. (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
  58. (r'0x[0-9a-fA-F]+', Number.Hex),
  59. (r'[0-9]+', Number.Integer),
  60. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  61. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  62. ]
  63. }
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """

    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches ':py' through ':python' — Vim lets commands be abbreviated.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # ':python << MARKER ... MARKER' heredoc: group 6 captures the
            # terminator and \6 backreferences it; the embedded body is
            # delegated to PythonLexer.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Single-line ':python <expr>' form.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/[^/\\\n]*(?:\\[\s\S][^/\\\n]*)*/', String.Regex),
            (r'"[^"\\\n]*(?:\\[\s\S][^"\\\n]*)*"', String.Double),
            (r"'[^\n']*(?:''[^\n']*)*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # The keyword tables are large generated lists; imported lazily so
        # plain module import stays cheap.
        from pygments.lexers._vim_builtins import command, option, auto
        self._cmd = command  # sorted (abbreviation, full-name) command pairs
        self._opt = option   # sorted (abbreviation, full-name) option pairs
        self._aut = auto     # sorted (abbreviation, full-name) autocmd events
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        # `mapping` is sorted, so the only entries that can match `w` are
        # the neighbours of its insertion point: check index p-1 then p.
        # A match means the entry's minimal abbreviation is a prefix of `w`
        # AND `w` is a prefix of the entry's full spelling.
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w:
                return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # Post-process the regex stream: every Name.Other word is reclassified
        # against the command/option/autocmd tables, or demoted to plain Text.
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value