  1. """
  2. pygments.lexers.r
  3. ~~~~~~~~~~~~~~~~~
  4. Lexers for the R/S languages.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Generic
  12. __all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']
  13. line_re = re.compile('.*?\n')
  14. class RConsoleLexer(Lexer):
  15. """
  16. For R console transcripts or R CMD BATCH output files.
  17. """
  18. name = 'RConsole'
  19. aliases = ['rconsole', 'rout']
  20. filenames = ['*.Rout']
  21. def get_tokens_unprocessed(self, text):
  22. slexer = SLexer(**self.options)
  23. current_code_block = ''
  24. insertions = []
  25. for match in line_re.finditer(text):
  26. line = match.group()
  27. if line.startswith('>') or line.startswith('+'):
  28. # Colorize the prompt as such,
  29. # then put rest of line into current_code_block
  30. insertions.append((len(current_code_block),
  31. [(0, Generic.Prompt, line[:2])]))
  32. current_code_block += line[2:]
  33. else:
  34. # We have reached a non-prompt line!
  35. # If we have stored prompt lines, need to process them first.
  36. if current_code_block:
  37. # Weave together the prompts and highlight code.
  38. yield from do_insertions(
  39. insertions, slexer.get_tokens_unprocessed(current_code_block))
  40. # Reset vars for next code block.
  41. current_code_block = ''
  42. insertions = []
  43. # Now process the actual line itself, this is output from R.
  44. yield match.start(), Generic.Output, line
  45. # If we happen to end on a code block with nothing after it, need to
  46. # process the last code block. This is neither elegant nor DRY so
  47. # should be changed.
  48. if current_code_block:
  49. yield from do_insertions(
  50. insertions, slexer.get_tokens_unprocessed(current_code_block))
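

# A minimal usage sketch for the console lexer (illustrative only; the
# transcript text is made up, but the pygments API calls are standard):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     from pygments.lexers import RConsoleLexer
#
#     transcript = "> x <- 1:10\n> mean(x)\n[1] 5.5\n"
#     print(highlight(transcript, RConsoleLexer(), HtmlFormatter()))
#
# Lines starting with ">" or "+" have their prompt emitted as Generic.Prompt
# and their code re-lexed with SLexer via do_insertions; all other lines are
# emitted verbatim as Generic.Output.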


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'

    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #     include('statements'),
        #     (r'\{', Punctuation, '#push'),
        #     (r'\}', Punctuation, '#pop'),
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11
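

# Illustrative check of the state machine above (not part of the lexer):
# because the call rule in 'root' fires before the plain name rule in
# 'statements', an identifier followed by "(" comes out as Name.Function.
# For example,
#
#     from pygments.lexers import SLexer
#     list(SLexer().get_tokens('mean(x)'))
#
# should yield roughly (Name.Function, 'mean'), (Punctuation, '('),
# (Name, 'x'), (Punctuation, ')'), plus a trailing newline token.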


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }
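

# A short Rd sketch for reference (illustrative; the snippet is made up):
#
#     from pygments.lexers import RdLexer
#     text = '\\name{foo}\n\\title{A title} % trailing comment\n'
#     for tok, value in RdLexer().get_tokens(text):
#         print(tok, repr(value))
#
# Macros such as \name and \title are tagged Keyword, the braces Name.Builtin,
# and everything after "%" on a line Comment.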