123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- """
- pygments.lexers.r
- ~~~~~~~~~~~~~~~~~
- Lexers for the R/S languages.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
- from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic
- __all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']
- line_re = re.compile('.*?\n')
- class RConsoleLexer(Lexer):
- """
- For R console transcripts or R CMD BATCH output files.
- """
- name = 'RConsole'
- aliases = ['rconsole', 'rout']
- filenames = ['*.Rout']
- def get_tokens_unprocessed(self, text):
- slexer = SLexer(**self.options)
- current_code_block = ''
- insertions = []
- for match in line_re.finditer(text):
- line = match.group()
- if line.startswith('>') or line.startswith('+'):
- # Colorize the prompt as such,
- # then put rest of line into current_code_block
- insertions.append((len(current_code_block),
- [(0, Generic.Prompt, line[:2])]))
- current_code_block += line[2:]
- else:
- # We have reached a non-prompt line!
- # If we have stored prompt lines, need to process them first.
- if current_code_block:
- # Weave together the prompts and highlight code.
- yield from do_insertions(
- insertions, slexer.get_tokens_unprocessed(current_code_block))
- # Reset vars for next code block.
- current_code_block = ''
- insertions = []
- # Now process the actual line itself, this is output from R.
- yield match.start(), Generic.Output, line
- # If we happen to end on a code block with nothing after it, need to
- # process the last code block. This is neither elegant nor DRY so
- # should be changed.
- if current_code_block:
- yield from do_insertions(
- insertions, slexer.get_tokens_unprocessed(current_code_block))
- class SLexer(RegexLexer):
- """
- For S, S-plus, and R source code.
- .. versionadded:: 0.10
- """
- name = 'S'
- aliases = ['splus', 's', 'r']
- filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
- mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
- 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
- valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
- tokens = {
- 'comments': [
- (r'#.*$', Comment.Single),
- ],
- 'valid_name': [
- (valid_name, Name),
- ],
- 'punctuation': [
- (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
- ],
- 'keywords': [
- (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
- r'(?![\w.])',
- Keyword.Reserved),
- ],
- 'operators': [
- (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
- (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
- ],
- 'builtin_symbols': [
- (r'(NULL|NA(_(integer|real|complex|character)_)?|'
- r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
- r'(?![\w.])',
- Keyword.Constant),
- (r'(T|F)\b', Name.Builtin.Pseudo),
- ],
- 'numbers': [
- # hex number
- (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
- # decimal number
- (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
- Number),
- ],
- 'statements': [
- include('comments'),
- # whitespaces
- (r'\s+', Text),
- (r'\'', String, 'string_squote'),
- (r'\"', String, 'string_dquote'),
- include('builtin_symbols'),
- include('valid_name'),
- include('numbers'),
- include('keywords'),
- include('punctuation'),
- include('operators'),
- ],
- 'root': [
- # calls:
- (r'(%s)\s*(?=\()' % valid_name, Name.Function),
- include('statements'),
- # blocks:
- (r'\{|\}', Punctuation),
- # (r'\{', Punctuation, 'block'),
- (r'.', Text),
- ],
- # 'block': [
- # include('statements'),
- # ('\{', Punctuation, '#push'),
- # ('\}', Punctuation, '#pop')
- # ],
- 'string_squote': [
- (r'([^\'\\]|\\.)*\'', String, '#pop'),
- ],
- 'string_dquote': [
- (r'([^"\\]|\\.)*"', String, '#pop'),
- ],
- }
- def analyse_text(text):
- if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
- return 0.11
- class RdLexer(RegexLexer):
- """
- Pygments Lexer for R documentation (Rd) files
- This is a very minimal implementation, highlighting little more
- than the macros. A description of Rd syntax is found in `Writing R
- Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
- and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.
- .. versionadded:: 1.6
- """
- name = 'Rd'
- aliases = ['rd']
- filenames = ['*.Rd']
- mimetypes = ['text/x-r-doc']
- # To account for verbatim / LaTeX-like / and R-like areas
- # would require parsing.
- tokens = {
- 'root': [
- # catch escaped brackets and percent sign
- (r'\\[\\{}%]', String.Escape),
- # comments
- (r'%.*$', Comment),
- # special macros with no arguments
- (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
- # macros
- (r'\\[a-zA-Z]+\b', Keyword),
- # special preprocessor macros
- (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
- # non-escaped brackets
- (r'[{}]', Name.Builtin),
- # everything else
- (r'[^\\%\n{}]+', Text),
- (r'.', Text),
- ]
- }
|