123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 |
- """
- pygments.lexers.julia
- ~~~~~~~~~~~~~~~~~~~~~
- Lexers for the Julia language.
- :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
- words, include
- from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic
- from pygments.util import shebang_matches
- from pygments.lexers._julia_builtins import OPERATORS_LIST, DOTTED_OPERATORS_LIST, \
- KEYWORD_LIST, BUILTIN_LIST, LITERAL_LIST
# Public names of this module: the only two names pulled in by a star-import.
__all__ = ['JuliaLexer', 'JuliaConsoleLexer']
# Regex fragment (a single non-capturing group) matching one Julia identifier.
# Deliberately NOT a raw string: the \u / \U escapes must be expanded by Python
# so the character classes contain the real code points.
# see https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names
allowed_variable = (
    '(?:'
    '[a-zA-Z_\u00A1-\U0010ffff]'        # first character: no digits, no '!'
    '[a-zA-Z_0-9!\u00A1-\U0010ffff]*'   # remaining characters
    ')'
)

# Regex fragment matching zero or more of the sub/superscript and prime
# characters that may be appended to an operator.
# see https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_opsuffs.h
operator_suffixes = (
    r'['
    r'²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ'
    r']*'
)
class JuliaLexer(RegexLexer):
    """
    For `Julia <http://julialang.org/>`_ source code.

    The ``tokens`` table is order sensitive: within a state the first rule
    that matches at the current position wins, so more specific rules must
    stay ahead of the general ones (e.g. ``raw"`` / ``r"`` before the generic
    prefixed-string rule, and the lone ``.`` operator after the numbers).

    .. versionadded:: 1.6
    """

    name = 'Julia'
    aliases = ['julia', 'jl']
    filenames = ['*.jl']
    mimetypes = ['text/x-julia', 'application/x-julia']

    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root': [
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            (r'#=', Comment.Multiline, "blockcomment"),
            (r'#.*$', Comment),
            (r'[\[\](),;]', Punctuation),

            # symbols
            #   intercept range expressions first
            (r'(' + allowed_variable + r')(\s*)(:)(' + allowed_variable + ')',
                bygroups(Name, Text, Operator, Name)),
            #   then match :name which does not follow closing brackets, digits, or the
            #   ::, <:, and :> operators
            (r'(?<![\]):<>\d.])(:' + allowed_variable + ')', String.Symbol),

            # type assertions - excludes expressions like ::typeof(sin) and ::avec[1]
            (r'(?<=::)(\s*)(' + allowed_variable + r')\b(?![(\[])', bygroups(Text, Keyword.Type)),
            # type comparisons
            # - MyType <: A or MyType >: A
            ('(' + allowed_variable + r')(\s*)([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
                bygroups(Keyword.Type, Text, Operator, Text, Keyword.Type)),
            # - <: B or >: B
            (r'([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
                bygroups(Operator, Text, Keyword.Type)),
            # - A <: or A >:
            (r'\b(' + allowed_variable + r')(\s*)([<>]:)',
                bygroups(Keyword.Type, Text, Operator)),

            # operators
            # Suffixes aren't actually allowed on all operators, but we'll ignore that
            # since those cases are invalid Julia code.
            (words([*OPERATORS_LIST, *DOTTED_OPERATORS_LIST], suffix=operator_suffixes), Operator),
            (words(['.' + o for o in DOTTED_OPERATORS_LIST], suffix=operator_suffixes), Operator),
            (words(['...', '..']), Operator),

            # NOTE
            # Patterns below work only for definition sites and thus hardly reliable.
            #
            # functions
            # (r'(function)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Function)),

            # chars
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),

            # try to match trailing transpose
            (r'(?<=[.\w)\]])(\'' + operator_suffixes + ')+', Operator),

            # raw strings
            (r'(raw)(""")', bygroups(String.Affix, String), 'tqrawstring'),
            (r'(raw)(")', bygroups(String.Affix, String), 'rawstring'),
            # regular expressions
            (r'(r)(""")', bygroups(String.Affix, String.Regex), 'tqregex'),
            (r'(r)(")', bygroups(String.Affix, String.Regex), 'regex'),
            # other strings
            (r'(' + allowed_variable + ')?(""")', bygroups(String.Affix, String), 'tqstring'),
            (r'(' + allowed_variable + ')?(")', bygroups(String.Affix, String), 'string'),

            # backticks
            (r'(' + allowed_variable + ')?(```)', bygroups(String.Affix, String.Backtick), 'tqcommand'),
            (r'(' + allowed_variable + ')?(`)', bygroups(String.Affix, String.Backtick), 'command'),

            # type names
            # - names that begin a curly expression
            ('(' + allowed_variable + r')(\{)',
                bygroups(Keyword.Type, Punctuation), 'curly'),
            # - names as part of bare 'where'
            (r'(where)(\s+)(' + allowed_variable + ')',
                bygroups(Keyword, Text, Keyword.Type)),
            # - curly expressions in general
            (r'(\{)', Punctuation, 'curly'),
            # - names as part of type declaration
            (r'(abstract[ \t]+type|primitive[ \t]+type|mutable[ \t]+struct|struct)([\s()]+)(' +
                allowed_variable + r')', bygroups(Keyword, Text, Keyword.Type)),

            # macros
            (r'@' + allowed_variable, Name.Decorator),
            (words([*OPERATORS_LIST, '..', '.', *DOTTED_OPERATORS_LIST],
                prefix='@', suffix=operator_suffixes), Name.Decorator),

            # keywords
            (words(KEYWORD_LIST, suffix=r'\b'), Keyword),
            # builtin types
            (words(BUILTIN_LIST, suffix=r'\b'), Keyword.Type),
            # builtin literals
            (words(LITERAL_LIST, suffix=r'\b'), Name.Builtin),

            # names
            (allowed_variable, Name),

            # numbers
            (r'(\d+((_\d+)+)?\.(?!\.)(\d+((_\d+)+)?)?|\.\d+((_\d+)+)?)([eEf][+-]?[0-9]+)?', Number.Float),
            (r'\d+((_\d+)+)?[eEf][+-]?[0-9]+', Number.Float),
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?(\.([a-fA-F0-9]+((_[a-fA-F0-9]+)+)?)?)?p[+-]?\d+', Number.Float),
            (r'0b[01]+((_[01]+)+)?', Number.Bin),
            (r'0o[0-7]+((_[0-7]+)+)?', Number.Oct),
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?', Number.Hex),
            (r'\d+((_\d+)+)?', Number.Integer),

            # single dot operator matched last to permit e.g. ".1" as a float
            (words(['.']), Operator),
        ],

        # Block comments nest: every "#=" pushes another level, "=#" pops one.
        "blockcomment": [
            (r'[^=#]', Comment.Multiline),
            (r'#=', Comment.Multiline, '#push'),
            (r'=#', Comment.Multiline, '#pop'),
            (r'[=#]', Comment.Multiline),
        ],

        # Inside {...} every bare identifier is highlighted as a type.
        'curly': [
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
            (allowed_variable, Keyword.Type),
            include('root'),
        ],

        'tqrawstring': [
            (r'"""', String, '#pop'),
            # Content runs up to the closing delimiter. A quote is content
            # unless it starts a run of three quotes; this also covers inner
            # quotes such as "b" or "" which previously fell through every
            # rule and were emitted as Error tokens.
            (r'(?:[^"]|"(?!""))+', String),
        ],
        'rawstring': [
            (r'"', String, '#pop'),
            (r'\\"', String.Escape),
            (r'([^"\\]|\\[^"])+', String),
        ],

        # Interpolation is defined as "$" followed by the shortest full expression, which is
        # something we can't parse.
        # Include the most common cases here: $word, and $(paren'd expr).
        'interp': [
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
        ],
        # Body of $( ... ): ordinary code, tracking nested parentheses.
        'in-intp': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],

        'string': [
            (r'(")(' + allowed_variable + r'|\d+)?', bygroups(String, String.Affix), '#pop'),
            # FIXME: This escape pattern is not perfect.
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            include('interp'),
            # @printf and @sprintf formats
            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
             String.Interpol),
            (r'[^"$%\\]+', String),
            # fallback for a lone "$", "%" or "\" that matched nothing above
            (r'.', String),
        ],
        'tqstring': [
            (r'(""")(' + allowed_variable + r'|\d+)?', bygroups(String, String.Affix), '#pop'),
            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
            include('interp'),
            (r'[^"$%\\]+', String),
            # fallback, also covers a quote that is not part of the closing """
            (r'.', String),
        ],

        'regex': [
            (r'(")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
            # A backslash is consumed together with the character after it.
            # This keeps \" from closing the literal, lets r"\\" close after
            # the backslash pair, and accepts ordinary regex escapes such as
            # \d, which previously matched no rule and became Error tokens.
            (r'\\.', String.Regex),
            (r'[^\\"]+', String.Regex),
            # A backslash directly before a newline ("." does not match "\n").
            (r'\\', String.Regex),
        ],

        'tqregex': [
            (r'(""")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
            # Same content rule as 'tqrawstring': a quote that does not start
            # the closing """ belongs to the pattern.
            (r'(?:[^"]|"(?!""))+', String.Regex),
        ],

        'command': [
            (r'(`)(' + allowed_variable + r'|\d+)?', bygroups(String.Backtick, String.Affix), '#pop'),
            (r'\\[`$]', String.Escape),
            include('interp'),
            (r'[^\\`$]+', String.Backtick),
            (r'.', String.Backtick),
        ],
        'tqcommand': [
            (r'(```)(' + allowed_variable + r'|\d+)?', bygroups(String.Backtick, String.Affix), '#pop'),
            (r'\\\$', String.Escape),
            include('interp'),
            (r'[^\\`$]+', String.Backtick),
            (r'.', String.Backtick),
        ],
    }

    def analyse_text(text):
        """Return the result of matching ``julia`` against the shebang line of *text*.

        Declared without ``self``, as is the convention for Pygments
        ``analyse_text`` hooks.
        """
        return shebang_matches(text, r'julia')
class JuliaConsoleLexer(Lexer):
    """
    For Julia console sessions. Modeled after MatlabSessionLexer.

    .. versionadded:: 1.6
    """
    name = 'Julia console'
    aliases = ['jlcon', 'julia-repl']

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, tokentype, value)`` triples for a REPL transcript.

        Lines are classified one at a time:

        * ``julia>`` lines: the prompt becomes ``Generic.Prompt`` and the rest
          is buffered as Julia code.
        * ``help?>`` / ``shell>`` lines: prompt plus plain ``Text``.
        * lines indented by the prompt width that directly follow code are
          buffered as continuation code.
        * anything else is ``Generic.Output``, or ``Generic.Error`` from a
          line starting with ``ERROR: `` until the next prompt.

        Buffered code is lexed with :class:`JuliaLexer` and emitted, with the
        prompts re-inserted, as soon as a non-code line is reached.
        """
        jllexer = JuliaLexer(**self.options)
        prompt_width = len('julia>')        # all three prompts are this wide
        continuation = ' ' * prompt_width   # indent that marks continued input

        start = 0
        curcode = ''
        insertions = []
        output = False
        error = False

        for line in text.splitlines(True):
            if line.startswith('julia>'):
                insertions.append(
                    (len(curcode), [(0, Generic.Prompt, line[:prompt_width])]))
                curcode += line[prompt_width:]
                output = False
                error = False
            elif line.startswith(('help?>', 'shell>')):
                # Emit any pending code first so tokens keep the order in
                # which they appear in the transcript.
                if curcode:
                    yield from do_insertions(
                        insertions, jllexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                yield start, Generic.Prompt, line[:prompt_width]
                yield start + prompt_width, Text, line[prompt_width:]
                output = False
                error = False
            elif line.startswith(continuation) and not output:
                # Only a full prompt-width indent counts as continued input:
                # exactly that many characters are stripped below, so a
                # shorter indent would cut into the line's content.
                insertions.append(
                    (len(curcode), [(0, Text, line[:prompt_width])]))
                curcode += line[prompt_width:]
            else:
                if curcode:
                    yield from do_insertions(
                        insertions, jllexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('ERROR: ') or error:
                    # An error message may span several lines; stay in error
                    # mode until the next prompt resets the flag.
                    yield start, Generic.Error, line
                    error = True
                else:
                    yield start, Generic.Output, line
                output = True
            start += len(line)

        # Transcript ended while code was still buffered.
        if curcode:
            yield from do_insertions(
                insertions, jllexer.get_tokens_unprocessed(curcode))
|