"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    # This set is only used for membership tests against matched source text
    # (see the callbacks below), so its members are literal strings and must
    # not be regex-escaped.
    symbolicid_reserved = {
        # Core
        ':', '|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        """Build the rule list for a string-like state.

        ``whatkind`` is the token type given to ordinary characters and to
        the closing quote; escape sequences get their own token types.
        Called at class-definition time, so it takes no ``self``.
        """
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000; the last one is \^_ == \031
            (r'\\\^[\x40-\x5f]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        """Yield tokens for a ``Qualifier.`` prefix of a long identifier.

        Group 1 is the qualifier and group 2 the dot. A reserved word used
        as a qualifier is flagged as ``Error``.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        """Yield the token for the final component of a long identifier.

        Reserved words, alphanumeric or symbolic, are flagged as ``Error``.
        """
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        """Yield the token for an unqualified identifier.

        Reserved alphanumeric words become ``Keyword.Reserved``, reserved
        symbolic identifiers become ``Punctuation``, the rest are ``Name``.
        """
        text = match.group(1)
        if text in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif text in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, text

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            # 'whitespace' already covers blanks and comment openers, so no
            # separate rules for them are needed here.
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),

            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # Nested (* ... *) comments: '#push' on each opener tracks the depth
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Entries are regex fragments. The 'root' state joins them in reverse
    # order, which puts each longer operator ahead of its shorter prefix
    # (e.g. '->' before '-').
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor',
                      'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point is escaped so that only a literal '.' is
            # accepted between the integer and fractional parts.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nested (* ... *) comments: '#push' on each opener tracks the depth
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Entered after a capitalized name that is followed by a dot
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language (http://opalang.org).

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            # the decimal point is escaped: an unescaped '.' matches any
            # character, which would make an identifier such as 'a2e3'
            # lex as a float
            (r'-?\.\d+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much more strict than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactic distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        # match stream with
        # | [< "->";  stream >] -> type stream
        # | [< "";  stream >] ->
        #   type_with_slash stream
        #   type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this function would be not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'), # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
class ReasonLexer(RegexLexer):
    """
    For the ReasonML language (https://reasonml.github.io/).

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Entries are regex fragments. The 'root' state joins them in reverse
    # order, which puts each longer operator ahead of its shorter prefix
    # (e.g. '=>' before '=').
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point is escaped so that only a literal '.' is
            # accepted between the integer and fractional parts.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nested /* ... */ comments: '#push' on each opener tracks the depth
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            # A lone '/' or '*' that is not part of a delimiter is still
            # comment text; without the '/' case it has no matching rule.
            (r'[/*]', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Entered after a capitalized name that is followed by a dot
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
class FStarLexer(RegexLexer):
    """
    For the F* language (https://www.fstar-lang.org/).

    .. versionadded:: 2.7
    """

    name = 'FStar'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    # Note the comma after 'and': adjacent string literals are concatenated,
    # so omitting it would yield the single bogus keyword 'andbegin'.
    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    # Entries are regex fragments; the 'root' state joins them in reverse
    # order.
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'^\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # The decimal point is escaped so that only a literal '.' is
            # accepted between the integer and fractional parts.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # Nested (* ... *) comments: '#push' on each opener tracks the depth
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Entered after a capitalized name that is followed by a dot
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }