"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer', 'ReasonLexer', 'FStarLexer']


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = {
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    }

    symbolicid_reserved = {
        # Core
        ':', r'\|', '=', '=>', '->', '#',
        # Modules
        ':>',
    }

    nonid_reserved = {'(', ')', '[', ']', '{', '}', ',', ';', '...', '_'}

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
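    # For example, #"a" is a well-formed character constant, while #"ab"
    # denotes a two-character string and is therefore invalid SML; both are
    # nevertheless tokenized the same way here.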
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
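            # For example, in `case x of A => 1 | B => 2` the `|` separates
            # match rules, whereas in `fun f 0 = 1 | f n = n` it introduces
            # another function clause; the enclosing scope decides which.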
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),
            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),
            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],

        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),
            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),
            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),
            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),
            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),
            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),
            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),
            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),
            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),
            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),
            (r'(and\b)(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(and\b)(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),
            (r'(%s)|(%s)' % (alphanumid_re, symbolicid_re), Name.Class),
            default('#pop'),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language (http://opalang.org).

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?\.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
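            # e.g. in a string such as `"total: {price + tax}"`, the `{`
            # re-enters 'root' and the matching `}` pops back into the
            # string state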
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much more strict than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, I don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that only
            # modules are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because it is the only way to
            # syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #       type_with_slash stream
        #       type_lhs_1 stream;
        # and type_1 stream = ...
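        # for example (per the grammar sketched above), this machinery is
        # meant to scan through types such as `int`, `int / string`, and
        # `int, float -> bool`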
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the lookahead here allows us to parse
            # f(x : int, y : float -> truc) correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],
        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],
        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }


class ReasonLexer(RegexLexer):
    """
    For the ReasonML language (https://reasonml.github.io/).

    .. versionadded:: 2.6
    """

    name = 'ReasonML'
    aliases = ['reasonml', 'reason']
    filenames = ['*.re', '*.rei']
    mimetypes = ['text/x-reasonml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', 'downto',
        'else', 'end', 'exception', 'external', 'false', 'for', 'fun', 'esfun',
        'function', 'functor', 'if', 'in', 'include', 'inherit', 'initializer', 'lazy',
        'let', 'switch', 'module', 'pub', 'mutable', 'new', 'nonrec', 'object', 'of',
        'open', 'pri', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '=>', r'\.', r'\.\.', r'\.\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lsr', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'//.*?\n', Comment.Single),
            (r'\/\*(?!/)', Comment.Multiline, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator.Word),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'\/\*', Comment.Multiline, '#push'),
            (r'\*\/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class FStarLexer(RegexLexer):
    """
    For the F* language (https://www.fstar-lang.org/).

    .. versionadded:: 2.7
    """

    name = 'FStar'
    aliases = ['fstar']
    filenames = ['*.fst', '*.fsti']
    mimetypes = ['text/x-fstar']

    keywords = (
        'abstract', 'attributes', 'noeq', 'unopteq', 'and',
        'begin', 'by', 'default', 'effect', 'else', 'end', 'ensures',
        'exception', 'exists', 'false', 'forall', 'fun', 'function', 'if',
        'in', 'include', 'inline', 'inline_for_extraction', 'irreducible',
        'logic', 'match', 'module', 'mutable', 'new', 'new_effect', 'noextract',
        'of', 'open', 'opaque', 'private', 'range_of', 'reifiable',
        'reify', 'reflectable', 'requires', 'set_range_of', 'sub_effect',
        'synth', 'then', 'total', 'true', 'try', 'type', 'unfold', 'unfoldable',
        'val', 'when', 'with', 'not'
    )
    decl_keywords = ('let', 'rec')
    assume_keywords = ('assume', 'admit', 'assert', 'calc')
    keyopts = (
        r'~', r'-', r'/\\', r'\\/', r'<:', r'<@', r'\(\|', r'\|\)', r'#', r'u#',
        r'&', r'\(', r'\)', r'\(\)', r',', r'~>', r'->', r'<-', r'<--', r'<==>',
        r'==>', r'\.', r'\?', r'\?\.', r'\.\[', r'\.\(', r'\.\(\|', r'\.\[\|',
        r'\{:pattern', r':', r'::', r':=', r';', r';;', r'=', r'%\[', r'!\{',
        r'\[', r'\[@', r'\[\|', r'\|>', r'\]', r'\|\]', r'\{', r'\|', r'\}', r'\$'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|False|True|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'^\/\/.+$', Comment),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'\b(%s)\b' % '|'.join(assume_keywords), Name.Exception),
            (r'\b(%s)\b' % '|'.join(decl_keywords), Keyword.Declaration),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
            (r"[^\W\d][\w']*", Name),
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
            (r"\`([\w\'.]+)\`", Operator.Word),  # for infix applications
            (r"\`", Keyword),  # for quoting
            (r'"', String.Double, 'string'),
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
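

# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the upstream module): it shows how
# one of the lexers defined above can be driven through the standard Pygments
# API. `highlight` and `TerminalFormatter` are real Pygments entry points;
# the SML sample below is made up for demonstration.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    sample = "fun fact 0 = 1\n  | fact n = n * fact (n - 1)\n"

    # Render the sample with ANSI colors on the terminal.
    print(highlight(sample, SMLLexer(), TerminalFormatter()))

    # Or inspect the raw (token type, value) stream directly.
    for token_type, value in SMLLexer().get_tokens(sample):
        print(token_type, repr(value))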