"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni
  14. __all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
  15. 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
  16. 'LiterateCryptolLexer', 'KokaLexer']
  17. line_re = re.compile('.*?\n')
  18. class HaskellLexer(RegexLexer):
  19. """
  20. A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
  21. .. versionadded:: 0.8
  22. """
  23. name = 'Haskell'
  24. aliases = ['haskell', 'hs']
  25. filenames = ['*.hs']
  26. mimetypes = ['text/x-haskell']
  27. flags = re.MULTILINE | re.UNICODE
  28. reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
  29. 'family', 'if', 'in', 'infix[lr]?', 'instance',
  30. 'let', 'newtype', 'of', 'then', 'type', 'where', '_')
  31. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  32. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  33. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  34. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  35. tokens = {
  36. 'root': [
  37. # Whitespace:
  38. (r'\s+', Text),
  39. # (r'--\s*|.*$', Comment.Doc),
  40. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  41. (r'\{-', Comment.Multiline, 'comment'),
  42. # Lexemes:
  43. # Identifiers
  44. (r'\bimport\b', Keyword.Reserved, 'import'),
  45. (r'\bmodule\b', Keyword.Reserved, 'module'),
  46. (r'\berror\b', Name.Exception),
  47. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  48. (r"'[^\\]'", String.Char), # this has to come before the TH quote
  49. (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
  50. (r"'?[_" + uni.Ll + r"][\w']*", Name),
  51. (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
  52. (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
  53. (r"(')\[[^\]]*\]", Keyword.Type), # tuples and lists get special treatment in GHC
  54. (r"(')\([^)]*\)", Keyword.Type), # ..
  55. (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type), # promoted type operators
  56. # Operators
  57. (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
  58. (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  59. (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
  60. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
  61. # Numbers
  62. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
  63. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
  64. r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
  65. (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
  66. (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
  67. (r'0[bB]_*[01](_*[01])*', Number.Bin),
  68. (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
  69. (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
  70. (r'\d(_*\d)*', Number.Integer),
  71. # Character/String Literals
  72. (r"'", String.Char, 'character'),
  73. (r'"', String, 'string'),
  74. # Special
  75. (r'\[\]', Keyword.Type),
  76. (r'\(\)', Name.Builtin),
  77. (r'[][(),;`{}]', Punctuation),
  78. ],
  79. 'import': [
  80. # Import statements
  81. (r'\s+', Text),
  82. (r'"', String, 'string'),
  83. # after "funclist" state
  84. (r'\)', Punctuation, '#pop'),
  85. (r'qualified\b', Keyword),
  86. # import X as Y
  87. (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
  88. bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
  89. # import X hiding (functions)
  90. (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
  91. bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
  92. # import X (functions)
  93. (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
  94. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  95. # import X
  96. (r'[\w.]+', Name.Namespace, '#pop'),
  97. ],
  98. 'module': [
  99. (r'\s+', Text),
  100. (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
  101. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  102. (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
  103. ],
  104. 'funclist': [
  105. (r'\s+', Text),
  106. (r'[' + uni.Lu + r']\w*', Keyword.Type),
  107. (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
  108. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  109. (r'\{-', Comment.Multiline, 'comment'),
  110. (r',', Punctuation),
  111. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  112. # (HACK, but it makes sense to push two instances, believe me)
  113. (r'\(', Punctuation, ('funclist', 'funclist')),
  114. (r'\)', Punctuation, '#pop:2'),
  115. ],
  116. # NOTE: the next four states are shared in the AgdaLexer; make sure
  117. # any change is compatible with Agda as well or copy over and change
  118. 'comment': [
  119. # Multiline Comments
  120. (r'[^-{}]+', Comment.Multiline),
  121. (r'\{-', Comment.Multiline, '#push'),
  122. (r'-\}', Comment.Multiline, '#pop'),
  123. (r'[-{}]', Comment.Multiline),
  124. ],
  125. 'character': [
  126. # Allows multi-chars, incorrectly.
  127. (r"[^\\']'", String.Char, '#pop'),
  128. (r"\\", String.Escape, 'escape'),
  129. ("'", String.Char, '#pop'),
  130. ],
  131. 'string': [
  132. (r'[^\\"]+', String),
  133. (r"\\", String.Escape, 'escape'),
  134. ('"', String, '#pop'),
  135. ],
  136. 'escape': [
  137. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  138. (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
  139. ('|'.join(ascii), String.Escape, '#pop'),
  140. (r'o[0-7]+', String.Escape, '#pop'),
  141. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  142. (r'\d+', String.Escape, '#pop'),
  143. (r'\s+\\', String.Escape, '#pop'),
  144. ],
  145. }
  146. class HspecLexer(HaskellLexer):
  147. """
  148. A Haskell lexer with support for Hspec constructs.
  149. .. versionadded:: 2.4.0
  150. """
  151. name = 'Hspec'
  152. aliases = ['hspec']
  153. filenames = []
  154. mimetypes = []
  155. tokens = {
  156. 'root': [
  157. (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
  158. (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
  159. (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
  160. inherit,
  161. ],
  162. }
  163. class IdrisLexer(RegexLexer):
  164. """
  165. A lexer for the dependently typed programming language Idris.
  166. Based on the Haskell and Agda Lexer.
  167. .. versionadded:: 2.0
  168. """
  169. name = 'Idris'
  170. aliases = ['idris', 'idr']
  171. filenames = ['*.idr']
  172. mimetypes = ['text/x-idris']
  173. reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
  174. 'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
  175. 'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
  176. 'total', 'partial',
  177. 'interface', 'implementation', 'export', 'covering', 'constructor',
  178. 'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
  179. 'pattern', 'term', 'syntax', 'prefix',
  180. 'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
  181. 'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')
  182. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  183. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  184. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  185. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  186. directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
  187. 'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')
  188. tokens = {
  189. 'root': [
  190. # Comments
  191. (r'^(\s*)(%%(%s))' % '|'.join(directives),
  192. bygroups(Text, Keyword.Reserved)),
  193. (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
  194. (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
  195. (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
  196. # Declaration
  197. (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
  198. bygroups(Text, Name.Function, Text, Operator.Word, Text)),
  199. # Identifiers
  200. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  201. (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
  202. (r"('')?[A-Z][\w\']*", Keyword.Type),
  203. (r'[a-z][\w\']*', Text),
  204. # Special Symbols
  205. (r'(<-|::|->|=>|=)', Operator.Word), # specials
  206. (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  207. # Numbers
  208. (r'\d+[eE][+-]?\d+', Number.Float),
  209. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  210. (r'0[xX][\da-fA-F]+', Number.Hex),
  211. (r'\d+', Number.Integer),
  212. # Strings
  213. (r"'", String.Char, 'character'),
  214. (r'"', String, 'string'),
  215. (r'[^\s(){}]+', Text),
  216. (r'\s+?', Text), # Whitespace
  217. ],
  218. 'module': [
  219. (r'\s+', Text),
  220. (r'([A-Z][\w.]*)(\s+)(\()',
  221. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  222. (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
  223. ],
  224. 'funclist': [
  225. (r'\s+', Text),
  226. (r'[A-Z]\w*', Keyword.Type),
  227. (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
  228. (r'--.*$', Comment.Single),
  229. (r'\{-', Comment.Multiline, 'comment'),
  230. (r',', Punctuation),
  231. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  232. # (HACK, but it makes sense to push two instances, believe me)
  233. (r'\(', Punctuation, ('funclist', 'funclist')),
  234. (r'\)', Punctuation, '#pop:2'),
  235. ],
  236. # NOTE: the next four states are shared in the AgdaLexer; make sure
  237. # any change is compatible with Agda as well or copy over and change
  238. 'comment': [
  239. # Multiline Comments
  240. (r'[^-{}]+', Comment.Multiline),
  241. (r'\{-', Comment.Multiline, '#push'),
  242. (r'-\}', Comment.Multiline, '#pop'),
  243. (r'[-{}]', Comment.Multiline),
  244. ],
  245. 'character': [
  246. # Allows multi-chars, incorrectly.
  247. (r"[^\\']", String.Char),
  248. (r"\\", String.Escape, 'escape'),
  249. ("'", String.Char, '#pop'),
  250. ],
  251. 'string': [
  252. (r'[^\\"]+', String),
  253. (r"\\", String.Escape, 'escape'),
  254. ('"', String, '#pop'),
  255. ],
  256. 'escape': [
  257. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  258. (r'\^[][A-Z@^_]', String.Escape, '#pop'),
  259. ('|'.join(ascii), String.Escape, '#pop'),
  260. (r'o[0-7]+', String.Escape, '#pop'),
  261. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  262. (r'\d+', String.Escape, '#pop'),
  263. (r'\s+\\', String.Escape, '#pop')
  264. ],
  265. }
  266. class AgdaLexer(RegexLexer):
  267. """
  268. For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
  269. dependently typed functional programming language and proof assistant.
  270. .. versionadded:: 2.0
  271. """
  272. name = 'Agda'
  273. aliases = ['agda']
  274. filenames = ['*.agda']
  275. mimetypes = ['text/x-agda']
  276. reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
  277. 'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
  278. 'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
  279. 'pattern', 'postulate', 'primitive', 'private',
  280. 'quote', 'quoteGoal', 'quoteTerm',
  281. 'record', 'renaming', 'rewrite', 'syntax', 'tactic',
  282. 'unquote', 'unquoteDecl', 'using', 'where', 'with']
  283. tokens = {
  284. 'root': [
  285. # Declaration
  286. (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
  287. bygroups(Text, Name.Function, Text, Operator.Word, Text)),
  288. # Comments
  289. (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  290. (r'\{-', Comment.Multiline, 'comment'),
  291. # Holes
  292. (r'\{!', Comment.Directive, 'hole'),
  293. # Lexemes:
  294. # Identifiers
  295. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  296. (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
  297. (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
  298. # Special Symbols
  299. (r'(\(|\)|\{|\})', Operator),
  300. (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
  301. # Numbers
  302. (r'\d+[eE][+-]?\d+', Number.Float),
  303. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  304. (r'0[xX][\da-fA-F]+', Number.Hex),
  305. (r'\d+', Number.Integer),
  306. # Strings
  307. (r"'", String.Char, 'character'),
  308. (r'"', String, 'string'),
  309. (r'[^\s(){}]+', Text),
  310. (r'\s+?', Text), # Whitespace
  311. ],
  312. 'hole': [
  313. # Holes
  314. (r'[^!{}]+', Comment.Directive),
  315. (r'\{!', Comment.Directive, '#push'),
  316. (r'!\}', Comment.Directive, '#pop'),
  317. (r'[!{}]', Comment.Directive),
  318. ],
  319. 'module': [
  320. (r'\{-', Comment.Multiline, 'comment'),
  321. (r'[a-zA-Z][\w.]*', Name, '#pop'),
  322. (r'[\W0-9_]+', Text)
  323. ],
  324. 'comment': HaskellLexer.tokens['comment'],
  325. 'character': HaskellLexer.tokens['character'],
  326. 'string': HaskellLexer.tokens['string'],
  327. 'escape': HaskellLexer.tokens['escape']
  328. }
  329. class CryptolLexer(RegexLexer):
  330. """
  331. FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.
  332. .. versionadded:: 2.0
  333. """
  334. name = 'Cryptol'
  335. aliases = ['cryptol', 'cry']
  336. filenames = ['*.cry']
  337. mimetypes = ['text/x-cryptol']
  338. reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
  339. 'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
  340. 'max', 'min', 'module', 'newtype', 'pragma', 'property',
  341. 'then', 'type', 'where', 'width')
  342. ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
  343. 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
  344. 'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
  345. 'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')
  346. tokens = {
  347. 'root': [
  348. # Whitespace:
  349. (r'\s+', Text),
  350. # (r'--\s*|.*$', Comment.Doc),
  351. (r'//.*$', Comment.Single),
  352. (r'/\*', Comment.Multiline, 'comment'),
  353. # Lexemes:
  354. # Identifiers
  355. (r'\bimport\b', Keyword.Reserved, 'import'),
  356. (r'\bmodule\b', Keyword.Reserved, 'module'),
  357. (r'\berror\b', Name.Exception),
  358. (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
  359. (r'^[_a-z][\w\']*', Name.Function),
  360. (r"'?[_a-z][\w']*", Name),
  361. (r"('')?[A-Z][\w\']*", Keyword.Type),
  362. # Operators
  363. (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
  364. (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
  365. (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
  366. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
  367. # Numbers
  368. (r'\d+[eE][+-]?\d+', Number.Float),
  369. (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
  370. (r'0[oO][0-7]+', Number.Oct),
  371. (r'0[xX][\da-fA-F]+', Number.Hex),
  372. (r'\d+', Number.Integer),
  373. # Character/String Literals
  374. (r"'", String.Char, 'character'),
  375. (r'"', String, 'string'),
  376. # Special
  377. (r'\[\]', Keyword.Type),
  378. (r'\(\)', Name.Builtin),
  379. (r'[][(),;`{}]', Punctuation),
  380. ],
  381. 'import': [
  382. # Import statements
  383. (r'\s+', Text),
  384. (r'"', String, 'string'),
  385. # after "funclist" state
  386. (r'\)', Punctuation, '#pop'),
  387. (r'qualified\b', Keyword),
  388. # import X as Y
  389. (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
  390. bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
  391. # import X hiding (functions)
  392. (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
  393. bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
  394. # import X (functions)
  395. (r'([A-Z][\w.]*)(\s+)(\()',
  396. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  397. # import X
  398. (r'[\w.]+', Name.Namespace, '#pop'),
  399. ],
  400. 'module': [
  401. (r'\s+', Text),
  402. (r'([A-Z][\w.]*)(\s+)(\()',
  403. bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
  404. (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
  405. ],
  406. 'funclist': [
  407. (r'\s+', Text),
  408. (r'[A-Z]\w*', Keyword.Type),
  409. (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
  410. # TODO: these don't match the comments in docs, remove.
  411. # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
  412. # (r'{-', Comment.Multiline, 'comment'),
  413. (r',', Punctuation),
  414. (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
  415. # (HACK, but it makes sense to push two instances, believe me)
  416. (r'\(', Punctuation, ('funclist', 'funclist')),
  417. (r'\)', Punctuation, '#pop:2'),
  418. ],
  419. 'comment': [
  420. # Multiline Comments
  421. (r'[^/*]+', Comment.Multiline),
  422. (r'/\*', Comment.Multiline, '#push'),
  423. (r'\*/', Comment.Multiline, '#pop'),
  424. (r'[*/]', Comment.Multiline),
  425. ],
  426. 'character': [
  427. # Allows multi-chars, incorrectly.
  428. (r"[^\\']'", String.Char, '#pop'),
  429. (r"\\", String.Escape, 'escape'),
  430. ("'", String.Char, '#pop'),
  431. ],
  432. 'string': [
  433. (r'[^\\"]+', String),
  434. (r"\\", String.Escape, 'escape'),
  435. ('"', String, '#pop'),
  436. ],
  437. 'escape': [
  438. (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
  439. (r'\^[][A-Z@^_]', String.Escape, '#pop'),
  440. ('|'.join(ascii), String.Escape, '#pop'),
  441. (r'o[0-7]+', String.Escape, '#pop'),
  442. (r'x[\da-fA-F]+', String.Escape, '#pop'),
  443. (r'\d+', String.Escape, '#pop'),
  444. (r'\s+\\', String.Escape, '#pop'),
  445. ],
  446. }
  447. EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
  448. 'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
  449. 'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
  450. 'trace'}
  451. def get_tokens_unprocessed(self, text):
  452. stack = ['root']
  453. for index, token, value in \
  454. RegexLexer.get_tokens_unprocessed(self, text, stack):
  455. if token is Name and value in self.EXTRA_KEYWORDS:
  456. yield index, Name.Builtin, value
  457. else:
  458. yield index, token, value
  459. class LiterateLexer(Lexer):
  460. """
  461. Base class for lexers of literate file formats based on LaTeX or Bird-style
  462. (prefixing each code line with ">").
  463. Additional options accepted:
  464. `litstyle`
  465. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  466. is autodetected: if the first non-whitespace character in the source
  467. is a backslash or percent character, LaTeX is assumed, else Bird.
  468. """
  469. bird_re = re.compile(r'(>[ \t]*)(.*\n)')
  470. def __init__(self, baselexer, **options):
  471. self.baselexer = baselexer
  472. Lexer.__init__(self, **options)
  473. def get_tokens_unprocessed(self, text):
  474. style = self.options.get('litstyle')
  475. if style is None:
  476. style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
  477. code = ''
  478. insertions = []
  479. if style == 'bird':
  480. # bird-style
  481. for match in line_re.finditer(text):
  482. line = match.group()
  483. m = self.bird_re.match(line)
  484. if m:
  485. insertions.append((len(code),
  486. [(0, Comment.Special, m.group(1))]))
  487. code += m.group(2)
  488. else:
  489. insertions.append((len(code), [(0, Text, line)]))
  490. else:
  491. # latex-style
  492. from pygments.lexers.markup import TexLexer
  493. lxlexer = TexLexer(**self.options)
  494. codelines = 0
  495. latex = ''
  496. for match in line_re.finditer(text):
  497. line = match.group()
  498. if codelines:
  499. if line.lstrip().startswith('\\end{code}'):
  500. codelines = 0
  501. latex += line
  502. else:
  503. code += line
  504. elif line.lstrip().startswith('\\begin{code}'):
  505. codelines = 1
  506. latex += line
  507. insertions.append((len(code),
  508. list(lxlexer.get_tokens_unprocessed(latex))))
  509. latex = ''
  510. else:
  511. latex += line
  512. insertions.append((len(code),
  513. list(lxlexer.get_tokens_unprocessed(latex))))
  514. yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
  515. class LiterateHaskellLexer(LiterateLexer):
  516. """
  517. For Literate Haskell (Bird-style or LaTeX) source.
  518. Additional options accepted:
  519. `litstyle`
  520. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  521. is autodetected: if the first non-whitespace character in the source
  522. is a backslash or percent character, LaTeX is assumed, else Bird.
  523. .. versionadded:: 0.9
  524. """
  525. name = 'Literate Haskell'
  526. aliases = ['literate-haskell', 'lhaskell', 'lhs']
  527. filenames = ['*.lhs']
  528. mimetypes = ['text/x-literate-haskell']
  529. def __init__(self, **options):
  530. hslexer = HaskellLexer(**options)
  531. LiterateLexer.__init__(self, hslexer, **options)
  532. class LiterateIdrisLexer(LiterateLexer):
  533. """
  534. For Literate Idris (Bird-style or LaTeX) source.
  535. Additional options accepted:
  536. `litstyle`
  537. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  538. is autodetected: if the first non-whitespace character in the source
  539. is a backslash or percent character, LaTeX is assumed, else Bird.
  540. .. versionadded:: 2.0
  541. """
  542. name = 'Literate Idris'
  543. aliases = ['literate-idris', 'lidris', 'lidr']
  544. filenames = ['*.lidr']
  545. mimetypes = ['text/x-literate-idris']
  546. def __init__(self, **options):
  547. hslexer = IdrisLexer(**options)
  548. LiterateLexer.__init__(self, hslexer, **options)
  549. class LiterateAgdaLexer(LiterateLexer):
  550. """
  551. For Literate Agda source.
  552. Additional options accepted:
  553. `litstyle`
  554. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  555. is autodetected: if the first non-whitespace character in the source
  556. is a backslash or percent character, LaTeX is assumed, else Bird.
  557. .. versionadded:: 2.0
  558. """
  559. name = 'Literate Agda'
  560. aliases = ['literate-agda', 'lagda']
  561. filenames = ['*.lagda']
  562. mimetypes = ['text/x-literate-agda']
  563. def __init__(self, **options):
  564. agdalexer = AgdaLexer(**options)
  565. LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
  566. class LiterateCryptolLexer(LiterateLexer):
  567. """
  568. For Literate Cryptol (Bird-style or LaTeX) source.
  569. Additional options accepted:
  570. `litstyle`
  571. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  572. is autodetected: if the first non-whitespace character in the source
  573. is a backslash or percent character, LaTeX is assumed, else Bird.
  574. .. versionadded:: 2.0
  575. """
  576. name = 'Literate Cryptol'
  577. aliases = ['literate-cryptol', 'lcryptol', 'lcry']
  578. filenames = ['*.lcry']
  579. mimetypes = ['text/x-literate-cryptol']
  580. def __init__(self, **options):
  581. crylexer = CryptolLexer(**options)
  582. LiterateLexer.__init__(self, crylexer, **options)
  583. class KokaLexer(RegexLexer):
  584. """
  585. Lexer for the `Koka <http://koka.codeplex.com>`_
  586. language.
  587. .. versionadded:: 1.6
  588. """
  589. name = 'Koka'
  590. aliases = ['koka']
  591. filenames = ['*.kk', '*.kki']
  592. mimetypes = ['text/x-koka']
  593. keywords = [
  594. 'infix', 'infixr', 'infixl',
  595. 'type', 'cotype', 'rectype', 'alias',
  596. 'struct', 'con',
  597. 'fun', 'function', 'val', 'var',
  598. 'external',
  599. 'if', 'then', 'else', 'elif', 'return', 'match',
  600. 'private', 'public', 'private',
  601. 'module', 'import', 'as',
  602. 'include', 'inline',
  603. 'rec',
  604. 'try', 'yield', 'enum',
  605. 'interface', 'instance',
  606. ]
  607. # keywords that are followed by a type
  608. typeStartKeywords = [
  609. 'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
  610. ]
  611. # keywords valid in a type
  612. typekeywords = [
  613. 'forall', 'exists', 'some', 'with',
  614. ]
  615. # builtin names and special names
  616. builtin = [
  617. 'for', 'while', 'repeat',
  618. 'foreach', 'foreach-indexed',
  619. 'error', 'catch', 'finally',
  620. 'cs', 'js', 'file', 'ref', 'assigned',
  621. ]
  622. # symbols that can be in an operator
  623. symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'
  624. # symbol boundary: an operator keyword should not be followed by any of these
  625. sboundary = '(?!' + symbols + ')'
  626. # name boundary: a keyword should not be followed by any of these
  627. boundary = r'(?![\w/])'
  628. # koka token abstractions
  629. tokenType = Name.Attribute
  630. tokenTypeDef = Name.Class
  631. tokenConstructor = Generic.Emph
  632. # main lexer
  633. tokens = {
  634. 'root': [
  635. include('whitespace'),
  636. # go into type mode
  637. (r'::?' + sboundary, tokenType, 'type'),
  638. (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  639. 'alias-type'),
  640. (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  641. 'struct-type'),
  642. ((r'(%s)' % '|'.join(typeStartKeywords)) +
  643. r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
  644. 'type'),
  645. # special sequences of tokens (we use ?: for non-capturing group as
  646. # required by 'bygroups')
  647. (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
  648. bygroups(Keyword, Text, Keyword, Name.Namespace)),
  649. (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
  650. r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
  651. r'((?:[a-z]\w*/)*[a-z]\w*))?',
  652. bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
  653. Keyword, Name.Namespace)),
  654. (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
  655. r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
  656. bygroups(Keyword, Text, Name.Function)),
  657. (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
  658. r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
  659. bygroups(Keyword, Text, Keyword, Name.Function)),
  660. # keywords
  661. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
  662. (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
  663. (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
  664. (r'::?|:=|\->|[=.]' + sboundary, Keyword),
  665. # names
  666. (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
  667. bygroups(Name.Namespace, tokenConstructor)),
  668. (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
  669. (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
  670. bygroups(Name.Namespace, Name)),
  671. (r'_\w*', Name.Variable),
  672. # literal string
  673. (r'@"', String.Double, 'litstring'),
  674. # operators
  675. (symbols + "|/(?![*/])", Operator),
  676. (r'`', Operator),
  677. (r'[{}()\[\];,]', Punctuation),
  678. # literals. No check for literal characters with len > 1
  679. (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
  680. (r'0[xX][0-9a-fA-F]+', Number.Hex),
  681. (r'[0-9]+', Number.Integer),
  682. (r"'", String.Char, 'char'),
  683. (r'"', String.Double, 'string'),
  684. ],
  685. # type started by alias
  686. 'alias-type': [
  687. (r'=', Keyword),
  688. include('type')
  689. ],
  690. # type started by struct
  691. 'struct-type': [
  692. (r'(?=\((?!,*\)))', Punctuation, '#pop'),
  693. include('type')
  694. ],
  695. # type started by colon
  696. 'type': [
  697. (r'[(\[<]', tokenType, 'type-nested'),
  698. include('type-content')
  699. ],
  700. # type nested in brackets: can contain parameters, comma etc.
  701. 'type-nested': [
  702. (r'[)\]>]', tokenType, '#pop'),
  703. (r'[(\[<]', tokenType, 'type-nested'),
  704. (r',', tokenType),
  705. (r'([a-z]\w*)(\s*)(:)(?!:)',
  706. bygroups(Name, Text, tokenType)), # parameter name
  707. include('type-content')
  708. ],
  709. # shared contents of a type
  710. 'type-content': [
  711. include('whitespace'),
  712. # keywords
  713. (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
  714. (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
  715. Keyword, '#pop'), # need to match because names overlap...
  716. # kinds
  717. (r'[EPHVX]' + boundary, tokenType),
  718. # type names
  719. (r'[a-z][0-9]*(?![\w/])', tokenType),
  720. (r'_\w*', tokenType.Variable), # Generic.Emph
  721. (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
  722. bygroups(Name.Namespace, tokenType)),
  723. (r'((?:[a-z]\w*/)*)([a-z]\w+)',
  724. bygroups(Name.Namespace, tokenType)),
  725. # type keyword operators
  726. (r'::|->|[.:|]', tokenType),
  727. # catchall
  728. default('#pop')
  729. ],
  730. # comments and literals
  731. 'whitespace': [
  732. (r'\n\s*#.*$', Comment.Preproc),
  733. (r'\s+', Text),
  734. (r'/\*', Comment.Multiline, 'comment'),
  735. (r'//.*$', Comment.Single)
  736. ],
  737. 'comment': [
  738. (r'[^/*]+', Comment.Multiline),
  739. (r'/\*', Comment.Multiline, '#push'),
  740. (r'\*/', Comment.Multiline, '#pop'),
  741. (r'[*/]', Comment.Multiline),
  742. ],
  743. 'litstring': [
  744. (r'[^"]+', String.Double),
  745. (r'""', String.Escape),
  746. (r'"', String.Double, '#pop'),
  747. ],
  748. 'string': [
  749. (r'[^\\"\n]+', String.Double),
  750. include('escape-sequence'),
  751. (r'["\n]', String.Double, '#pop'),
  752. ],
  753. 'char': [
  754. (r'[^\\\'\n]+', String.Char),
  755. include('escape-sequence'),
  756. (r'[\'\n]', String.Char, '#pop'),
  757. ],
  758. 'escape-sequence': [
  759. (r'\\[nrt\\"\']', String.Escape),
  760. (r'\\x[0-9a-fA-F]{2}', String.Escape),
  761. (r'\\u[0-9a-fA-F]{4}', String.Escape),
  762. # Yes, \U literals are 6 hex digits.
  763. (r'\\U[0-9a-fA-F]{6}', String.Escape)
  764. ]
  765. }