  1. """
  2. pygments.lexers.parsers
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for parser generators.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, DelegatingLexer, \
  10. include, bygroups, using
  11. from pygments.token import Punctuation, Other, Text, Comment, Operator, \
  12. Keyword, Name, String, Number, Whitespace
  13. from pygments.lexers.jvm import JavaLexer
  14. from pygments.lexers.c_cpp import CLexer, CppLexer
  15. from pygments.lexers.objective import ObjectiveCLexer
  16. from pygments.lexers.d import DLexer
  17. from pygments.lexers.dotnet import CSharpLexer
  18. from pygments.lexers.ruby import RubyLexer
  19. from pygments.lexers.python import PythonLexer
  20. from pygments.lexers.perl import PerlLexer
  21. __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
  22. 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
  23. 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
  24. 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
  25. 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
  26. 'AntlrJavaLexer', 'AntlrActionScriptLexer',
  27. 'TreetopLexer', 'EbnfLexer']
  28. class RagelLexer(RegexLexer):
  29. """
  30. A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
  31. fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
  32. (or one of the language-specific subclasses).
  33. .. versionadded:: 1.1
  34. """
  35. name = 'Ragel'
  36. aliases = ['ragel']
  37. filenames = []
  38. tokens = {
  39. 'whitespace': [
  40. (r'\s+', Whitespace)
  41. ],
  42. 'comments': [
  43. (r'\#.*$', Comment),
  44. ],
  45. 'keywords': [
  46. (r'(access|action|alphtype)\b', Keyword),
  47. (r'(getkey|write|machine|include)\b', Keyword),
  48. (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
  49. (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
  50. ],
  51. 'numbers': [
  52. (r'0x[0-9A-Fa-f]+', Number.Hex),
  53. (r'[+-]?[0-9]+', Number.Integer),
  54. ],
  55. 'literals': [
  56. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  57. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  58. (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals
  59. (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions
  60. ],
  61. 'identifiers': [
  62. (r'[a-zA-Z_]\w*', Name.Variable),
  63. ],
  64. 'operators': [
  65. (r',', Operator), # Join
  66. (r'\||&|--?', Operator), # Union, Intersection and Subtraction
  67. (r'\.|<:|:>>?', Operator), # Concatention
  68. (r':', Operator), # Label
  69. (r'->', Operator), # Epsilon Transition
  70. (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
  71. (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
  72. (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
  73. (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
  74. (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
  75. (r'>|@|\$|%', Operator), # Transition Actions and Priorities
  76. (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition
  77. (r'!|\^', Operator), # Negation
  78. (r'\(|\)', Operator), # Grouping
  79. ],
  80. 'root': [
  81. include('literals'),
  82. include('whitespace'),
  83. include('comments'),
  84. include('keywords'),
  85. include('numbers'),
  86. include('identifiers'),
  87. include('operators'),
  88. (r'\{', Punctuation, 'host'),
  89. (r'=', Operator),
  90. (r';', Punctuation),
  91. ],
  92. 'host': [
  93. (r'(' + r'|'.join(( # keep host code in largest possible chunks
  94. r'[^{}\'"/#]+', # exclude unsafe characters
  95. r'[^\\]\\[{}]', # allow escaped { or }
  96. # strings and comments may safely contain unsafe characters
  97. r'"(\\\\|\\[^\\]|[^"\\])*"',
  98. r"'(\\\\|\\[^\\]|[^'\\])*'",
  99. r'//.*$\n?', # single line comment
  100. r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
  101. r'\#.*$\n?', # ruby comment
  102. # regular expression: There's no reason for it to start
  103. # with a * and this stops confusion with comments.
  104. r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
  105. # / is safe now that we've handled regex and javadoc comments
  106. r'/',
  107. )) + r')+', Other),
  108. (r'\{', Punctuation, '#push'),
  109. (r'\}', Punctuation, '#pop'),
  110. ],
  111. }
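
# Usage sketch: a bare Ragel fragment can be highlighted directly with
# RagelLexer through the regular pygments.highlight() API.  The fragment
# below is only an illustrative machine definition, not taken from any
# real grammar:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     fragment = "main := ( lower+ digit* )* ;"
#     print(highlight(fragment, RagelLexer(), HtmlFormatter()))
#
# Complete ``.rl`` files are better served by RagelEmbeddedLexer (below)
# or one of the host-language-specific subclasses.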


class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^%\'"/#]+',   # exclude unsafe characters
                r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'/\*(.|\n)*?\*/',                  # multi-line javadoc-style comment
                r'//.*$\n?',                        # single line comment
                r'\#.*$\n?',                        # ruby/ragel comment
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',  # regular expression

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
                                                     using(RagelLexer),
                                                     Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                r'(' + r'|'.join((
                    r'[^}\'"\[/#]',    # exclude unsafe characters
                    r'\}(?=[^%]|$)',   # } is okay as long as it's not followed by %
                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                    r'[^\\]\\[{}]',    # ...and } is okay if it's escaped

                    # allow / if it's preceded with one of these symbols
                    # (ragel EOF actions)
                    r'(>|\$|%|<|@|<>)/',

                    # specifically allow regex followed immediately by *
                    # so it doesn't get mistaken for a comment
                    r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',

                    # allow / as long as it's not followed by another / or by a *
                    r'/(?=[^/*]|$)',

                    # We want to match as many of these as we can in one block.
                    # Not sure if we need the + sign here,
                    # does it help performance?
                )) + r')+',

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r"\[(\\\\|\\[^\\]|[^\]\\])*\]",  # square bracket literal
                r'/\*(.|\n)*?\*/',               # multi-line javadoc-style comment
                r'//.*$\n?',                     # single line comment
                r'\#.*$\n?',                     # ruby/ragel comment
            )) + r')+', using(RagelLexer)),
            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text
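
# Usage sketch: RagelEmbeddedLexer highlights only the Ragel statements of a
# ``.rl`` file and emits the host code as plain Other tokens.  To also get
# host-language highlighting, pick the matching delegating lexer by alias:
#
#     from pygments.lexers import get_lexer_by_name
#
#     lexer = get_lexer_by_name('ragel-ruby')  # Ruby host + embedded Ragel
#     # lexer = get_lexer_by_name('ragel-em')  # Ragel statements only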


class RagelRubyLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Ruby host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(RubyLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: ruby' in text


class RagelCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c' in text


class RagelDLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a D host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(DLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: d' in text


class RagelCppLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a CPP host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CppLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c++' in text


class RagelObjectiveCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in an Objective C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: objc' in text


class RagelJavaLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Java host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(JavaLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: java' in text
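
# Usage sketch: the ``@LANG:`` marker carried by Ragel's own example files is
# what the analyse_text() hooks above look for, so pygments.lexers.guess_lexer()
# can usually pick the right host-language variant on its own.  The input below
# is a hypothetical snippet, not a complete Ragel program:
#
#     from pygments.lexers import guess_lexer
#
#     source = "# @LANG: ruby\n%%{ machine demo; main := 'ok'; }%%\n"
#     lexer = guess_lexer(source)  # expected to be a RagelRubyLexer instance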


class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',        # single line comment
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',        # single line comment
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
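
# Usage sketch: AntlrLexer only tokenizes the grammar skeleton, so actions in
# the target language come out as Other.  Grammars are normally highlighted
# through one of the delegating target lexers defined below, e.g. (the grammar
# text here is a minimal, hypothetical example):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     grammar = "grammar T;\noptions {\n language=Java;\n}\nr : 'hello' ID ;\n"
#     print(highlight(grammar, AntlrJavaLexer(), HtmlFormatter()))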

# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets


class AntlrCppLexer(DelegatingLexer):
    """
    `ANTLR`_ with CPP Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CppLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)


class AntlrObjectiveCLexer(DelegatingLexer):
    """
    `ANTLR`_ with Objective-C Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text)


class AntlrCSharpLexer(DelegatingLexer):
    """
    `ANTLR`_ with C# Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CSharpLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)


class AntlrPythonLexer(DelegatingLexer):
    """
    `ANTLR`_ with Python Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(PythonLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)


class AntlrJavaLexer(DelegatingLexer):
    """
    `ANTLR`_ with Java Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Java Target'
    aliases = ['antlr-java']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(JavaLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Antlr language is Java by default
        return AntlrLexer.analyse_text(text) and 0.9


class AntlrRubyLexer(DelegatingLexer):
    """
    `ANTLR`_ with Ruby Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Ruby Target'
    aliases = ['antlr-ruby', 'antlr-rb']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(RubyLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)


class AntlrPerlLexer(DelegatingLexer):
    """
    `ANTLR`_ with Perl Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Perl Target'
    aliases = ['antlr-perl']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(PerlLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)


class AntlrActionScriptLexer(DelegatingLexer):
    """
    `ANTLR`_ with ActionScript Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ActionScript Target'
    aliases = ['antlr-actionscript', 'antlr-as']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        from pygments.lexers.actionscript import ActionScriptLexer
        super().__init__(ActionScriptLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
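
# Usage sketch: each target variant above is registered under its own alias
# ('antlr-java', 'antlr-ruby', 'antlr-python', ...), so a specific target can
# be requested explicitly when the grammar's ``language=`` option is absent or
# not recognized:
#
#     from pygments.lexers import get_lexer_by_name
#
#     lexer = get_lexer_by_name('antlr-python')  # Python actions + ANTLR grammar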


class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
    Not for direct use; use TreetopLexer instead.

    .. versionadded:: 1.6
    """

    tokens = {
        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],
    }


class TreetopLexer(DelegatingLexer):
    """
    A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    .. versionadded:: 1.6
    """

    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        super().__init__(RubyLexer, TreetopBaseLexer, **options)
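
# Usage sketch: Treetop grammars are matched by file extension, so the lexer
# can either be instantiated directly or resolved from a file name (the file
# name below is hypothetical):
#
#     from pygments.lexers import get_lexer_for_filename
#
#     lexer = get_lexer_for_filename('arithmetic.treetop')  # -> TreetopLexer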


class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*)]', Comment.Multiline),
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }
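
# Usage sketch: a small ISO 14977-style rule highlighted with EbnfLexer (the
# grammar text is a made-up illustration):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     ebnf = "digit = '0' | '1' ; (* toy rule *)"
#     print(highlight(ebnf, EbnfLexer(), TerminalFormatter()))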