"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import HtmlLexer, XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
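
# A minimal usage sketch (illustrative only; the sample string is made up):
# any lexer in this file is driven through the standard Pygments API, e.g.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     print(highlight('[color=green]go[/color]', BBCodeLexer(),
#                     TerminalFormatter()))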


class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
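
    # Usage sketch for the option above (illustrative): the option is passed
    # as a keyword argument at construction time, e.g.
    # RstLexer(handlecodeblocks=False) leaves directive bodies as plain
    # literal blocks instead of delegating them to another lexer.
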
    name = 'reStructuredText'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line

        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
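
    # Illustrative note on the insertion mechanics above: do_insertions()
    # splices the stripped indentation (collected in `ins`) back into the
    # delegated lexer's token stream, so for an indented body such as
    #
    #     .. code:: python
    #
    #         print('hi')
    #
    # the leading whitespace is emitted as Text while "print('hi')" is lexed
    # as Python.
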
    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                  # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
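

# Illustrative sketch of how analyse_text() is consumed (the sample string is
# made up): pygments.lexers.guess_lexer() calls it on candidate lexers and
# picks the highest score, so
#
#     from pygments.lexers import guess_lexer
#     guess_lexer('\\documentclass{article}\n...')
#
# should pick TexLexer because of the "\documentclass" prefix checked above.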


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # slice instead of index so a three-character input cannot raise
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocPercentLexer` that highlights unlexed data with
    the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
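

# Illustrative note on the three delegating lexers above: DelegatingLexer
# runs the second (preprocessor) lexer over the whole input and re-lexes
# everything it emits as `Other` with the first lexer, e.g.
#
#     lexer = MozPreprocCssLexer()
#     tokens = list(lexer.get_tokens('%ifdef FOO\nbody { color: red }\n%endif\n'))
#
# highlights the %-lines as preprocessor directives and the rest as CSS.
# (The sample input is made up.)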


class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'Markdown'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String.Backtick, match.group(1)
        yield match.start(2), String.Backtick, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String.Backtick, match.group(5)

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$\n)', _handle_codeblock),
            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
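

# Illustrative sketch (sample input made up): a fenced block with a language
# tag is routed through _handle_codeblock above, so
#
#     list(MarkdownLexer().get_tokens("```python\nprint('hi')\n```\n"))
#
# lexes the body as Python, while MarkdownLexer(handlecodeblocks=False)
# keeps it as a plain String run between the String.Backtick fences.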


class TiddlyWiki5Lexer(RegexLexer):
    """
    For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup.

    .. versionadded:: 2.7
    """
    name = 'tiddler'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language: emit the body as a plain code block,
        # but still yield the closing fence so no text is dropped
        if lexer is None:
            yield match.start(4), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name('css')
            except ClassNotFound:
                pass
        code = match.group(3)

        # no CSS lexer available: emit the body as a plain code block, but
        # still yield the closing tag so no text is dropped
        if lexer is None:
            yield match.start(3), String, code
        else:
            yield from do_insertions([], lexer.get_tokens_unprocessed(code))

        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),
            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
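

# Minimal self-test sketch (illustrative; assumes a standard Pygments install
# and only runs when this file is executed directly, not on import).
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    # Highlight a small made-up sample with both option settings.
    sample = '# Title\n\nSome *emphasis*, `code`, and a [link](https://example.com).\n'
    print(highlight(sample, MarkdownLexer(), TerminalFormatter()))
    print(highlight(sample, MarkdownLexer(handlecodeblocks=False),
                    TerminalFormatter()))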