bibtex.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. """
  2. pygments.lexers.bibtex
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for BibTeX bibliography data and styles
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, default, \
  10. words
  11. from pygments.token import Name, Comment, String, Error, Number, Text, \
  12. Keyword, Punctuation
# Public names exported by this module: the two lexer classes defined below.
__all__ = ['BibTeXLexer', 'BSTLexer']
  14. class BibTeXLexer(ExtendedRegexLexer):
  15. """
  16. A lexer for BibTeX bibliography data format.
  17. .. versionadded:: 2.2
  18. """
  19. name = 'BibTeX'
  20. aliases = ['bibtex', 'bib']
  21. filenames = ['*.bib']
  22. mimetypes = ["text/x-bibtex"]
  23. flags = re.IGNORECASE
  24. ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
  25. IDENTIFIER = '[{}][{}]*'.format('a-z_' + ALLOWED_CHARS, r'\w' + ALLOWED_CHARS)
  26. def open_brace_callback(self, match, ctx):
  27. opening_brace = match.group()
  28. ctx.opening_brace = opening_brace
  29. yield match.start(), Punctuation, opening_brace
  30. ctx.pos = match.end()
  31. def close_brace_callback(self, match, ctx):
  32. closing_brace = match.group()
  33. if (
  34. ctx.opening_brace == '{' and closing_brace != '}' or
  35. ctx.opening_brace == '(' and closing_brace != ')'
  36. ):
  37. yield match.start(), Error, closing_brace
  38. else:
  39. yield match.start(), Punctuation, closing_brace
  40. del ctx.opening_brace
  41. ctx.pos = match.end()
  42. tokens = {
  43. 'root': [
  44. include('whitespace'),
  45. ('@comment', Comment),
  46. ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')),
  47. ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')),
  48. ('@' + IDENTIFIER, Name.Class,
  49. ('closing-brace', 'command-body', 'opening-brace')),
  50. ('.+', Comment),
  51. ],
  52. 'opening-brace': [
  53. include('whitespace'),
  54. (r'[{(]', open_brace_callback, '#pop'),
  55. ],
  56. 'closing-brace': [
  57. include('whitespace'),
  58. (r'[})]', close_brace_callback, '#pop'),
  59. ],
  60. 'command-body': [
  61. include('whitespace'),
  62. (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
  63. ],
  64. 'fields': [
  65. include('whitespace'),
  66. (',', Punctuation, 'field'),
  67. default('#pop'),
  68. ],
  69. 'field': [
  70. include('whitespace'),
  71. (IDENTIFIER, Name.Attribute, ('value', '=')),
  72. default('#pop'),
  73. ],
  74. '=': [
  75. include('whitespace'),
  76. ('=', Punctuation, '#pop'),
  77. ],
  78. 'value': [
  79. include('whitespace'),
  80. (IDENTIFIER, Name.Variable),
  81. ('"', String, 'quoted-string'),
  82. (r'\{', String, 'braced-string'),
  83. (r'[\d]+', Number),
  84. ('#', Punctuation),
  85. default('#pop'),
  86. ],
  87. 'quoted-string': [
  88. (r'\{', String, 'braced-string'),
  89. ('"', String, '#pop'),
  90. (r'[^\{\"]+', String),
  91. ],
  92. 'braced-string': [
  93. (r'\{', String, '#push'),
  94. (r'\}', String, '#pop'),
  95. (r'[^\{\}]+', String),
  96. ],
  97. 'whitespace': [
  98. (r'\s+', Text),
  99. ],
  100. }
  101. class BSTLexer(RegexLexer):
  102. """
  103. A lexer for BibTeX bibliography styles.
  104. .. versionadded:: 2.2
  105. """
  106. name = 'BST'
  107. aliases = ['bst', 'bst-pybtex']
  108. filenames = ['*.bst']
  109. flags = re.IGNORECASE | re.MULTILINE
  110. tokens = {
  111. 'root': [
  112. include('whitespace'),
  113. (words(['read', 'sort']), Keyword),
  114. (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
  115. Keyword, ('group')),
  116. (words(['function', 'macro']), Keyword, ('group', 'group')),
  117. (words(['entry']), Keyword, ('group', 'group', 'group')),
  118. ],
  119. 'group': [
  120. include('whitespace'),
  121. (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
  122. ],
  123. 'group-end': [
  124. include('whitespace'),
  125. (r'\}', Punctuation, '#pop'),
  126. ],
  127. 'body': [
  128. include('whitespace'),
  129. (r"\'[^#\"\{\}\s]+", Name.Function),
  130. (r'[^#\"\{\}\s]+\$', Name.Builtin),
  131. (r'[^#\"\{\}\s]+', Name.Variable),
  132. (r'"[^\"]*"', String),
  133. (r'#-?\d+', Number),
  134. (r'\{', Punctuation, ('group-end', 'body')),
  135. default('#pop'),
  136. ],
  137. 'whitespace': [
  138. (r'\s+', Text),
  139. ('%.*?$', Comment.SingleLine),
  140. ],
  141. }