email.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. """
  2. pygments.lexers.email
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for the raw E-mail.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, DelegatingLexer, bygroups
  9. from pygments.lexers.mime import MIMELexer
  10. from pygments.token import Text, Keyword, Name, String, Number, Comment
  11. from pygments.util import get_bool_opt
# Public API of this module: only EmailLexer is exported; EmailHeaderLexer is
# not listed here.
__all__ = ["EmailLexer"]
  13. class EmailHeaderLexer(RegexLexer):
  14. """
  15. Sub-lexer for raw E-mail. This lexer only process header part of e-mail.
  16. .. versionadded:: 2.5
  17. """
  18. def __init__(self, **options):
  19. super().__init__(**options)
  20. self.highlight_x = get_bool_opt(options, "highlight-X-header", False)
  21. def get_x_header_tokens(self, match):
  22. if self.highlight_x:
  23. # field
  24. yield match.start(1), Name.Tag, match.group(1)
  25. # content
  26. default_actions = self.get_tokens_unprocessed(
  27. match.group(2), stack=("root", "header"))
  28. yield from default_actions
  29. else:
  30. # lowlight
  31. yield match.start(1), Comment.Special, match.group(1)
  32. yield match.start(2), Comment.Multiline, match.group(2)
  33. tokens = {
  34. "root": [
  35. (r"^(?:[A-WYZ]|X400)[\w\-]*:", Name.Tag, "header"),
  36. (r"^(X-(?:\w[\w\-]*:))([\s\S]*?\n)(?![ \t])", get_x_header_tokens),
  37. ],
  38. "header": [
  39. # folding
  40. (r"\n[ \t]", Text.Whitespace),
  41. (r"\n(?![ \t])", Text.Whitespace, "#pop"),
  42. # keywords
  43. (r"\bE?SMTPS?\b", Keyword),
  44. (r"\b(?:HE|EH)LO\b", Keyword),
  45. # mailbox
  46. (r"[\w\.\-\+=]+@[\w\.\-]+", Name.Label),
  47. (r"<[\w\.\-\+=]+@[\w\.\-]+>", Name.Label),
  48. # domain
  49. (r"\b(\w[\w\.-]*\.[\w\.-]*\w[a-zA-Z]+)\b", Name.Function),
  50. # IPv4
  51. (
  52. r"(?<=\b)(?:(?:25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(?:25[0"
  53. r"-5]|2[0-4][0-9]|1?[0-9][0-9]?)(?=\b)",
  54. Number.Integer,
  55. ),
  56. # IPv6
  57. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,7}:(?!\b)", Number.Hex),
  58. (r"(?<=\b):((:[0-9a-fA-F]{1,4}){1,7}|:)(?=\b)", Number.Hex),
  59. (r"(?<=\b)([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  60. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}(?=\b)", Number.Hex),
  61. (r"(?<=\b)[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})(?=\b)", Number.Hex),
  62. (r"(?<=\b)fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}(?=\b)", Number.Hex),
  63. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}(?=\b)", Number.Hex),
  64. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}(?=\b)",
  65. Number.Hex),
  66. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}(?=\b)",
  67. Number.Hex),
  68. (r"(?<=\b)([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}(?=\b)",
  69. Number.Hex),
  70. (
  71. r"(?<=\b)::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}"
  72. r"[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}"
  73. r"[0-9])(?=\b)",
  74. Number.Hex,
  75. ),
  76. (
  77. r"(?<=\b)([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-"
  78. r"9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-"
  79. r"9])(?=\b)",
  80. Number.Hex,
  81. ),
  82. # Date time
  83. (
  84. r"(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),\s+)?(0[1-9]|[1-2]?[0-9]|3["
  85. r"01])\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+("
  86. r"19[0-9]{2}|[2-9][0-9]{3})\s+(2[0-3]|[0-1][0-9]):([0-5][0-9])"
  87. r"(?::(60|[0-5][0-9]))?(?:\.\d{1,5})?\s+([-\+][0-9]{2}[0-5][0-"
  88. r"9]|\(?(?:UTC?|GMT|(?:E|C|M|P)(?:ST|ET|DT)|[A-IK-Z])\)?)",
  89. Name.Decorator,
  90. ),
  91. # RFC-2047 encoded string
  92. (
  93. r"(=\?)([\w-]+)(\?)([BbQq])(\?)([\[\w!\"#$%&\'()*+,-./:;<=>@[\\"
  94. r"\]^_`{|}~]+)(\?=)",
  95. bygroups(
  96. String.Affix,
  97. Name.Constant,
  98. String.Affix,
  99. Keyword.Constant,
  100. String.Affix,
  101. Number.Hex,
  102. String.Affix
  103. )
  104. ),
  105. # others
  106. (r'[\s]+', Text.Whitespace),
  107. (r'[\S]', Text),
  108. ],
  109. }
  110. class EmailLexer(DelegatingLexer):
  111. """
  112. Lexer for raw E-mail.
  113. Additional options accepted:
  114. `highlight-X-header`
  115. Highlight the fields of ``X-`` user-defined email header. (default:
  116. ``False``).
  117. .. versionadded:: 2.5
  118. """
  119. name = "E-mail"
  120. aliases = ["email", "eml"]
  121. filenames = ["*.eml"]
  122. mimetypes = ["message/rfc822"]
  123. def __init__(self, **options):
  124. super().__init__(EmailHeaderLexer, MIMELexer, Comment, **options)