cddl.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. """
  2. pygments.lexers.cddl
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexer for the Concise data definition language (CDDL), a notational
  5. convention to express CBOR and JSON data structures.
  6. More information:
  7. https://datatracker.ietf.org/doc/rfc8610/
  8. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  9. :license: BSD, see LICENSE for details.
  10. """
  11. import re
  12. __all__ = ['CddlLexer']
  13. from pygments.lexer import RegexLexer, bygroups, include, words
  14. from pygments.token import (
  15. Comment,
  16. Error,
  17. Keyword,
  18. Name,
  19. Number,
  20. Operator,
  21. Punctuation,
  22. String,
  23. Text,
  24. )
  25. class CddlLexer(RegexLexer):
  26. """
  27. Lexer for CDDL definitions.
  28. .. versionadded:: 2.8
  29. """
  30. name = "CDDL"
  31. aliases = ["cddl"]
  32. filenames = ["*.cddl"]
  33. mimetypes = ["text/x-cddl"]
  34. _prelude_types = [
  35. "any",
  36. "b64legacy",
  37. "b64url",
  38. "bigfloat",
  39. "bigint",
  40. "bignint",
  41. "biguint",
  42. "bool",
  43. "bstr",
  44. "bytes",
  45. "cbor-any",
  46. "decfrac",
  47. "eb16",
  48. "eb64legacy",
  49. "eb64url",
  50. "encoded-cbor",
  51. "false",
  52. "float",
  53. "float16",
  54. "float16-32",
  55. "float32",
  56. "float32-64",
  57. "float64",
  58. "int",
  59. "integer",
  60. "mime-message",
  61. "nil",
  62. "nint",
  63. "null",
  64. "number",
  65. "regexp",
  66. "tdate",
  67. "text",
  68. "time",
  69. "true",
  70. "tstr",
  71. "uint",
  72. "undefined",
  73. "unsigned",
  74. "uri",
  75. ]
  76. _controls = [
  77. ".and",
  78. ".bits",
  79. ".cbor",
  80. ".cborseq",
  81. ".default",
  82. ".eq",
  83. ".ge",
  84. ".gt",
  85. ".le",
  86. ".lt",
  87. ".ne",
  88. ".regexp",
  89. ".size",
  90. ".within",
  91. ]
  92. _re_id = (
  93. r"[$@A-Z_a-z]"
  94. r"(?:[\-\.]*[$@0-9A-Z_a-z]|[$@0-9A-Z_a-z])*"
  95. )
  96. # While the spec reads more like "an int must not start with 0" we use a
  97. # lookahead here that says "after a 0 there must be no digit". This makes the
  98. # '0' the invalid character in '01', which looks nicer when highlighted.
  99. _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
  100. _re_int = r"-?" + _re_uint
  101. flags = re.UNICODE | re.MULTILINE
  102. tokens = {
  103. "commentsandwhitespace": [(r"\s+", Text), (r";.+$", Comment.Single)],
  104. "root": [
  105. include("commentsandwhitespace"),
  106. # tag types
  107. (r"#(\d\.{uint})?".format(uint=_re_uint), Keyword.Type), # type or any
  108. # occurence
  109. (
  110. r"({uint})?(\*)({uint})?".format(uint=_re_uint),
  111. bygroups(Number, Operator, Number),
  112. ),
  113. (r"\?|\+", Operator), # occurrence
  114. (r"\^", Operator), # cuts
  115. (r"(\.\.\.|\.\.)", Operator), # rangeop
  116. (words(_controls, suffix=r"\b"), Operator.Word), # ctlops
  117. # into choice op
  118. (r"&(?=\s*({groupname}|\())".format(groupname=_re_id), Operator),
  119. (r"~(?=\s*{})".format(_re_id), Operator), # unwrap op
  120. (r"//|/(?!/)", Operator), # double und single slash
  121. (r"=>|/==|/=|=", Operator),
  122. (r"[\[\]{}\(\),<>:]", Punctuation),
  123. # Bytestrings
  124. (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
  125. (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
  126. (r"'", String.Single, "bstr"),
  127. # Barewords as member keys (must be matched before values, types, typenames,
  128. # groupnames).
  129. # Token type is String as barewords are always interpreted as such.
  130. (
  131. r"({bareword})(\s*)(:)".format(bareword=_re_id),
  132. bygroups(String, Text, Punctuation),
  133. ),
  134. # predefined types
  135. (
  136. words(_prelude_types, prefix=r"(?![\-_$@])\b", suffix=r"\b(?![\-_$@])"),
  137. Name.Builtin,
  138. ),
  139. # user-defined groupnames, typenames
  140. (_re_id, Name.Class),
  141. # values
  142. (r"0b[01]+", Number.Bin),
  143. (r"0o[0-7]+", Number.Oct),
  144. (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+)?p[+-]?\d+", Number.Hex), # hexfloat
  145. (r"0x[0-9a-fA-F]+", Number.Hex), # hex
  146. # Float
  147. (
  148. r"{int}(?=(\.\d|e[+-]?\d))(?:\.\d+)?(?:e[+-]?\d+)?".format(int=_re_int),
  149. Number.Float,
  150. ),
  151. # Int
  152. (_re_int, Number.Int),
  153. (r'"(\\\\|\\"|[^"])*"', String.Double),
  154. ],
  155. "bstrb64url": [
  156. (r"'", String.Single, "#pop"),
  157. include("commentsandwhitespace"),
  158. (r"\\.", String.Escape),
  159. (r"[0-9a-zA-Z\-_=]+", String.Single),
  160. (r".", Error),
  161. # (r";.+$", Token.Other),
  162. ],
  163. "bstrh": [
  164. (r"'", String.Single, "#pop"),
  165. include("commentsandwhitespace"),
  166. (r"\\.", String.Escape),
  167. (r"[0-9a-fA-F]+", String.Single),
  168. (r".", Error),
  169. ],
  170. "bstr": [
  171. (r"'", String.Single, "#pop"),
  172. (r"\\.", String.Escape),
  173. (r"[^']", String.Single),
  174. ],
  175. }