python.py 50 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. """
  2. pygments.lexers.python
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Python and related languages.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
  10. default, words, combined, do_insertions
  11. from pygments.util import get_bool_opt, shebang_matches
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic, Other, Error
  14. from pygments import unistring as uni
  15. __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
  16. 'Python2Lexer', 'Python2TracebackLexer',
  17. 'CythonLexer', 'DgLexer', 'NumPyLexer']
  18. line_re = re.compile('.*?\n')
  19. class PythonLexer(RegexLexer):
  20. """
  21. For `Python <http://www.python.org>`_ source code (version 3.x).
  22. .. versionadded:: 0.10
  23. .. versionchanged:: 2.5
  24. This is now the default ``PythonLexer``. It is still available as the
  25. alias ``Python3Lexer``.
  26. """
  27. name = 'Python'
  28. aliases = ['python', 'py', 'sage', 'python3', 'py3']
  29. filenames = [
  30. '*.py',
  31. '*.pyw',
  32. # Jython
  33. '*.jy',
  34. # Sage
  35. '*.sage',
  36. # SCons
  37. '*.sc',
  38. 'SConstruct',
  39. 'SConscript',
  40. # Skylark/Starlark (used by Bazel, Buck, and Pants)
  41. '*.bzl',
  42. 'BUCK',
  43. 'BUILD',
  44. 'BUILD.bazel',
  45. 'WORKSPACE',
  46. # Twisted Application infrastructure
  47. '*.tac',
  48. ]
  49. mimetypes = ['text/x-python', 'application/x-python',
  50. 'text/x-python3', 'application/x-python3']
  51. flags = re.MULTILINE | re.UNICODE
  52. uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)
  53. def innerstring_rules(ttype):
  54. return [
  55. # the old style '%s' % (...) string formatting (still valid in Py3)
  56. (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  57. '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
  58. # the new style '{}'.format(...) string formatting
  59. (r'\{'
  60. r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
  61. r'(\![sra])?' # conversion
  62. r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
  63. r'\}', String.Interpol),
  64. # backslashes, quotes and formatting signs must be parsed one at a time
  65. (r'[^\\\'"%{\n]+', ttype),
  66. (r'[\'"\\]', ttype),
  67. # unhandled string formatting sign
  68. (r'%|(\{{1,2})', ttype)
  69. # newlines are an error (use "nl" state)
  70. ]
  71. def fstring_rules(ttype):
  72. return [
  73. # Assuming that a '}' is the closing brace after format specifier.
  74. # Sadly, this means that we won't detect syntax error. But it's
  75. # more important to parse correct syntax correctly, than to
  76. # highlight invalid syntax.
  77. (r'\}', String.Interpol),
  78. (r'\{', String.Interpol, 'expr-inside-fstring'),
  79. # backslashes, quotes and formatting signs must be parsed one at a time
  80. (r'[^\\\'"{}\n]+', ttype),
  81. (r'[\'"\\]', ttype),
  82. # newlines are an error (use "nl" state)
  83. ]
  84. tokens = {
  85. 'root': [
  86. (r'\n', Text),
  87. (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
  88. bygroups(Text, String.Affix, String.Doc)),
  89. (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
  90. bygroups(Text, String.Affix, String.Doc)),
  91. (r'\A#!.+$', Comment.Hashbang),
  92. (r'#.*$', Comment.Single),
  93. (r'\\\n', Text),
  94. (r'\\', Text),
  95. include('keywords'),
  96. (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
  97. (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
  98. (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  99. 'fromimport'),
  100. (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  101. 'import'),
  102. include('expr'),
  103. ],
  104. 'expr': [
  105. # raw f-strings
  106. ('(?i)(rf|fr)(""")',
  107. bygroups(String.Affix, String.Double),
  108. combined('rfstringescape', 'tdqf')),
  109. ("(?i)(rf|fr)(''')",
  110. bygroups(String.Affix, String.Single),
  111. combined('rfstringescape', 'tsqf')),
  112. ('(?i)(rf|fr)(")',
  113. bygroups(String.Affix, String.Double),
  114. combined('rfstringescape', 'dqf')),
  115. ("(?i)(rf|fr)(')",
  116. bygroups(String.Affix, String.Single),
  117. combined('rfstringescape', 'sqf')),
  118. # non-raw f-strings
  119. ('([fF])(""")', bygroups(String.Affix, String.Double),
  120. combined('fstringescape', 'tdqf')),
  121. ("([fF])(''')", bygroups(String.Affix, String.Single),
  122. combined('fstringescape', 'tsqf')),
  123. ('([fF])(")', bygroups(String.Affix, String.Double),
  124. combined('fstringescape', 'dqf')),
  125. ("([fF])(')", bygroups(String.Affix, String.Single),
  126. combined('fstringescape', 'sqf')),
  127. # raw strings
  128. ('(?i)(rb|br|r)(""")',
  129. bygroups(String.Affix, String.Double), 'tdqs'),
  130. ("(?i)(rb|br|r)(''')",
  131. bygroups(String.Affix, String.Single), 'tsqs'),
  132. ('(?i)(rb|br|r)(")',
  133. bygroups(String.Affix, String.Double), 'dqs'),
  134. ("(?i)(rb|br|r)(')",
  135. bygroups(String.Affix, String.Single), 'sqs'),
  136. # non-raw strings
  137. ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
  138. combined('stringescape', 'tdqs')),
  139. ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
  140. combined('stringescape', 'tsqs')),
  141. ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
  142. combined('stringescape', 'dqs')),
  143. ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
  144. combined('stringescape', 'sqs')),
  145. (r'[^\S\n]+', Text),
  146. include('numbers'),
  147. (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
  148. (r'[]{}:(),;[]', Punctuation),
  149. (r'(in|is|and|or|not)\b', Operator.Word),
  150. include('expr-keywords'),
  151. include('builtins'),
  152. include('magicfuncs'),
  153. include('magicvars'),
  154. include('name'),
  155. ],
  156. 'expr-inside-fstring': [
  157. (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
  158. # without format specifier
  159. (r'(=\s*)?' # debug (https://bugs.python.org/issue36817)
  160. r'(\![sraf])?' # conversion
  161. r'\}', String.Interpol, '#pop'),
  162. # with format specifier
  163. # we'll catch the remaining '}' in the outer scope
  164. (r'(=\s*)?' # debug (https://bugs.python.org/issue36817)
  165. r'(\![sraf])?' # conversion
  166. r':', String.Interpol, '#pop'),
  167. (r'\s+', Text), # allow new lines
  168. include('expr'),
  169. ],
  170. 'expr-inside-fstring-inner': [
  171. (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
  172. (r'[])}]', Punctuation, '#pop'),
  173. (r'\s+', Text), # allow new lines
  174. include('expr'),
  175. ],
  176. 'expr-keywords': [
  177. # Based on https://docs.python.org/3/reference/expressions.html
  178. (words((
  179. 'async for', 'await', 'else', 'for', 'if', 'lambda',
  180. 'yield', 'yield from'), suffix=r'\b'),
  181. Keyword),
  182. (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
  183. ],
  184. 'keywords': [
  185. (words((
  186. 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
  187. 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
  188. 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
  189. 'yield from', 'as', 'with'), suffix=r'\b'),
  190. Keyword),
  191. (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
  192. ],
  193. 'builtins': [
  194. (words((
  195. '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray',
  196. 'bytes', 'chr', 'classmethod', 'compile', 'complex',
  197. 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter',
  198. 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr',
  199. 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass',
  200. 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview',
  201. 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print',
  202. 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr',
  203. 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple',
  204. 'type', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
  205. Name.Builtin),
  206. (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
  207. (words((
  208. 'ArithmeticError', 'AssertionError', 'AttributeError',
  209. 'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
  210. 'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
  211. 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
  212. 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
  213. 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
  214. 'NotImplementedError', 'OSError', 'OverflowError',
  215. 'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
  216. 'RuntimeError', 'RuntimeWarning', 'StopIteration',
  217. 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
  218. 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
  219. 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
  220. 'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
  221. 'Warning', 'WindowsError', 'ZeroDivisionError',
  222. # new builtin exceptions from PEP 3151
  223. 'BlockingIOError', 'ChildProcessError', 'ConnectionError',
  224. 'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
  225. 'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
  226. 'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
  227. 'PermissionError', 'ProcessLookupError', 'TimeoutError',
  228. # others new in Python 3
  229. 'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError'),
  230. prefix=r'(?<!\.)', suffix=r'\b'),
  231. Name.Exception),
  232. ],
  233. 'magicfuncs': [
  234. (words((
  235. '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
  236. '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
  237. '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
  238. '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
  239. '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
  240. '__ge__', '__get__', '__getattr__', '__getattribute__',
  241. '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
  242. '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
  243. '__imul__', '__index__', '__init__', '__instancecheck__',
  244. '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
  245. '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
  246. '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
  247. '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
  248. '__new__', '__next__', '__or__', '__pos__', '__pow__',
  249. '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
  250. '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
  251. '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
  252. '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
  253. '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
  254. '__sub__', '__subclasscheck__', '__truediv__',
  255. '__xor__'), suffix=r'\b'),
  256. Name.Function.Magic),
  257. ],
  258. 'magicvars': [
  259. (words((
  260. '__annotations__', '__bases__', '__class__', '__closure__',
  261. '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
  262. '__func__', '__globals__', '__kwdefaults__', '__module__',
  263. '__mro__', '__name__', '__objclass__', '__qualname__',
  264. '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
  265. Name.Variable.Magic),
  266. ],
  267. 'numbers': [
  268. (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
  269. r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
  270. (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
  271. (r'0[oO](?:_?[0-7])+', Number.Oct),
  272. (r'0[bB](?:_?[01])+', Number.Bin),
  273. (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
  274. (r'\d(?:_?\d)*', Number.Integer),
  275. ],
  276. 'name': [
  277. (r'@' + uni_name, Name.Decorator),
  278. (r'@', Operator), # new matrix multiplication operator
  279. (uni_name, Name),
  280. ],
  281. 'funcname': [
  282. include('magicfuncs'),
  283. (uni_name, Name.Function, '#pop'),
  284. default('#pop'),
  285. ],
  286. 'classname': [
  287. (uni_name, Name.Class, '#pop'),
  288. ],
  289. 'import': [
  290. (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
  291. (r'\.', Name.Namespace),
  292. (uni_name, Name.Namespace),
  293. (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
  294. default('#pop') # all else: go back
  295. ],
  296. 'fromimport': [
  297. (r'(\s+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
  298. (r'\.', Name.Namespace),
  299. # if None occurs here, it's "raise x from None", since None can
  300. # never be a module name
  301. (r'None\b', Name.Builtin.Pseudo, '#pop'),
  302. (uni_name, Name.Namespace),
  303. default('#pop'),
  304. ],
  305. 'rfstringescape': [
  306. (r'\{\{', String.Escape),
  307. (r'\}\}', String.Escape),
  308. ],
  309. 'fstringescape': [
  310. include('rfstringescape'),
  311. include('stringescape'),
  312. ],
  313. 'stringescape': [
  314. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  315. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  316. ],
  317. 'fstrings-single': fstring_rules(String.Single),
  318. 'fstrings-double': fstring_rules(String.Double),
  319. 'strings-single': innerstring_rules(String.Single),
  320. 'strings-double': innerstring_rules(String.Double),
  321. 'dqf': [
  322. (r'"', String.Double, '#pop'),
  323. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  324. include('fstrings-double')
  325. ],
  326. 'sqf': [
  327. (r"'", String.Single, '#pop'),
  328. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  329. include('fstrings-single')
  330. ],
  331. 'dqs': [
  332. (r'"', String.Double, '#pop'),
  333. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  334. include('strings-double')
  335. ],
  336. 'sqs': [
  337. (r"'", String.Single, '#pop'),
  338. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  339. include('strings-single')
  340. ],
  341. 'tdqf': [
  342. (r'"""', String.Double, '#pop'),
  343. include('fstrings-double'),
  344. (r'\n', String.Double)
  345. ],
  346. 'tsqf': [
  347. (r"'''", String.Single, '#pop'),
  348. include('fstrings-single'),
  349. (r'\n', String.Single)
  350. ],
  351. 'tdqs': [
  352. (r'"""', String.Double, '#pop'),
  353. include('strings-double'),
  354. (r'\n', String.Double)
  355. ],
  356. 'tsqs': [
  357. (r"'''", String.Single, '#pop'),
  358. include('strings-single'),
  359. (r'\n', String.Single)
  360. ],
  361. }
  362. def analyse_text(text):
  363. return shebang_matches(text, r'pythonw?(3(\.\d)?)?') or \
  364. 'import ' in text[:1000]
  365. Python3Lexer = PythonLexer
  366. class Python2Lexer(RegexLexer):
  367. """
  368. For `Python 2.x <http://www.python.org>`_ source code.
  369. .. versionchanged:: 2.5
  370. This class has been renamed from ``PythonLexer``. ``PythonLexer`` now
  371. refers to the Python 3 variant. File name patterns like ``*.py`` have
  372. been moved to Python 3 as well.
  373. """
  374. name = 'Python 2.x'
  375. aliases = ['python2', 'py2']
  376. filenames = [] # now taken over by PythonLexer (3.x)
  377. mimetypes = ['text/x-python2', 'application/x-python2']
  378. def innerstring_rules(ttype):
  379. return [
  380. # the old style '%s' % (...) string formatting
  381. (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  382. '[hlL]?[E-GXc-giorsux%]', String.Interpol),
  383. # backslashes, quotes and formatting signs must be parsed one at a time
  384. (r'[^\\\'"%\n]+', ttype),
  385. (r'[\'"\\]', ttype),
  386. # unhandled string formatting sign
  387. (r'%', ttype),
  388. # newlines are an error (use "nl" state)
  389. ]
  390. tokens = {
  391. 'root': [
  392. (r'\n', Text),
  393. (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
  394. bygroups(Text, String.Affix, String.Doc)),
  395. (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
  396. bygroups(Text, String.Affix, String.Doc)),
  397. (r'[^\S\n]+', Text),
  398. (r'\A#!.+$', Comment.Hashbang),
  399. (r'#.*$', Comment.Single),
  400. (r'[]{}:(),;[]', Punctuation),
  401. (r'\\\n', Text),
  402. (r'\\', Text),
  403. (r'(in|is|and|or|not)\b', Operator.Word),
  404. (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
  405. include('keywords'),
  406. (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
  407. (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
  408. (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  409. 'fromimport'),
  410. (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
  411. 'import'),
  412. include('builtins'),
  413. include('magicfuncs'),
  414. include('magicvars'),
  415. include('backtick'),
  416. ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
  417. bygroups(String.Affix, String.Double), 'tdqs'),
  418. ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
  419. bygroups(String.Affix, String.Single), 'tsqs'),
  420. ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
  421. bygroups(String.Affix, String.Double), 'dqs'),
  422. ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
  423. bygroups(String.Affix, String.Single), 'sqs'),
  424. ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
  425. combined('stringescape', 'tdqs')),
  426. ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
  427. combined('stringescape', 'tsqs')),
  428. ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
  429. combined('stringescape', 'dqs')),
  430. ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
  431. combined('stringescape', 'sqs')),
  432. include('name'),
  433. include('numbers'),
  434. ],
  435. 'keywords': [
  436. (words((
  437. 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
  438. 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
  439. 'print', 'raise', 'return', 'try', 'while', 'yield',
  440. 'yield from', 'as', 'with'), suffix=r'\b'),
  441. Keyword),
  442. ],
  443. 'builtins': [
  444. (words((
  445. '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
  446. 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
  447. 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
  448. 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
  449. 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
  450. 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
  451. 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
  452. 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
  453. 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
  454. 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
  455. 'unichr', 'unicode', 'vars', 'xrange', 'zip'),
  456. prefix=r'(?<!\.)', suffix=r'\b'),
  457. Name.Builtin),
  458. (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
  459. r')\b', Name.Builtin.Pseudo),
  460. (words((
  461. 'ArithmeticError', 'AssertionError', 'AttributeError',
  462. 'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
  463. 'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
  464. 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
  465. 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
  466. 'MemoryError', 'NameError',
  467. 'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
  468. 'PendingDeprecationWarning', 'ReferenceError',
  469. 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
  470. 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
  471. 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
  472. 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
  473. 'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
  474. 'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
  475. Name.Exception),
  476. ],
  477. 'magicfuncs': [
  478. (words((
  479. '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
  480. '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
  481. '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
  482. '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
  483. '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
  484. '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
  485. '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
  486. '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
  487. '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
  488. '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
  489. '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
  490. '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
  491. '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
  492. '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
  493. '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
  494. '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
  495. '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
  496. '__unicode__', '__xor__'), suffix=r'\b'),
  497. Name.Function.Magic),
  498. ],
  499. 'magicvars': [
  500. (words((
  501. '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
  502. '__dict__', '__doc__', '__file__', '__func__', '__globals__',
  503. '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
  504. '__slots__', '__weakref__'),
  505. suffix=r'\b'),
  506. Name.Variable.Magic),
  507. ],
  508. 'numbers': [
  509. (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
  510. (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
  511. (r'0[0-7]+j?', Number.Oct),
  512. (r'0[bB][01]+', Number.Bin),
  513. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  514. (r'\d+L', Number.Integer.Long),
  515. (r'\d+j?', Number.Integer)
  516. ],
  517. 'backtick': [
  518. ('`.*?`', String.Backtick),
  519. ],
  520. 'name': [
  521. (r'@[\w.]+', Name.Decorator),
  522. (r'[a-zA-Z_]\w*', Name),
  523. ],
  524. 'funcname': [
  525. include('magicfuncs'),
  526. (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
  527. default('#pop'),
  528. ],
  529. 'classname': [
  530. (r'[a-zA-Z_]\w*', Name.Class, '#pop')
  531. ],
  532. 'import': [
  533. (r'(?:[ \t]|\\\n)+', Text),
  534. (r'as\b', Keyword.Namespace),
  535. (r',', Operator),
  536. (r'[a-zA-Z_][\w.]*', Name.Namespace),
  537. default('#pop') # all else: go back
  538. ],
  539. 'fromimport': [
  540. (r'(?:[ \t]|\\\n)+', Text),
  541. (r'import\b', Keyword.Namespace, '#pop'),
  542. # if None occurs here, it's "raise x from None", since None can
  543. # never be a module name
  544. (r'None\b', Name.Builtin.Pseudo, '#pop'),
  545. # sadly, in "raise x from y" y will be highlighted as namespace too
  546. (r'[a-zA-Z_.][\w.]*', Name.Namespace),
  547. # anything else here also means "raise x from y" and is therefore
  548. # not an error
  549. default('#pop'),
  550. ],
  551. 'stringescape': [
  552. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  553. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  554. ],
  555. 'strings-single': innerstring_rules(String.Single),
  556. 'strings-double': innerstring_rules(String.Double),
  557. 'dqs': [
  558. (r'"', String.Double, '#pop'),
  559. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  560. include('strings-double')
  561. ],
  562. 'sqs': [
  563. (r"'", String.Single, '#pop'),
  564. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  565. include('strings-single')
  566. ],
  567. 'tdqs': [
  568. (r'"""', String.Double, '#pop'),
  569. include('strings-double'),
  570. (r'\n', String.Double)
  571. ],
  572. 'tsqs': [
  573. (r"'''", String.Single, '#pop'),
  574. include('strings-single'),
  575. (r'\n', String.Single)
  576. ],
  577. }
  578. def analyse_text(text):
  579. return shebang_matches(text, r'pythonw?2(\.\d)?')
  580. class PythonConsoleLexer(Lexer):
  581. """
  582. For Python console output or doctests, such as:
  583. .. sourcecode:: pycon
  584. >>> a = 'foo'
  585. >>> print a
  586. foo
  587. >>> 1 / 0
  588. Traceback (most recent call last):
  589. File "<stdin>", line 1, in <module>
  590. ZeroDivisionError: integer division or modulo by zero
  591. Additional options:
  592. `python3`
  593. Use Python 3 lexer for code. Default is ``True``.
  594. .. versionadded:: 1.0
  595. .. versionchanged:: 2.5
  596. Now defaults to ``True``.
  597. """
  598. name = 'Python console session'
  599. aliases = ['pycon']
  600. mimetypes = ['text/x-python-doctest']
  601. def __init__(self, **options):
  602. self.python3 = get_bool_opt(options, 'python3', True)
  603. Lexer.__init__(self, **options)
  604. def get_tokens_unprocessed(self, text):
  605. if self.python3:
  606. pylexer = PythonLexer(**self.options)
  607. tblexer = PythonTracebackLexer(**self.options)
  608. else:
  609. pylexer = Python2Lexer(**self.options)
  610. tblexer = Python2TracebackLexer(**self.options)
  611. curcode = ''
  612. insertions = []
  613. curtb = ''
  614. tbindex = 0
  615. tb = 0
  616. for match in line_re.finditer(text):
  617. line = match.group()
  618. if line.startswith('>>> ') or line.startswith('... '):
  619. tb = 0
  620. insertions.append((len(curcode),
  621. [(0, Generic.Prompt, line[:4])]))
  622. curcode += line[4:]
  623. elif line.rstrip() == '...' and not tb:
  624. # only a new >>> prompt can end an exception block
  625. # otherwise an ellipsis in place of the traceback frames
  626. # will be mishandled
  627. insertions.append((len(curcode),
  628. [(0, Generic.Prompt, '...')]))
  629. curcode += line[3:]
  630. else:
  631. if curcode:
  632. yield from do_insertions(
  633. insertions, pylexer.get_tokens_unprocessed(curcode))
  634. curcode = ''
  635. insertions = []
  636. if (line.startswith('Traceback (most recent call last):') or
  637. re.match(' File "[^"]+", line \\d+\\n$', line)):
  638. tb = 1
  639. curtb = line
  640. tbindex = match.start()
  641. elif line == 'KeyboardInterrupt\n':
  642. yield match.start(), Name.Class, line
  643. elif tb:
  644. curtb += line
  645. if not (line.startswith(' ') or line.strip() == '...'):
  646. tb = 0
  647. for i, t, v in tblexer.get_tokens_unprocessed(curtb):
  648. yield tbindex+i, t, v
  649. curtb = ''
  650. else:
  651. yield match.start(), Generic.Output, line
  652. if curcode:
  653. yield from do_insertions(insertions,
  654. pylexer.get_tokens_unprocessed(curcode))
  655. if curtb:
  656. for i, t, v in tblexer.get_tokens_unprocessed(curtb):
  657. yield tbindex+i, t, v
  658. class PythonTracebackLexer(RegexLexer):
  659. """
  660. For Python 3.x tracebacks, with support for chained exceptions.
  661. .. versionadded:: 1.0
  662. .. versionchanged:: 2.5
  663. This is now the default ``PythonTracebackLexer``. It is still available
  664. as the alias ``Python3TracebackLexer``.
  665. """
  666. name = 'Python Traceback'
  667. aliases = ['pytb', 'py3tb']
  668. filenames = ['*.pytb', '*.py3tb']
  669. mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']
  670. tokens = {
  671. 'root': [
  672. (r'\n', Text),
  673. (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
  674. (r'^During handling of the above exception, another '
  675. r'exception occurred:\n\n', Generic.Traceback),
  676. (r'^The above exception was the direct cause of the '
  677. r'following exception:\n\n', Generic.Traceback),
  678. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
  679. (r'^.*\n', Other),
  680. ],
  681. 'intb': [
  682. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  683. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
  684. (r'^( File )("[^"]+")(, line )(\d+)(\n)',
  685. bygroups(Text, Name.Builtin, Text, Number, Text)),
  686. (r'^( )(.+)(\n)',
  687. bygroups(Text, using(PythonLexer), Text)),
  688. (r'^([ \t]*)(\.\.\.)(\n)',
  689. bygroups(Text, Comment, Text)), # for doctests...
  690. (r'^([^:]+)(: )(.+)(\n)',
  691. bygroups(Generic.Error, Text, Name, Text), '#pop'),
  692. (r'^([a-zA-Z_][\w.]*)(:?\n)',
  693. bygroups(Generic.Error, Text), '#pop')
  694. ],
  695. }
  696. Python3TracebackLexer = PythonTracebackLexer
  697. class Python2TracebackLexer(RegexLexer):
  698. """
  699. For Python tracebacks.
  700. .. versionadded:: 0.7
  701. .. versionchanged:: 2.5
  702. This class has been renamed from ``PythonTracebackLexer``.
  703. ``PythonTracebackLexer`` now refers to the Python 3 variant.
  704. """
  705. name = 'Python 2.x Traceback'
  706. aliases = ['py2tb']
  707. filenames = ['*.py2tb']
  708. mimetypes = ['text/x-python2-traceback']
  709. tokens = {
  710. 'root': [
  711. # Cover both (most recent call last) and (innermost last)
  712. # The optional ^C allows us to catch keyboard interrupt signals.
  713. (r'^(\^C)?(Traceback.*\n)',
  714. bygroups(Text, Generic.Traceback), 'intb'),
  715. # SyntaxError starts with this.
  716. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
  717. (r'^.*\n', Other),
  718. ],
  719. 'intb': [
  720. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  721. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
  722. (r'^( File )("[^"]+")(, line )(\d+)(\n)',
  723. bygroups(Text, Name.Builtin, Text, Number, Text)),
  724. (r'^( )(.+)(\n)',
  725. bygroups(Text, using(Python2Lexer), Text)),
  726. (r'^([ \t]*)(\.\.\.)(\n)',
  727. bygroups(Text, Comment, Text)), # for doctests...
  728. (r'^([^:]+)(: )(.+)(\n)',
  729. bygroups(Generic.Error, Text, Name, Text), '#pop'),
  730. (r'^([a-zA-Z_]\w*)(:?\n)',
  731. bygroups(Generic.Error, Text), '#pop')
  732. ],
  733. }
  734. class CythonLexer(RegexLexer):
  735. """
  736. For Pyrex and `Cython <http://cython.org>`_ source code.
  737. .. versionadded:: 1.1
  738. """
  739. name = 'Cython'
  740. aliases = ['cython', 'pyx', 'pyrex']
  741. filenames = ['*.pyx', '*.pxd', '*.pxi']
  742. mimetypes = ['text/x-cython', 'application/x-cython']
  743. tokens = {
  744. 'root': [
  745. (r'\n', Text),
  746. (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
  747. (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
  748. (r'[^\S\n]+', Text),
  749. (r'#.*$', Comment),
  750. (r'[]{}:(),;[]', Punctuation),
  751. (r'\\\n', Text),
  752. (r'\\', Text),
  753. (r'(in|is|and|or|not)\b', Operator.Word),
  754. (r'(<)([a-zA-Z0-9.?]+)(>)',
  755. bygroups(Punctuation, Keyword.Type, Punctuation)),
  756. (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
  757. (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
  758. bygroups(Keyword, Number.Integer, Operator, Name, Operator,
  759. Name, Punctuation)),
  760. include('keywords'),
  761. (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
  762. (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
  763. # (should actually start a block with only cdefs)
  764. (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
  765. (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
  766. (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
  767. (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
  768. include('builtins'),
  769. include('backtick'),
  770. ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
  771. ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
  772. ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
  773. ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
  774. ('[uU]?"""', String, combined('stringescape', 'tdqs')),
  775. ("[uU]?'''", String, combined('stringescape', 'tsqs')),
  776. ('[uU]?"', String, combined('stringescape', 'dqs')),
  777. ("[uU]?'", String, combined('stringescape', 'sqs')),
  778. include('name'),
  779. include('numbers'),
  780. ],
  781. 'keywords': [
  782. (words((
  783. 'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
  784. 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
  785. 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
  786. 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
  787. Keyword),
  788. (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
  789. ],
  790. 'builtins': [
  791. (words((
  792. '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint',
  793. 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr',
  794. 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
  795. 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit',
  796. 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
  797. 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
  798. 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
  799. 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'Py_ssize_t',
  800. 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
  801. 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
  802. 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned',
  803. 'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
  804. Name.Builtin),
  805. (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
  806. r')\b', Name.Builtin.Pseudo),
  807. (words((
  808. 'ArithmeticError', 'AssertionError', 'AttributeError',
  809. 'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
  810. 'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
  811. 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
  812. 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
  813. 'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError',
  814. 'OSError', 'OverflowError', 'OverflowWarning',
  815. 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
  816. 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
  817. 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
  818. 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
  819. 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
  820. 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
  821. 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
  822. Name.Exception),
  823. ],
  824. 'numbers': [
  825. (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
  826. (r'0\d+', Number.Oct),
  827. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  828. (r'\d+L', Number.Integer.Long),
  829. (r'\d+', Number.Integer)
  830. ],
  831. 'backtick': [
  832. ('`.*?`', String.Backtick),
  833. ],
  834. 'name': [
  835. (r'@\w+', Name.Decorator),
  836. (r'[a-zA-Z_]\w*', Name),
  837. ],
  838. 'funcname': [
  839. (r'[a-zA-Z_]\w*', Name.Function, '#pop')
  840. ],
  841. 'cdef': [
  842. (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
  843. (r'(struct|enum|union|class)\b', Keyword),
  844. (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)',
  845. bygroups(Name.Function, Text), '#pop'),
  846. (r'([a-zA-Z_]\w*)(\s*)(,)',
  847. bygroups(Name.Function, Text, Punctuation)),
  848. (r'from\b', Keyword, '#pop'),
  849. (r'as\b', Keyword),
  850. (r':', Punctuation, '#pop'),
  851. (r'(?=["\'])', Text, '#pop'),
  852. (r'[a-zA-Z_]\w*', Keyword.Type),
  853. (r'.', Text),
  854. ],
  855. 'classname': [
  856. (r'[a-zA-Z_]\w*', Name.Class, '#pop')
  857. ],
  858. 'import': [
  859. (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
  860. (r'[a-zA-Z_][\w.]*', Name.Namespace),
  861. (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
  862. default('#pop') # all else: go back
  863. ],
  864. 'fromimport': [
  865. (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
  866. (r'[a-zA-Z_.][\w.]*', Name.Namespace),
  867. # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
  868. default('#pop'),
  869. ],
  870. 'stringescape': [
  871. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  872. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  873. ],
  874. 'strings': [
  875. (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  876. '[hlL]?[E-GXc-giorsux%]', String.Interpol),
  877. (r'[^\\\'"%\n]+', String),
  878. # quotes, percents and backslashes must be parsed one at a time
  879. (r'[\'"\\]', String),
  880. # unhandled string formatting sign
  881. (r'%', String)
  882. # newlines are an error (use "nl" state)
  883. ],
  884. 'nl': [
  885. (r'\n', String)
  886. ],
  887. 'dqs': [
  888. (r'"', String, '#pop'),
  889. (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
  890. include('strings')
  891. ],
  892. 'sqs': [
  893. (r"'", String, '#pop'),
  894. (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
  895. include('strings')
  896. ],
  897. 'tdqs': [
  898. (r'"""', String, '#pop'),
  899. include('strings'),
  900. include('nl')
  901. ],
  902. 'tsqs': [
  903. (r"'''", String, '#pop'),
  904. include('strings'),
  905. include('nl')
  906. ],
  907. }
  908. class DgLexer(RegexLexer):
  909. """
  910. Lexer for `dg <http://pyos.github.com/dg>`_,
  911. a functional and object-oriented programming language
  912. running on the CPython 3 VM.
  913. .. versionadded:: 1.6
  914. """
  915. name = 'dg'
  916. aliases = ['dg']
  917. filenames = ['*.dg']
  918. mimetypes = ['text/x-dg']
  919. tokens = {
  920. 'root': [
  921. (r'\s+', Text),
  922. (r'#.*?$', Comment.Single),
  923. (r'(?i)0b[01]+', Number.Bin),
  924. (r'(?i)0o[0-7]+', Number.Oct),
  925. (r'(?i)0x[0-9a-f]+', Number.Hex),
  926. (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
  927. (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
  928. (r'(?i)[+-]?[0-9]+j?', Number.Integer),
  929. (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')),
  930. (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')),
  931. (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')),
  932. (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')),
  933. (r"`\w+'*`", Operator),
  934. (r'\b(and|in|is|or|where)\b', Operator.Word),
  935. (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),
  936. (words((
  937. 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
  938. 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
  939. 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
  940. 'super', 'tuple', 'tuple\'', 'type'),
  941. prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
  942. Name.Builtin),
  943. (words((
  944. '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
  945. 'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
  946. 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst',
  947. 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
  948. 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
  949. 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
  950. 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd',
  951. 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
  952. prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
  953. Name.Builtin),
  954. (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
  955. Name.Builtin.Pseudo),
  956. (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
  957. Name.Exception),
  958. (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
  959. r"SystemExit)(?!['\w])", Name.Exception),
  960. (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
  961. r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),
  962. (r"[A-Z_]+'*(?!['\w])", Name),
  963. (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
  964. (r"\w+'*", Name),
  965. (r'[()]', Punctuation),
  966. (r'.', Error),
  967. ],
  968. 'stringescape': [
  969. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  970. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  971. ],
  972. 'string': [
  973. (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  974. '[hlL]?[E-GXc-giorsux%]', String.Interpol),
  975. (r'[^\\\'"%\n]+', String),
  976. # quotes, percents and backslashes must be parsed one at a time
  977. (r'[\'"\\]', String),
  978. # unhandled string formatting sign
  979. (r'%', String),
  980. (r'\n', String)
  981. ],
  982. 'dqs': [
  983. (r'"', String, '#pop')
  984. ],
  985. 'sqs': [
  986. (r"'", String, '#pop')
  987. ],
  988. 'tdqs': [
  989. (r'"""', String, '#pop')
  990. ],
  991. 'tsqs': [
  992. (r"'''", String, '#pop')
  993. ],
  994. }
  995. class NumPyLexer(PythonLexer):
  996. """
  997. A Python lexer recognizing Numerical Python builtins.
  998. .. versionadded:: 0.10
  999. """
  1000. name = 'NumPy'
  1001. aliases = ['numpy']
  1002. # override the mimetypes to not inherit them from python
  1003. mimetypes = []
  1004. filenames = []
  1005. EXTRA_KEYWORDS = {
  1006. 'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
  1007. 'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
  1008. 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
  1009. 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
  1010. 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
  1011. 'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
  1012. 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
  1013. 'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
  1014. 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
  1015. 'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
  1016. 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
  1017. 'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
  1018. 'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
  1019. 'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
  1020. 'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
  1021. 'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
  1022. 'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
  1023. 'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
  1024. 'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
  1025. 'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
  1026. 'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
  1027. 'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
  1028. 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
  1029. 'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
  1030. 'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
  1031. 'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
  1032. 'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
  1033. 'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
  1034. 'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
  1035. 'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
  1036. 'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
  1037. 'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
  1038. 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
  1039. 'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
  1040. 'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
  1041. 'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
  1042. 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
  1043. 'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
  1044. 'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
  1045. 'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
  1046. 'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
  1047. 'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
  1048. 'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
  1049. 'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
  1050. 'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
  1051. 'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
  1052. 'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
  1053. 'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
  1054. 'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
  1055. 'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
  1056. 'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
  1057. 'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
  1058. 'set_numeric_ops', 'set_printoptions', 'set_string_function',
  1059. 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
  1060. 'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
  1061. 'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
  1062. 'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
  1063. 'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
  1064. 'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
  1065. 'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
  1066. 'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
  1067. 'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
  1068. 'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
  1069. 'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
  1070. }
  1071. def get_tokens_unprocessed(self, text):
  1072. for index, token, value in \
  1073. PythonLexer.get_tokens_unprocessed(self, text):
  1074. if token is Name and value in self.EXTRA_KEYWORDS:
  1075. yield index, Keyword.Pseudo, value
  1076. else:
  1077. yield index, token, value
  1078. def analyse_text(text):
  1079. ltext = text[:1000]
  1080. return (shebang_matches(text, r'pythonw?(3(\.\d)?)?') or
  1081. 'import ' in ltext) \
  1082. and ('import numpy' in ltext or 'from numpy import' in ltext)