unicon.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. """
  2. pygments.lexers.unicon
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for the Icon and Unicon languages, including ucode VM.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, words, using, this
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation
  12. __all__ = ['IconLexer', 'UcodeLexer', 'UniconLexer']
  13. class UniconLexer(RegexLexer):
  14. """
  15. For Unicon source code.
  16. .. versionadded:: 2.4
  17. """
  18. name = 'Unicon'
  19. aliases = ['unicon']
  20. filenames = ['*.icn']
  21. mimetypes = ['text/unicon']
  22. flags = re.MULTILINE
  23. tokens = {
  24. 'root': [
  25. (r'[^\S\n]+', Text),
  26. (r'#.*?\n', Comment.Single),
  27. (r'[^\S\n]+', Text),
  28. (r'class|method|procedure', Keyword.Declaration, 'subprogram'),
  29. (r'(record)(\s+)(\w+)',
  30. bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
  31. (r'(#line|\$C|\$Cend|\$define|\$else|\$endif|\$error|\$ifdef|'
  32. r'\$ifndef|\$include|\$line|\$undef)\b', Keyword.PreProc),
  33. (r'(&null|&fail)\b', Keyword.Constant),
  34. (r'&allocated|&ascii|&clock|&collections|&column|&col|&control|'
  35. r'&cset|&current|&dateline|&date|&digits|&dump|'
  36. r'&errno|&errornumber|&errortext|&errorvalue|&error|&errout|'
  37. r'&eventcode|&eventvalue|&eventsource|&e|'
  38. r'&features|&file|&host|&input|&interval|&lcase|&letters|'
  39. r'&level|&line|&ldrag|&lpress|&lrelease|'
  40. r'&main|&mdrag|&meta|&mpress|&mrelease|&now|&output|'
  41. r'&phi|&pick|&pi|&pos|&progname|'
  42. r'&random|&rdrag|&regions|&resize|&row|&rpress|&rrelease|'
  43. r'&shift|&source|&storage|&subject|'
  44. r'&time|&trace|&ucase|&version|'
  45. r'&window|&x|&y', Keyword.Reserved),
  46. (r'(by|of|not|to)\b', Keyword.Reserved),
  47. (r'(global|local|static|abstract)\b', Keyword.Reserved),
  48. (r'package|link|import', Keyword.Declaration),
  49. (words((
  50. 'break', 'case', 'create', 'critical', 'default', 'end', 'all',
  51. 'do', 'else', 'every', 'fail', 'if', 'import', 'initial',
  52. 'initially', 'invocable', 'next',
  53. 'repeat', 'return', 'suspend',
  54. 'then', 'thread', 'until', 'while'), prefix=r'\b', suffix=r'\b'),
  55. Keyword.Reserved),
  56. (words((
  57. 'Abort', 'abs', 'acos', 'Active', 'Alert', 'any', 'Any', 'Arb',
  58. 'Arbno', 'args', 'array', 'asin', 'atan', 'atanh', 'Attrib',
  59. 'Bal', 'bal', 'Bg', 'Break', 'Breakx',
  60. 'callout', 'center', 'char', 'chdir', 'chmod', 'chown', 'chroot',
  61. 'classname', 'Clip', 'Clone', 'close', 'cofail', 'collect',
  62. 'Color', 'ColorValue', 'condvar', 'constructor', 'copy',
  63. 'CopyArea', 'cos', 'Couple', 'crypt', 'cset', 'ctime',
  64. 'dbcolumns', 'dbdriver', 'dbkeys', 'dblimits', 'dbproduct',
  65. 'dbtables', 'delay', 'delete', 'detab', 'display', 'DrawArc',
  66. 'DrawCircle', 'DrawCube', 'DrawCurve', 'DrawCylinder',
  67. 'DrawDisk', 'DrawImage', 'DrawLine', 'DrawPoint', 'DrawPolygon',
  68. 'DrawRectangle', 'DrawSegment', 'DrawSphere', 'DrawString',
  69. 'DrawTorus', 'dtor',
  70. 'entab', 'EraseArea', 'errorclear', 'Event', 'eventmask',
  71. 'EvGet', 'EvSend', 'exec', 'exit', 'exp', 'Eye',
  72. 'Fail', 'fcntl', 'fdup', 'Fence', 'fetch', 'Fg', 'fieldnames',
  73. 'filepair', 'FillArc', 'FillCircle', 'FillPolygon',
  74. 'FillRectangle', 'find', 'flock', 'flush', 'Font', 'fork',
  75. 'FreeColor', 'FreeSpace', 'function',
  76. 'get', 'getch', 'getche', 'getegid', 'getenv', 'geteuid',
  77. 'getgid', 'getgr', 'gethost', 'getpgrp', 'getpid', 'getppid',
  78. 'getpw', 'getrusage', 'getserv', 'GetSpace', 'gettimeofday',
  79. 'getuid', 'globalnames', 'GotoRC', 'GotoXY', 'gtime', 'hardlink',
  80. 'iand', 'icom', 'IdentityMatrix', 'image', 'InPort', 'insert',
  81. 'Int86', 'integer', 'ioctl', 'ior', 'ishift', 'istate', 'ixor',
  82. 'kbhit', 'key', 'keyword', 'kill',
  83. 'left', 'Len', 'list', 'load', 'loadfunc', 'localnames',
  84. 'lock', 'log', 'Lower', 'lstat',
  85. 'many', 'map', 'match', 'MatrixMode', 'max', 'member',
  86. 'membernames', 'methodnames', 'methods', 'min', 'mkdir', 'move',
  87. 'MultMatrix', 'mutex',
  88. 'name', 'NewColor', 'Normals', 'NotAny', 'numeric',
  89. 'open', 'opencl', 'oprec', 'ord', 'OutPort',
  90. 'PaletteChars', 'PaletteColor', 'PaletteKey', 'paramnames',
  91. 'parent', 'Pattern', 'Peek', 'Pending', 'pipe', 'Pixel',
  92. 'PlayAudio', 'Poke', 'pop', 'PopMatrix', 'Pos', 'pos',
  93. 'proc', 'pull', 'push', 'PushMatrix', 'PushRotate', 'PushScale',
  94. 'PushTranslate', 'put',
  95. 'QueryPointer',
  96. 'Raise', 'read', 'ReadImage', 'readlink', 'reads', 'ready',
  97. 'real', 'receive', 'Refresh', 'Rem', 'remove', 'rename',
  98. 'repl', 'reverse', 'right', 'rmdir', 'Rotate', 'Rpos',
  99. 'Rtab', 'rtod', 'runerr',
  100. 'save', 'Scale', 'seek', 'select', 'send', 'seq',
  101. 'serial', 'set', 'setenv', 'setgid', 'setgrent',
  102. 'sethostent', 'setpgrp', 'setpwent', 'setservent',
  103. 'setuid', 'signal', 'sin', 'sort', 'sortf', 'Span',
  104. 'spawn', 'sql', 'sqrt', 'stat', 'staticnames', 'stop',
  105. 'StopAudio', 'string', 'structure', 'Succeed', 'Swi',
  106. 'symlink', 'sys_errstr', 'system', 'syswrite',
  107. 'Tab', 'tab', 'table', 'tan',
  108. 'Texcoord', 'Texture', 'TextWidth', 'Translate',
  109. 'trap', 'trim', 'truncate', 'trylock', 'type',
  110. 'umask', 'Uncouple', 'unlock', 'upto', 'utime',
  111. 'variable', 'VAttrib',
  112. 'wait', 'WAttrib', 'WDefault', 'WFlush', 'where',
  113. 'WinAssociate', 'WinButton', 'WinColorDialog', 'WindowContents',
  114. 'WinEditRegion', 'WinFontDialog', 'WinMenuBar', 'WinOpenDialog',
  115. 'WinPlayMedia', 'WinSaveDialog', 'WinScrollBar', 'WinSelectDialog',
  116. 'write', 'WriteImage', 'writes', 'WSection',
  117. 'WSync'), prefix=r'\b', suffix=r'\b'),
  118. Name.Function),
  119. include('numbers'),
  120. (r'<@|<<@|>@|>>@|\.>|->|===|~===|\*\*|\+\+|--|\.|~==|~=|<=|>=|==|'
  121. r'=|<<=|<<|>>=|>>|:=:|:=|->|<->|\+:=|\|', Operator),
  122. (r'"(?:[^\\"]|\\.)*"', String),
  123. (r"'(?:[^\\']|\\.)*'", String.Character),
  124. (r'[*<>+=/&!?@~\\-]', Operator),
  125. (r'\^', Operator),
  126. (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
  127. (r"[\[\]]", Punctuation),
  128. (r"<>|=>|[()|:;,.'`{}%&?]", Punctuation),
  129. (r'\n+', Text),
  130. ],
  131. 'numbers': [
  132. (r'\b([+-]?([2-9]|[12][0-9]|3[0-6])[rR][0-9a-zA-Z]+)\b', Number.Hex),
  133. (r'[+-]?[0-9]*\.([0-9]*)([Ee][+-]?[0-9]*)?', Number.Float),
  134. (r'\b([+-]?[0-9]+[KMGTPkmgtp]?)\b', Number.Integer),
  135. ],
  136. 'subprogram': [
  137. (r'\(', Punctuation, ('#pop', 'formal_part')),
  138. (r';', Punctuation, '#pop'),
  139. (r'"[^"]+"|\w+', Name.Function),
  140. include('root'),
  141. ],
  142. 'type_def': [
  143. (r'\(', Punctuation, 'formal_part'),
  144. ],
  145. 'formal_part': [
  146. (r'\)', Punctuation, '#pop'),
  147. (r'\w+', Name.Variable),
  148. (r',', Punctuation),
  149. (r'(:string|:integer|:real)\b', Keyword.Reserved),
  150. include('root'),
  151. ],
  152. }
  153. class IconLexer(RegexLexer):
  154. """
  155. Lexer for Icon.
  156. .. versionadded:: 1.6
  157. """
  158. name = 'Icon'
  159. aliases = ['icon']
  160. filenames = ['*.icon', '*.ICON']
  161. mimetypes = []
  162. flags = re.MULTILINE
  163. tokens = {
  164. 'root': [
  165. (r'[^\S\n]+', Text),
  166. (r'#.*?\n', Comment.Single),
  167. (r'[^\S\n]+', Text),
  168. (r'class|method|procedure', Keyword.Declaration, 'subprogram'),
  169. (r'(record)(\s+)(\w+)',
  170. bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
  171. (r'(#line|\$C|\$Cend|\$define|\$else|\$endif|\$error|\$ifdef|'
  172. r'\$ifndef|\$include|\$line|\$undef)\b', Keyword.PreProc),
  173. (r'(&null|&fail)\b', Keyword.Constant),
  174. (r'&allocated|&ascii|&clock|&collections|&column|&col|&control|'
  175. r'&cset|&current|&dateline|&date|&digits|&dump|'
  176. r'&errno|&errornumber|&errortext|&errorvalue|&error|&errout|'
  177. r'&eventcode|&eventvalue|&eventsource|&e|'
  178. r'&features|&file|&host|&input|&interval|&lcase|&letters|'
  179. r'&level|&line|&ldrag|&lpress|&lrelease|'
  180. r'&main|&mdrag|&meta|&mpress|&mrelease|&now|&output|'
  181. r'&phi|&pick|&pi|&pos|&progname|'
  182. r'&random|&rdrag|&regions|&resize|&row|&rpress|&rrelease|'
  183. r'&shift|&source|&storage|&subject|'
  184. r'&time|&trace|&ucase|&version|'
  185. r'&window|&x|&y', Keyword.Reserved),
  186. (r'(by|of|not|to)\b', Keyword.Reserved),
  187. (r'(global|local|static)\b', Keyword.Reserved),
  188. (r'link', Keyword.Declaration),
  189. (words((
  190. 'break', 'case', 'create', 'default', 'end', 'all',
  191. 'do', 'else', 'every', 'fail', 'if', 'initial',
  192. 'invocable', 'next',
  193. 'repeat', 'return', 'suspend',
  194. 'then', 'until', 'while'), prefix=r'\b', suffix=r'\b'),
  195. Keyword.Reserved),
  196. (words((
  197. 'abs', 'acos', 'Active', 'Alert', 'any',
  198. 'args', 'array', 'asin', 'atan', 'atanh', 'Attrib',
  199. 'bal', 'Bg',
  200. 'callout', 'center', 'char', 'chdir', 'chmod', 'chown', 'chroot',
  201. 'Clip', 'Clone', 'close', 'cofail', 'collect',
  202. 'Color', 'ColorValue', 'condvar', 'copy',
  203. 'CopyArea', 'cos', 'Couple', 'crypt', 'cset', 'ctime',
  204. 'delay', 'delete', 'detab', 'display', 'DrawArc',
  205. 'DrawCircle', 'DrawCube', 'DrawCurve', 'DrawCylinder',
  206. 'DrawDisk', 'DrawImage', 'DrawLine', 'DrawPoint', 'DrawPolygon',
  207. 'DrawRectangle', 'DrawSegment', 'DrawSphere', 'DrawString',
  208. 'DrawTorus', 'dtor',
  209. 'entab', 'EraseArea', 'errorclear', 'Event', 'eventmask',
  210. 'EvGet', 'EvSend', 'exec', 'exit', 'exp', 'Eye',
  211. 'fcntl', 'fdup', 'fetch', 'Fg', 'fieldnames',
  212. 'FillArc', 'FillCircle', 'FillPolygon',
  213. 'FillRectangle', 'find', 'flock', 'flush', 'Font',
  214. 'FreeColor', 'FreeSpace', 'function',
  215. 'get', 'getch', 'getche', 'getenv',
  216. 'GetSpace', 'gettimeofday',
  217. 'getuid', 'globalnames', 'GotoRC', 'GotoXY', 'gtime', 'hardlink',
  218. 'iand', 'icom', 'IdentityMatrix', 'image', 'InPort', 'insert',
  219. 'Int86', 'integer', 'ioctl', 'ior', 'ishift', 'istate', 'ixor',
  220. 'kbhit', 'key', 'keyword', 'kill',
  221. 'left', 'Len', 'list', 'load', 'loadfunc', 'localnames',
  222. 'lock', 'log', 'Lower', 'lstat',
  223. 'many', 'map', 'match', 'MatrixMode', 'max', 'member',
  224. 'membernames', 'methodnames', 'methods', 'min', 'mkdir', 'move',
  225. 'MultMatrix', 'mutex',
  226. 'name', 'NewColor', 'Normals', 'numeric',
  227. 'open', 'opencl', 'oprec', 'ord', 'OutPort',
  228. 'PaletteChars', 'PaletteColor', 'PaletteKey', 'paramnames',
  229. 'parent', 'Pattern', 'Peek', 'Pending', 'pipe', 'Pixel',
  230. 'Poke', 'pop', 'PopMatrix', 'Pos', 'pos',
  231. 'proc', 'pull', 'push', 'PushMatrix', 'PushRotate', 'PushScale',
  232. 'PushTranslate', 'put',
  233. 'QueryPointer',
  234. 'Raise', 'read', 'ReadImage', 'readlink', 'reads', 'ready',
  235. 'real', 'receive', 'Refresh', 'Rem', 'remove', 'rename',
  236. 'repl', 'reverse', 'right', 'rmdir', 'Rotate', 'Rpos',
  237. 'rtod', 'runerr',
  238. 'save', 'Scale', 'seek', 'select', 'send', 'seq',
  239. 'serial', 'set', 'setenv',
  240. 'setuid', 'signal', 'sin', 'sort', 'sortf',
  241. 'spawn', 'sql', 'sqrt', 'stat', 'staticnames', 'stop',
  242. 'string', 'structure', 'Swi',
  243. 'symlink', 'sys_errstr', 'system', 'syswrite',
  244. 'tab', 'table', 'tan',
  245. 'Texcoord', 'Texture', 'TextWidth', 'Translate',
  246. 'trap', 'trim', 'truncate', 'trylock', 'type',
  247. 'umask', 'Uncouple', 'unlock', 'upto', 'utime',
  248. 'variable',
  249. 'wait', 'WAttrib', 'WDefault', 'WFlush', 'where',
  250. 'WinAssociate', 'WinButton', 'WinColorDialog', 'WindowContents',
  251. 'WinEditRegion', 'WinFontDialog', 'WinMenuBar', 'WinOpenDialog',
  252. 'WinPlayMedia', 'WinSaveDialog', 'WinScrollBar', 'WinSelectDialog',
  253. 'write', 'WriteImage', 'writes', 'WSection',
  254. 'WSync'), prefix=r'\b', suffix=r'\b'),
  255. Name.Function),
  256. include('numbers'),
  257. (r'===|~===|\*\*|\+\+|--|\.|==|~==|<=|>=|=|~=|<<=|<<|>>=|>>|'
  258. r':=:|:=|<->|<-|\+:=|\|\||\|', Operator),
  259. (r'"(?:[^\\"]|\\.)*"', String),
  260. (r"'(?:[^\\']|\\.)*'", String.Character),
  261. (r'[*<>+=/&!?@~\\-]', Operator),
  262. (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
  263. (r"[\[\]]", Punctuation),
  264. (r"<>|=>|[()|:;,.'`{}%\^&?]", Punctuation),
  265. (r'\n+', Text),
  266. ],
  267. 'numbers': [
  268. (r'\b([+-]?([2-9]|[12][0-9]|3[0-6])[rR][0-9a-zA-Z]+)\b', Number.Hex),
  269. (r'[+-]?[0-9]*\.([0-9]*)([Ee][+-]?[0-9]*)?', Number.Float),
  270. (r'\b([+-]?[0-9]+[KMGTPkmgtp]?)\b', Number.Integer),
  271. ],
  272. 'subprogram': [
  273. (r'\(', Punctuation, ('#pop', 'formal_part')),
  274. (r';', Punctuation, '#pop'),
  275. (r'"[^"]+"|\w+', Name.Function),
  276. include('root'),
  277. ],
  278. 'type_def': [
  279. (r'\(', Punctuation, 'formal_part'),
  280. ],
  281. 'formal_part': [
  282. (r'\)', Punctuation, '#pop'),
  283. (r'\w+', Name.Variable),
  284. (r',', Punctuation),
  285. (r'(:string|:integer|:real)\b', Keyword.Reserved),
  286. include('root'),
  287. ],
  288. }
  289. class UcodeLexer(RegexLexer):
  290. """
  291. Lexer for Icon ucode files.
  292. .. versionadded:: 2.4
  293. """
  294. name = 'ucode'
  295. aliases = ['ucode']
  296. filenames = ['*.u', '*.u1', '*.u2']
  297. mimetypes = []
  298. flags = re.MULTILINE
  299. tokens = {
  300. 'root': [
  301. (r'(#.*\n)', Comment),
  302. (words((
  303. 'con', 'declend', 'end',
  304. 'global',
  305. 'impl', 'invocable',
  306. 'lab', 'link', 'local',
  307. 'record',
  308. 'uid', 'unions',
  309. 'version'),
  310. prefix=r'\b', suffix=r'\b'),
  311. Name.Function),
  312. (words((
  313. 'colm', 'filen', 'line', 'synt'),
  314. prefix=r'\b', suffix=r'\b'),
  315. Comment),
  316. (words((
  317. 'asgn',
  318. 'bang', 'bscan',
  319. 'cat', 'ccase', 'chfail',
  320. 'coact', 'cofail', 'compl',
  321. 'coret', 'create', 'cset',
  322. 'diff', 'div', 'dup',
  323. 'efail', 'einit', 'end', 'eqv', 'eret',
  324. 'error', 'escan', 'esusp',
  325. 'field',
  326. 'goto',
  327. 'init', 'int', 'inter',
  328. 'invoke',
  329. 'keywd',
  330. 'lconcat', 'lexeq', 'lexge',
  331. 'lexgt', 'lexle', 'lexlt', 'lexne',
  332. 'limit', 'llist', 'lsusp',
  333. 'mark', 'mark0', 'minus', 'mod', 'mult',
  334. 'neg', 'neqv', 'nonnull', 'noop', 'null',
  335. 'number', 'numeq', 'numge', 'numgt',
  336. 'numle', 'numlt', 'numne',
  337. 'pfail', 'plus', 'pnull', 'pop', 'power',
  338. 'pret', 'proc', 'psusp', 'push1', 'pushn1',
  339. 'random', 'rasgn', 'rcv', 'rcvbk', 'real',
  340. 'refresh', 'rswap',
  341. 'sdup', 'sect', 'size', 'snd', 'sndbk',
  342. 'str', 'subsc', 'swap',
  343. 'tabmat', 'tally', 'toby', 'trace',
  344. 'unmark',
  345. 'value', 'var'), prefix=r'\b', suffix=r'\b'),
  346. Keyword.Declaration),
  347. (words((
  348. 'any',
  349. 'case',
  350. 'endcase', 'endevery', 'endif',
  351. 'endifelse', 'endrepeat', 'endsuspend',
  352. 'enduntil', 'endwhile', 'every',
  353. 'if', 'ifelse',
  354. 'repeat',
  355. 'suspend',
  356. 'until',
  357. 'while'),
  358. prefix=r'\b', suffix=r'\b'),
  359. Name.Constant),
  360. (r'\d+(\s*|\.$|$)', Number.Integer),
  361. (r'[+-]?\d*\.\d+(E[-+]?\d+)?', Number.Float),
  362. (r'[+-]?\d+\.\d*(E[-+]?\d+)?', Number.Float),
  363. (r"(<>|=>|[()|:;,.'`]|[{}]|[%^]|[&?])", Punctuation),
  364. (r'\s+\b', Text),
  365. (r'[\w-]+', Text),
  366. ],
  367. }
  368. def analyse_text(text):
  369. """endsuspend and endrepeat are unique to this language, and
  370. \\self, /self doesn't seem to get used anywhere else either."""
  371. result = 0
  372. if 'endsuspend' in text:
  373. result += 0.1
  374. if 'endrepeat' in text:
  375. result += 0.1
  376. if ':=' in text:
  377. result += 0.01
  378. if 'procedure' in text and 'end' in text:
  379. result += 0.01
  380. # This seems quite unique to unicon -- doesn't appear in any other
  381. # example source we have (A quick search reveals that \SELF appears in
  382. # Perl/Raku code)
  383. if r'\self' in text and r'/self' in text:
  384. result += 0.5
  385. return result