modula2.py 52 KB


  1. """
  2. pygments.lexers.modula2
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Multi-Dialect Lexer for Modula-2.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include
  10. from pygments.util import get_bool_opt, get_list_opt
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, \
  12. String, Number, Punctuation, Error
  13. __all__ = ['Modula2Lexer']
  14. # Multi-Dialect Modula-2 Lexer
  15. class Modula2Lexer(RegexLexer):
  16. """
  17. For `Modula-2 <http://www.modula2.org/>`_ source code.
  18. The Modula-2 lexer supports several dialects. By default, it operates in
  19. fallback mode, recognising the *combined* literals, punctuation symbols
  20. and operators of all supported dialects, and the *combined* reserved words
  21. and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
  22. differentiating between library defined identifiers.
  23. To select a specific dialect, a dialect option may be passed
  24. or a dialect tag may be embedded into a source file.
  25. Dialect Options:
  26. `m2pim`
  27. Select PIM Modula-2 dialect.
  28. `m2iso`
  29. Select ISO Modula-2 dialect.
  30. `m2r10`
  31. Select Modula-2 R10 dialect.
  32. `objm2`
  33. Select Objective Modula-2 dialect.
  34. The PIM and ISO dialect options may be qualified with a language extension.
  35. Language Extensions:
  36. `+aglet`
  37. Select Aglet Modula-2 extensions, available with m2iso.
  38. `+gm2`
  39. Select GNU Modula-2 extensions, available with m2pim.
  40. `+p1`
  41. Select p1 Modula-2 extensions, available with m2iso.
  42. `+xds`
  43. Select XDS Modula-2 extensions, available with m2iso.
  44. Passing a Dialect Option via Unix Commandline Interface
  45. Dialect options may be passed to the lexer using the `dialect` key.
  46. Only one such option should be passed. If multiple dialect options are
  47. passed, the first valid option is used, any subsequent options are ignored.
  48. Examples:
  49. `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
  50. Use ISO dialect to render input to HTML output
  51. `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
  52. Use ISO dialect with p1 extensions to render input to RTF output
  53. Embedding a Dialect Option within a source file
  54. A dialect option may be embedded in a source file in form of a dialect
  55. tag, a specially formatted comment that specifies a dialect option.
  56. Dialect Tag EBNF::
  57. dialectTag :
  58. OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
  59. dialectOption :
  60. 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
  61. 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
  62. Prefix : '!' ;
  63. OpeningCommentDelim : '(*' ;
  64. ClosingCommentDelim : '*)' ;
  65. No whitespace is permitted between the tokens of a dialect tag.
  66. In the event that a source file contains multiple dialect tags, the first
  67. tag that contains a valid dialect option will be used and any subsequent
  68. dialect tags will be ignored. Ideally, a dialect tag should be placed
  69. at the beginning of a source file.
  70. An embedded dialect tag overrides a dialect option set via command line.
  71. Examples:
  72. ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
  73. Use Modula2 R10 dialect to render this source file.
  74. ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
  75. Use PIM dialect with GNU extensions to render this source file.
  76. Algol Publication Mode:
  77. In Algol publication mode, source text is rendered for publication of
  78. algorithms in scientific papers and academic texts, following the format
  79. of the Revised Algol-60 Language Report. It is activated by passing
  80. one of two corresponding styles as an option:
  81. `algol`
  82. render reserved words lowercase underline boldface
  83. and builtins lowercase boldface italic
  84. `algol_nu`
  85. render reserved words lowercase boldface (no underlining)
  86. and builtins lowercase boldface italic
  87. The lexer automatically performs the required lowercase conversion when
  88. this mode is activated.
  89. Example:
  90. ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
  91. Render input file in Algol publication mode to LaTeX output.
  92. Rendering Mode of First Class ADT Identifiers:
  93. The rendering of standard library first class ADT identifiers is controlled
  94. by option flag "treat_stdlib_adts_as_builtins".
  95. When this option is turned on, standard library ADT identifiers are rendered
  96. as builtins. When it is turned off, they are rendered as ordinary library
  97. identifiers.
  98. `treat_stdlib_adts_as_builtins` (default: On)
  99. The option is useful for dialects that support ADTs as first class objects
  100. and provide ADTs in the standard library that would otherwise be built-in.
  101. At present, only Modula-2 R10 supports library ADTs as first class objects
  102. and therefore, no ADT identifiers are defined for any other dialects.
  103. Example:
  104. ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
  105. Render standard library ADTs as ordinary library types.
  106. .. versionadded:: 1.3
  107. .. versionchanged:: 2.1
  108. Added multi-dialect support.
  109. """
  110. name = 'Modula-2'
  111. aliases = ['modula2', 'm2']
  112. filenames = ['*.def', '*.mod']
  113. mimetypes = ['text/x-modula2']
  114. flags = re.MULTILINE | re.DOTALL
  115. tokens = {
  116. 'whitespace': [
  117. (r'\n+', Text), # blank lines
  118. (r'\s+', Text), # whitespace
  119. ],
  120. 'dialecttags': [
  121. # PIM Dialect Tag
  122. (r'\(\*!m2pim\*\)', Comment.Special),
  123. # ISO Dialect Tag
  124. (r'\(\*!m2iso\*\)', Comment.Special),
  125. # M2R10 Dialect Tag
  126. (r'\(\*!m2r10\*\)', Comment.Special),
  127. # ObjM2 Dialect Tag
  128. (r'\(\*!objm2\*\)', Comment.Special),
  129. # Aglet Extensions Dialect Tag
  130. (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
  131. # GNU Extensions Dialect Tag
  132. (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
  133. # p1 Extensions Dialect Tag
  134. (r'\(\*!m2iso\+p1\*\)', Comment.Special),
  135. # XDS Extensions Dialect Tag
  136. (r'\(\*!m2iso\+xds\*\)', Comment.Special),
  137. ],
  138. 'identifiers': [
  139. (r'([a-zA-Z_$][\w$]*)', Name),
  140. ],
  141. 'prefixed_number_literals': [
  142. #
  143. # Base-2, whole number
  144. (r'0b[01]+(\'[01]+)*', Number.Bin),
  145. #
  146. # Base-16, whole number
  147. (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
  148. ],
  149. 'plain_number_literals': [
  150. #
  151. # Base-10, real number with exponent
  152. (r'[0-9]+(\'[0-9]+)*' # integral part
  153. r'\.[0-9]+(\'[0-9]+)*' # fractional part
  154. r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent
  155. Number.Float),
  156. #
  157. # Base-10, real number without exponent
  158. (r'[0-9]+(\'[0-9]+)*' # integral part
  159. r'\.[0-9]+(\'[0-9]+)*', # fractional part
  160. Number.Float),
  161. #
  162. # Base-10, whole number
  163. (r'[0-9]+(\'[0-9]+)*', Number.Integer),
  164. ],
  165. 'suffixed_number_literals': [
  166. #
  167. # Base-8, whole number
  168. (r'[0-7]+B', Number.Oct),
  169. #
  170. # Base-8, character code
  171. (r'[0-7]+C', Number.Oct),
  172. #
  173. # Base-16, number
  174. (r'[0-9A-F]+H', Number.Hex),
  175. ],
  176. 'string_literals': [
  177. (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
  178. (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
  179. ],
  180. 'digraph_operators': [
  181. # Dot Product Operator
  182. (r'\*\.', Operator),
  183. # Array Concatenation Operator
  184. (r'\+>', Operator), # M2R10 + ObjM2
  185. # Inequality Operator
  186. (r'<>', Operator), # ISO + PIM
  187. # Less-Or-Equal, Subset
  188. (r'<=', Operator),
  189. # Greater-Or-Equal, Superset
  190. (r'>=', Operator),
  191. # Identity Operator
  192. (r'==', Operator), # M2R10 + ObjM2
  193. # Type Conversion Operator
  194. (r'::', Operator), # M2R10 + ObjM2
  195. # Assignment Symbol
  196. (r':=', Operator),
  197. # Postfix Increment Mutator
  198. (r'\+\+', Operator), # M2R10 + ObjM2
  199. # Postfix Decrement Mutator
  200. (r'--', Operator), # M2R10 + ObjM2
  201. ],
  202. 'unigraph_operators': [
  203. # Arithmetic Operators
  204. (r'[+-]', Operator),
  205. (r'[*/]', Operator),
  206. # ISO 80000-2 compliant Set Difference Operator
  207. (r'\\', Operator), # M2R10 + ObjM2
  208. # Relational Operators
  209. (r'[=#<>]', Operator),
  210. # Dereferencing Operator
  211. (r'\^', Operator),
  212. # Dereferencing Operator Synonym
  213. (r'@', Operator), # ISO
  214. # Logical AND Operator Synonym
  215. (r'&', Operator), # PIM + ISO
  216. # Logical NOT Operator Synonym
  217. (r'~', Operator), # PIM + ISO
  218. # Smalltalk Message Prefix
  219. (r'`', Operator), # ObjM2
  220. ],
  221. 'digraph_punctuation': [
  222. # Range Constructor
  223. (r'\.\.', Punctuation),
  224. # Opening Chevron Bracket
  225. (r'<<', Punctuation), # M2R10 + ISO
  226. # Closing Chevron Bracket
  227. (r'>>', Punctuation), # M2R10 + ISO
  228. # Blueprint Punctuation
  229. (r'->', Punctuation), # M2R10 + ISO
  230. # Distinguish |# and # in M2 R10
  231. (r'\|#', Punctuation),
  232. # Distinguish ## and # in M2 R10
  233. (r'##', Punctuation),
  234. # Distinguish |* and * in M2 R10
  235. (r'\|\*', Punctuation),
  236. ],
  237. 'unigraph_punctuation': [
  238. # Common Punctuation
  239. (r'[()\[\]{},.:;|]', Punctuation),
  240. # Case Label Separator Synonym
  241. (r'!', Punctuation), # ISO
  242. # Blueprint Punctuation
  243. (r'\?', Punctuation), # M2R10 + ObjM2
  244. ],
  245. 'comments': [
  246. # Single Line Comment
  247. (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2
  248. # Block Comment
  249. (r'\(\*([^$].*?)\*\)', Comment.Multiline),
  250. # Template Block Comment
  251. (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2
  252. ],
  253. 'pragmas': [
  254. # ISO Style Pragmas
  255. (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2
  256. # Pascal Style Pragmas
  257. (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM
  258. ],
  259. 'root': [
  260. include('whitespace'),
  261. include('dialecttags'),
  262. include('pragmas'),
  263. include('comments'),
  264. include('identifiers'),
  265. include('suffixed_number_literals'), # PIM + ISO
  266. include('prefixed_number_literals'), # M2R10 + ObjM2
  267. include('plain_number_literals'),
  268. include('string_literals'),
  269. include('digraph_punctuation'),
  270. include('digraph_operators'),
  271. include('unigraph_punctuation'),
  272. include('unigraph_operators'),
  273. ]
  274. }
  275. # C o m m o n D a t a s e t s
  276. # Common Reserved Words Dataset
  277. common_reserved_words = (
  278. # 37 common reserved words
  279. 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
  280. 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
  281. 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
  282. 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
  283. 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
  284. )
  285. # Common Builtins Dataset
  286. common_builtins = (
  287. # 16 common builtins
  288. 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
  289. 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
  290. 'TRUE',
  291. )
  292. # Common Pseudo-Module Builtins Dataset
  293. common_pseudo_builtins = (
  294. # 4 common pseudo builtins
  295. 'ADDRESS', 'BYTE', 'WORD', 'ADR'
  296. )
  297. # P I M M o d u l a - 2 D a t a s e t s
  298. # Lexemes to Mark as Error Tokens for PIM Modula-2
  299. pim_lexemes_to_reject = (
  300. '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
  301. '+>', '->', '<<', '>>', '|#', '##',
  302. )
  303. # PIM Modula-2 Additional Reserved Words Dataset
  304. pim_additional_reserved_words = (
  305. # 3 additional reserved words
  306. 'EXPORT', 'QUALIFIED', 'WITH',
  307. )
  308. # PIM Modula-2 Additional Builtins Dataset
  309. pim_additional_builtins = (
  310. # 16 additional builtins
  311. 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
  312. 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
  313. )
  314. # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
  315. pim_additional_pseudo_builtins = (
  316. # 5 additional pseudo builtins
  317. 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
  318. )
  319. # I S O M o d u l a - 2 D a t a s e t s
  320. # Lexemes to Mark as Error Tokens for ISO Modula-2
  321. iso_lexemes_to_reject = (
  322. '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
  323. '<<', '>>', '|#', '##',
  324. )
  325. # ISO Modula-2 Additional Reserved Words Dataset
  326. iso_additional_reserved_words = (
  327. # 9 additional reserved words (ISO 10514-1)
  328. 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
  329. 'REM', 'RETRY', 'WITH',
  330. # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
  331. 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
  332. 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
  333. )
  334. # ISO Modula-2 Additional Builtins Dataset
  335. iso_additional_builtins = (
  336. # 26 additional builtins (ISO 10514-1)
  337. 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
  338. 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
  339. 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
  340. 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
  341. # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
  342. 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
  343. )
  344. # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
  345. iso_additional_pseudo_builtins = (
  346. # 14 additional builtins (SYSTEM)
  347. 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
  348. 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
  349. 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
  350. # 13 additional builtins (COROUTINES)
  351. 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
  352. 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
  353. 'NEWCOROUTINE', 'PROT', 'TRANSFER',
  354. # 9 additional builtins (EXCEPTIONS)
  355. 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
  356. 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
  357. 'IsExceptionalExecution', 'RAISE',
  358. # 3 additional builtins (TERMINATION)
  359. 'TERMINATION', 'IsTerminating', 'HasHalted',
  360. # 4 additional builtins (M2EXCEPTION)
  361. 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
  362. 'indexException', 'rangeException', 'caseSelectException',
  363. 'invalidLocation', 'functionException', 'wholeValueException',
  364. 'wholeDivException', 'realValueException', 'realDivException',
  365. 'complexValueException', 'complexDivException', 'protException',
  366. 'sysException', 'coException', 'exException',
  367. )
  368. # M o d u l a - 2 R 1 0 D a t a s e t s
  369. # Lexemes to Mark as Error Tokens for Modula-2 R10
  370. m2r10_lexemes_to_reject = (
  371. '!', '`', '@', '$', '%', '&', '<>',
  372. )
  373. # Modula-2 R10 reserved words in addition to the common set
  374. m2r10_additional_reserved_words = (
  375. # 12 additional reserved words
  376. 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
  377. 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
  378. # 2 additional reserved words with symbolic assembly option
  379. 'ASM', 'REG',
  380. )
  381. # Modula-2 R10 builtins in addition to the common set
  382. m2r10_additional_builtins = (
  383. # 26 additional builtins
  384. 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
  385. 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
  386. 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
  387. 'UNICHAR', 'WRITE', 'WRITEF',
  388. )
  389. # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
  390. m2r10_additional_pseudo_builtins = (
  391. # 13 additional builtins (TPROPERTIES)
  392. 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
  393. 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
  394. 'TMAXEXP', 'TMINEXP',
  395. # 4 additional builtins (CONVERSION)
  396. 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
  397. # 35 additional builtins (UNSAFE)
  398. 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
  399. 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
  400. 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
  401. 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
  402. 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
  403. # 11 additional builtins (ATOMIC)
  404. 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
  405. 'BWNAND', 'BWOR', 'BWXOR',
  406. # 7 additional builtins (COMPILER)
  407. 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
  408. 'HASH',
  409. # 5 additional builtins (ASSEMBLER)
  410. 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
  411. )
  412. # O b j e c t i v e M o d u l a - 2 D a t a s e t s
  413. # Lexemes to Mark as Error Tokens for Objective Modula-2
  414. objm2_lexemes_to_reject = (
  415. '!', '$', '%', '&', '<>',
  416. )
  417. # Objective Modula-2 Extensions
  418. # reserved words in addition to Modula-2 R10
  419. objm2_additional_reserved_words = (
  420. # 16 additional reserved words
  421. 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
  422. 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
  423. 'SUPER', 'TRY',
  424. )
  425. # Objective Modula-2 Extensions
  426. # builtins in addition to Modula-2 R10
  427. objm2_additional_builtins = (
  428. # 3 additional builtins
  429. 'OBJECT', 'NO', 'YES',
  430. )
  431. # Objective Modula-2 Extensions
  432. # pseudo-module builtins in addition to Modula-2 R10
  433. objm2_additional_pseudo_builtins = (
  434. # None
  435. )
  436. # A g l e t M o d u l a - 2 D a t a s e t s
  437. # Aglet Extensions
  438. # reserved words in addition to ISO Modula-2
  439. aglet_additional_reserved_words = (
  440. # None
  441. )
  442. # Aglet Extensions
  443. # builtins in addition to ISO Modula-2
  444. aglet_additional_builtins = (
  445. # 9 additional builtins
  446. 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
  447. 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
  448. )
  449. # Aglet Modula-2 Extensions
  450. # pseudo-module builtins in addition to ISO Modula-2
  451. aglet_additional_pseudo_builtins = (
  452. # None
  453. )
  454. # G N U M o d u l a - 2 D a t a s e t s
  455. # GNU Extensions
  456. # reserved words in addition to PIM Modula-2
  457. gm2_additional_reserved_words = (
  458. # 10 additional reserved words
  459. 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
  460. '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
  461. )
  462. # GNU Extensions
  463. # builtins in addition to PIM Modula-2
  464. gm2_additional_builtins = (
  465. # 21 additional builtins
  466. 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
  467. 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
  468. 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
  469. 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
  470. )
  471. # GNU Extensions
  472. # pseudo-module builtins in addition to PIM Modula-2
  473. gm2_additional_pseudo_builtins = (
  474. # None
  475. )
  476. # p 1 M o d u l a - 2 D a t a s e t s
  477. # p1 Extensions
  478. # reserved words in addition to ISO Modula-2
  479. p1_additional_reserved_words = (
  480. # None
  481. )
  482. # p1 Extensions
  483. # builtins in addition to ISO Modula-2
  484. p1_additional_builtins = (
  485. # None
  486. )
  487. # p1 Modula-2 Extensions
  488. # pseudo-module builtins in addition to ISO Modula-2
  489. p1_additional_pseudo_builtins = (
  490. # 1 additional builtin
  491. 'BCD',
  492. )
  493. # X D S M o d u l a - 2 D a t a s e t s
  494. # XDS Extensions
  495. # reserved words in addition to ISO Modula-2
  496. xds_additional_reserved_words = (
  497. # 1 additional reserved word
  498. 'SEQ',
  499. )
  500. # XDS Extensions
  501. # builtins in addition to ISO Modula-2
  502. xds_additional_builtins = (
  503. # 9 additional builtins
  504. 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
  505. 'LONGCARD', 'SHORTCARD', 'SHORTINT',
  506. )
  507. # XDS Modula-2 Extensions
  508. # pseudo-module builtins in addition to ISO Modula-2
  509. xds_additional_pseudo_builtins = (
  510. # 22 additional builtins (SYSTEM)
  511. 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
  512. 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
  513. 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void'
  514. # 3 additional builtins (COMPILER)
  515. 'COMPILER', 'OPTION', 'EQUATION'
  516. )
  517. # P I M S t a n d a r d L i b r a r y D a t a s e t s
  518. # PIM Modula-2 Standard Library Modules Dataset
  519. pim_stdlib_module_identifiers = (
  520. 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
  521. )
  522. # PIM Modula-2 Standard Library Types Dataset
  523. pim_stdlib_type_identifiers = (
  524. 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
  525. 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
  526. 'DirectoryCommand',
  527. )
  528. # PIM Modula-2 Standard Library Procedures Dataset
  529. pim_stdlib_proc_identifiers = (
  530. 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
  531. 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
  532. 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
  533. 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
  534. 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
  535. 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
  536. 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
  537. 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
  538. 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
  539. 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
  540. )
  541. # PIM Modula-2 Standard Library Variables Dataset
  542. pim_stdlib_var_identifiers = (
  543. 'Done', 'termCH', 'in', 'out'
  544. )
  545. # PIM Modula-2 Standard Library Constants Dataset
  546. pim_stdlib_const_identifiers = (
  547. 'EOL',
  548. )
  549. # I S O S t a n d a r d L i b r a r y D a t a s e t s
  550. # ISO Modula-2 Standard Library Modules Dataset
  551. iso_stdlib_module_identifiers = (
  552. # TO DO
  553. )
  554. # ISO Modula-2 Standard Library Types Dataset
  555. iso_stdlib_type_identifiers = (
  556. # TO DO
  557. )
  558. # ISO Modula-2 Standard Library Procedures Dataset
  559. iso_stdlib_proc_identifiers = (
  560. # TO DO
  561. )
  562. # ISO Modula-2 Standard Library Variables Dataset
  563. iso_stdlib_var_identifiers = (
  564. # TO DO
  565. )
  566. # ISO Modula-2 Standard Library Constants Dataset
  567. iso_stdlib_const_identifiers = (
  568. # TO DO
  569. )
  570. # M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
  571. # Modula-2 R10 Standard Library ADTs Dataset
  572. m2r10_stdlib_adt_identifiers = (
  573. 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
  574. 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
  575. 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
  576. 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
  577. 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
  578. 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
  579. 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
  580. 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
  581. 'INT64', 'INT128', 'STRING', 'UNISTRING',
  582. )
  583. # Modula-2 R10 Standard Library Blueprints Dataset
  584. m2r10_stdlib_blueprint_identifiers = (
  585. 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
  586. 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
  587. 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
  588. 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
  589. 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
  590. 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
  591. 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
  592. )
  593. # Modula-2 R10 Standard Library Modules Dataset
  594. m2r10_stdlib_module_identifiers = (
  595. 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
  596. 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
  597. 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
  598. 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
  599. 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
  600. )
  601. # Modula-2 R10 Standard Library Types Dataset
  602. m2r10_stdlib_type_identifiers = (
  603. 'File', 'Status',
  604. # TO BE COMPLETED
  605. )
  606. # Modula-2 R10 Standard Library Procedures Dataset
  607. m2r10_stdlib_proc_identifiers = (
  608. 'ALLOCATE', 'DEALLOCATE', 'SIZE',
  609. # TO BE COMPLETED
  610. )
  611. # Modula-2 R10 Standard Library Variables Dataset
  612. m2r10_stdlib_var_identifiers = (
  613. 'stdIn', 'stdOut', 'stdErr',
  614. )
  615. # Modula-2 R10 Standard Library Constants Dataset
  616. m2r10_stdlib_const_identifiers = (
  617. 'pi', 'tau',
  618. )
  619. # D i a l e c t s
  620. # Dialect modes
  621. dialects = (
  622. 'unknown',
  623. 'm2pim', 'm2iso', 'm2r10', 'objm2',
  624. 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
  625. )
  626. # D a t a b a s e s
  627. # Lexemes to Mark as Errors Database
  628. lexemes_to_reject_db = {
  629. # Lexemes to reject for unknown dialect
  630. 'unknown': (
  631. # LEAVE THIS EMPTY
  632. ),
  633. # Lexemes to reject for PIM Modula-2
  634. 'm2pim': (
  635. pim_lexemes_to_reject,
  636. ),
  637. # Lexemes to reject for ISO Modula-2
  638. 'm2iso': (
  639. iso_lexemes_to_reject,
  640. ),
  641. # Lexemes to reject for Modula-2 R10
  642. 'm2r10': (
  643. m2r10_lexemes_to_reject,
  644. ),
  645. # Lexemes to reject for Objective Modula-2
  646. 'objm2': (
  647. objm2_lexemes_to_reject,
  648. ),
  649. # Lexemes to reject for Aglet Modula-2
  650. 'm2iso+aglet': (
  651. iso_lexemes_to_reject,
  652. ),
  653. # Lexemes to reject for GNU Modula-2
  654. 'm2pim+gm2': (
  655. pim_lexemes_to_reject,
  656. ),
  657. # Lexemes to reject for p1 Modula-2
  658. 'm2iso+p1': (
  659. iso_lexemes_to_reject,
  660. ),
  661. # Lexemes to reject for XDS Modula-2
  662. 'm2iso+xds': (
  663. iso_lexemes_to_reject,
  664. ),
  665. }
  666. # Reserved Words Database
  667. reserved_words_db = {
  668. # Reserved words for unknown dialect
  669. 'unknown': (
  670. common_reserved_words,
  671. pim_additional_reserved_words,
  672. iso_additional_reserved_words,
  673. m2r10_additional_reserved_words,
  674. ),
  675. # Reserved words for PIM Modula-2
  676. 'm2pim': (
  677. common_reserved_words,
  678. pim_additional_reserved_words,
  679. ),
  680. # Reserved words for Modula-2 R10
  681. 'm2iso': (
  682. common_reserved_words,
  683. iso_additional_reserved_words,
  684. ),
  685. # Reserved words for ISO Modula-2
  686. 'm2r10': (
  687. common_reserved_words,
  688. m2r10_additional_reserved_words,
  689. ),
  690. # Reserved words for Objective Modula-2
  691. 'objm2': (
  692. common_reserved_words,
  693. m2r10_additional_reserved_words,
  694. objm2_additional_reserved_words,
  695. ),
  696. # Reserved words for Aglet Modula-2 Extensions
  697. 'm2iso+aglet': (
  698. common_reserved_words,
  699. iso_additional_reserved_words,
  700. aglet_additional_reserved_words,
  701. ),
  702. # Reserved words for GNU Modula-2 Extensions
  703. 'm2pim+gm2': (
  704. common_reserved_words,
  705. pim_additional_reserved_words,
  706. gm2_additional_reserved_words,
  707. ),
  708. # Reserved words for p1 Modula-2 Extensions
  709. 'm2iso+p1': (
  710. common_reserved_words,
  711. iso_additional_reserved_words,
  712. p1_additional_reserved_words,
  713. ),
  714. # Reserved words for XDS Modula-2 Extensions
  715. 'm2iso+xds': (
  716. common_reserved_words,
  717. iso_additional_reserved_words,
  718. xds_additional_reserved_words,
  719. ),
  720. }
  721. # Builtins Database
  722. builtins_db = {
  723. # Builtins for unknown dialect
  724. 'unknown': (
  725. common_builtins,
  726. pim_additional_builtins,
  727. iso_additional_builtins,
  728. m2r10_additional_builtins,
  729. ),
  730. # Builtins for PIM Modula-2
  731. 'm2pim': (
  732. common_builtins,
  733. pim_additional_builtins,
  734. ),
  735. # Builtins for ISO Modula-2
  736. 'm2iso': (
  737. common_builtins,
  738. iso_additional_builtins,
  739. ),
  740. # Builtins for ISO Modula-2
  741. 'm2r10': (
  742. common_builtins,
  743. m2r10_additional_builtins,
  744. ),
  745. # Builtins for Objective Modula-2
  746. 'objm2': (
  747. common_builtins,
  748. m2r10_additional_builtins,
  749. objm2_additional_builtins,
  750. ),
  751. # Builtins for Aglet Modula-2 Extensions
  752. 'm2iso+aglet': (
  753. common_builtins,
  754. iso_additional_builtins,
  755. aglet_additional_builtins,
  756. ),
  757. # Builtins for GNU Modula-2 Extensions
  758. 'm2pim+gm2': (
  759. common_builtins,
  760. pim_additional_builtins,
  761. gm2_additional_builtins,
  762. ),
  763. # Builtins for p1 Modula-2 Extensions
  764. 'm2iso+p1': (
  765. common_builtins,
  766. iso_additional_builtins,
  767. p1_additional_builtins,
  768. ),
  769. # Builtins for XDS Modula-2 Extensions
  770. 'm2iso+xds': (
  771. common_builtins,
  772. iso_additional_builtins,
  773. xds_additional_builtins,
  774. ),
  775. }
  # Pseudo-Module Builtins Database
  #
  # Maps each dialect id to the tuple of pseudo-builtin identifier lists
  # that set_dialect() merges into the lexer's pseudo-builtins set.
  pseudo_builtins_db = {
      # Fallback mode: common, PIM, ISO and R10 lists combined
      'unknown': (
          common_pseudo_builtins,
          pim_additional_pseudo_builtins,
          iso_additional_pseudo_builtins,
          m2r10_additional_pseudo_builtins,
      ),

      # PIM Modula-2
      'm2pim': (common_pseudo_builtins, pim_additional_pseudo_builtins),

      # ISO Modula-2
      'm2iso': (common_pseudo_builtins, iso_additional_pseudo_builtins),

      # Modula-2 R10
      'm2r10': (common_pseudo_builtins, m2r10_additional_pseudo_builtins),

      # Objective Modula-2: the R10 lists plus the ObjM2 additions
      'objm2': (
          common_pseudo_builtins,
          m2r10_additional_pseudo_builtins,
          objm2_additional_pseudo_builtins,
      ),

      # ISO Modula-2 with Aglet extensions
      'm2iso+aglet': (
          common_pseudo_builtins,
          iso_additional_pseudo_builtins,
          aglet_additional_pseudo_builtins,
      ),

      # PIM Modula-2 with GNU extensions
      'm2pim+gm2': (
          common_pseudo_builtins,
          pim_additional_pseudo_builtins,
          gm2_additional_pseudo_builtins,
      ),

      # ISO Modula-2 with p1 extensions
      'm2iso+p1': (
          common_pseudo_builtins,
          iso_additional_pseudo_builtins,
          p1_additional_pseudo_builtins,
      ),

      # ISO Modula-2 with XDS extensions
      'm2iso+xds': (
          common_pseudo_builtins,
          iso_additional_pseudo_builtins,
          xds_additional_pseudo_builtins,
      ),
  }
  # Standard Library ADTs Database
  #
  # Maps each dialect id to the tuple of standard library ADT identifier
  # lists that set_dialect() merges into the lexer's ADT set.
  stdlib_adts_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2: no first class library types
      'm2pim': (),

      # ISO Modula-2: no first class library types
      'm2iso': (),

      # Modula-2 R10
      'm2r10': (m2r10_stdlib_adt_identifiers,),

      # Objective Modula-2: same ADT list as R10
      'objm2': (m2r10_stdlib_adt_identifiers,),

      # Aglet Modula-2: no first class library types
      'm2iso+aglet': (),

      # GNU Modula-2: no first class library types
      'm2pim+gm2': (),

      # p1 Modula-2: no first class library types
      'm2iso+p1': (),

      # XDS Modula-2: no first class library types
      'm2iso+xds': (),
  }
  # Standard Library Modules Database
  #
  # Maps each dialect id to the tuple of standard library module identifier
  # lists that set_dialect() merges into the lexer's modules set.
  stdlib_modules_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2
      'm2pim': (pim_stdlib_module_identifiers,),

      # ISO Modula-2
      'm2iso': (iso_stdlib_module_identifiers,),

      # Modula-2 R10: blueprints, modules and the ADT identifiers
      'm2r10': (
          m2r10_stdlib_blueprint_identifiers,
          m2r10_stdlib_module_identifiers,
          m2r10_stdlib_adt_identifiers,
      ),

      # Objective Modula-2: R10 blueprints and modules
      'objm2': (
          m2r10_stdlib_blueprint_identifiers,
          m2r10_stdlib_module_identifiers,
      ),

      # Aglet Modula-2: same module list as ISO
      'm2iso+aglet': (iso_stdlib_module_identifiers,),

      # GNU Modula-2: same module list as PIM
      'm2pim+gm2': (pim_stdlib_module_identifiers,),

      # p1 Modula-2: same module list as ISO
      'm2iso+p1': (iso_stdlib_module_identifiers,),

      # XDS Modula-2: same module list as ISO
      'm2iso+xds': (iso_stdlib_module_identifiers,),
  }
  # Standard Library Types Database
  #
  # Maps each dialect id to the tuple of standard library type identifier
  # lists that set_dialect() merges into the lexer's types set.
  stdlib_types_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2
      'm2pim': (pim_stdlib_type_identifiers,),

      # ISO Modula-2
      'm2iso': (iso_stdlib_type_identifiers,),

      # Modula-2 R10
      'm2r10': (m2r10_stdlib_type_identifiers,),

      # Objective Modula-2: same type list as R10
      'objm2': (m2r10_stdlib_type_identifiers,),

      # Aglet Modula-2: same type list as ISO
      'm2iso+aglet': (iso_stdlib_type_identifiers,),

      # GNU Modula-2: same type list as PIM
      'm2pim+gm2': (pim_stdlib_type_identifiers,),

      # p1 Modula-2: same type list as ISO
      'm2iso+p1': (iso_stdlib_type_identifiers,),

      # XDS Modula-2: same type list as ISO
      'm2iso+xds': (iso_stdlib_type_identifiers,),
  }
  # Standard Library Procedures Database
  #
  # Maps each dialect id to the tuple of standard library procedure
  # identifier lists that set_dialect() merges into the procedures set.
  stdlib_procedures_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2
      'm2pim': (pim_stdlib_proc_identifiers,),

      # ISO Modula-2
      'm2iso': (iso_stdlib_proc_identifiers,),

      # Modula-2 R10
      'm2r10': (m2r10_stdlib_proc_identifiers,),

      # Objective Modula-2: same procedure list as R10
      'objm2': (m2r10_stdlib_proc_identifiers,),

      # Aglet Modula-2: same procedure list as ISO
      'm2iso+aglet': (iso_stdlib_proc_identifiers,),

      # GNU Modula-2: same procedure list as PIM
      'm2pim+gm2': (pim_stdlib_proc_identifiers,),

      # p1 Modula-2: same procedure list as ISO
      'm2iso+p1': (iso_stdlib_proc_identifiers,),

      # XDS Modula-2: same procedure list as ISO
      'm2iso+xds': (iso_stdlib_proc_identifiers,),
  }
  # Standard Library Variables Database
  #
  # Maps each dialect id to the tuple of standard library variable
  # identifier lists that set_dialect() merges into the variables set.
  stdlib_variables_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2
      'm2pim': (pim_stdlib_var_identifiers,),

      # ISO Modula-2
      'm2iso': (iso_stdlib_var_identifiers,),

      # Modula-2 R10
      'm2r10': (m2r10_stdlib_var_identifiers,),

      # Objective Modula-2: same variable list as R10
      'objm2': (m2r10_stdlib_var_identifiers,),

      # Aglet Modula-2: same variable list as ISO
      'm2iso+aglet': (iso_stdlib_var_identifiers,),

      # GNU Modula-2: same variable list as PIM
      'm2pim+gm2': (pim_stdlib_var_identifiers,),

      # p1 Modula-2: same variable list as ISO
      'm2iso+p1': (iso_stdlib_var_identifiers,),

      # XDS Modula-2: same variable list as ISO
      'm2iso+xds': (iso_stdlib_var_identifiers,),
  }
  # Standard Library Constants Database
  #
  # Maps each dialect id to the tuple of standard library constant
  # identifier lists that set_dialect() merges into the constants set.
  stdlib_constants_db = {
      # Fallback mode: this entry must stay empty
      'unknown': (),

      # PIM Modula-2
      'm2pim': (pim_stdlib_const_identifiers,),

      # ISO Modula-2
      'm2iso': (iso_stdlib_const_identifiers,),

      # Modula-2 R10
      'm2r10': (m2r10_stdlib_const_identifiers,),

      # Objective Modula-2: same constant list as R10
      'objm2': (m2r10_stdlib_const_identifiers,),

      # Aglet Modula-2: same constant list as ISO
      'm2iso+aglet': (iso_stdlib_const_identifiers,),

      # GNU Modula-2: same constant list as PIM
      'm2pim+gm2': (pim_stdlib_const_identifiers,),

      # p1 Modula-2: same constant list as ISO
      'm2iso+p1': (iso_stdlib_const_identifiers,),

      # XDS Modula-2: same constant list as ISO
      'm2iso+xds': (iso_stdlib_const_identifiers,),
  }
  1068. # M e t h o d s
  1069. # initialise a lexer instance
  def __init__(self, **options):
      """Create a lexer instance from the given lexer options.

      Options read here:

      ``dialect``
          List of dialect ids.  The first entry naming a selectable dialect
          is activated via ``set_dialect``; if none does, the lexer runs in
          fallback mode (dialect ``'unknown'``).
      ``style``
          List of style names.  ``algol`` or ``algol_nu`` switches on Algol
          publication mode.
      ``treat_stdlib_adts_as_builtins``
          Boolean flag, defaults to ``True``.

      All options are then passed on to ``RegexLexer.__init__``.
      """
      requested = get_list_opt(options, 'dialect', [])

      # Every entry after the first is selectable, matching the range that
      # get_dialect_from_dialect_tag() scans.  The first entry is skipped
      # (presumably the 'unknown' placeholder -- verify against the
      # ``dialects`` tuple).  The previous slice ``[1:-1]`` also dropped the
      # LAST dialect, so it could never be chosen through this option.
      selectable = self.dialects[1:]

      # first valid dialect option wins; otherwise fallback mode (DEFAULT)
      chosen = next(
          (option for option in requested if option in selectable),
          'unknown')
      self.set_dialect(chosen)

      # a dialect tag found in the source may still override the option once
      self.dialect_set_by_tag = False

      # use lowercase mode for Algol style
      styles = get_list_opt(options, 'style', [])
      self.algol_publication_mode = (
          'algol' in styles or 'algol_nu' in styles)

      # option flags
      self.treat_stdlib_adts_as_builtins = get_bool_opt(
          options, 'treat_stdlib_adts_as_builtins', True)

      # call superclass initialiser
      RegexLexer.__init__(self, **options)
  1106. # Set lexer to a specified dialect
  def set_dialect(self, dialect_id):
      """Configure the lexer's identifier sets for one dialect.

      ``dialect_id`` is checked against ``self.dialects``; a value that is
      not listed there selects the ``'unknown'`` fallback.  For the chosen
      dialect, the identifier lists registered in each ``*_db`` table are
      merged into one set per category and stored on the instance
      (``dialect``, ``lexemes_to_reject``, ``reserved_words``, ``builtins``,
      ``pseudo_builtins``, ``adts``, ``modules``, ``types``, ``procedures``,
      ``variables``, ``constants``).

      Precedence between categories:

      * reserved words are removed from builtins, pseudo-builtins and ADTs;
      * builtins are removed from modules, types, procedures, variables
        and constants.

      Raises ``KeyError`` if a table has no entry for the dialect; in that
      case no instance attribute is modified.
      """
      dialect = dialect_id if dialect_id in self.dialects else 'unknown'

      def merged(database):
          # union of all identifier lists this table holds for the dialect
          return set().union(*database[dialect])

      # Build every set first so a failure leaves the lexer state untouched.
      rejected = merged(self.lexemes_to_reject_db)
      reserved = merged(self.reserved_words_db)

      # categories that yield to reserved words
      builtin_names = merged(self.builtins_db) - reserved
      pseudo_builtin_names = merged(self.pseudo_builtins_db) - reserved
      adt_names = merged(self.stdlib_adts_db) - reserved

      # categories that yield to builtins
      module_names = merged(self.stdlib_modules_db) - builtin_names
      type_names = merged(self.stdlib_types_db) - builtin_names
      procedure_names = merged(self.stdlib_procedures_db) - builtin_names
      variable_names = merged(self.stdlib_variables_db) - builtin_names
      constant_names = merged(self.stdlib_constants_db) - builtin_names

      # update lexer state
      self.dialect = dialect
      self.lexemes_to_reject = rejected
      self.reserved_words = reserved
      self.builtins = builtin_names
      self.pseudo_builtins = pseudo_builtin_names
      self.adts = adt_names
      self.modules = module_names
      self.types = type_names
      self.procedures = procedure_names
      self.variables = variable_names
      self.constants = constant_names
  1205. # Extracts a dialect name from a dialect tag comment string and checks
  1206. # the extracted name against known dialects. If a match is found, the
  1207. # matching name is returned, otherwise dialect id 'unknown' is returned
  def get_dialect_from_dialect_tag(self, dialect_tag):
      """Return the dialect id named by a dialect tag comment string.

      A dialect tag has the form ``(*!<indicator>*)``.  The indicator is
      returned when it equals any entry of ``self.dialects`` other than
      the first one.  For everything else -- wrong delimiters, an empty
      indicator, or an indicator that matches no such entry -- the result
      is ``'unknown'``.
      """
      opener, closer = '(*!', '*)'
      fallback = 'unknown'

      # the tag must carry both delimiters and at least one character
      # between them
      well_formed = (
          len(dialect_tag) > len(opener) + len(closer)
          and dialect_tag.startswith(opener)
          and dialect_tag.endswith(closer)
      )
      if not well_formed:
          return fallback

      # text between the delimiters
      indicator = dialect_tag[len(opener):-len(closer)]

      # the first entry of self.dialects is never a valid indicator
      if indicator in self.dialects[1:]:
          return indicator
      return fallback
  1254. # intercept the token stream, modify token attributes and return them
  def get_tokens_unprocessed(self, text):
      """Yield ``(index, token, value)`` triples adjusted for the dialect.

      Tokens come from ``RegexLexer.get_tokens_unprocessed`` and are then
      post-processed one by one:

      * the first ``Comment.Special`` token holding a valid dialect tag
        switches the active dialect (only once per lexer instance);
      * plain ``Name`` tokens are re-tagged as reserved words, builtins,
        pseudo-builtins, ADTs, modules, types, procedures, variables or
        constants, in that order of precedence;
      * number literals and comments that the active dialect does not
        allow become ``Error`` tokens;
      * any other token whose text is in ``lexemes_to_reject`` becomes an
        ``Error`` token.

      In Algol publication mode, reserved words and builtins are lowercased
      and a few operator lexemes are replaced by single glyphs.
      """
      r10_family = ('m2r10', 'objm2')
      prefixed_literal_starts = ('0b', '0x', '0u')
      algol_glyphs = {
          '#': '≠',
          '<=': '≤',
          '>=': '≥',
          '==': '≡',
          '*.': '•',
      }

      raw_tokens = RegexLexer.get_tokens_unprocessed(self, text)
      for index, token, value in raw_tokens:
          # A dialect tag resets reserved words and builtins, but only
          # until the first valid tag has been seen.
          if not self.dialect_set_by_tag and token == Comment.Special:
              tagged_dialect = self.get_dialect_from_dialect_tag(value)
              if tagged_dialect != 'unknown':
                  self.set_dialect(tagged_dialect)
                  self.dialect_set_by_tag = True

          if token is Name:
              # reserved words, predefined and stdlib identifiers
              fold_case = False
              if value in self.reserved_words:
                  token, fold_case = Keyword.Reserved, True
              elif value in self.builtins:
                  token, fold_case = Name.Builtin, True
              elif value in self.pseudo_builtins:
                  token, fold_case = Name.Builtin.Pseudo, True
              elif value in self.adts:
                  if self.treat_stdlib_adts_as_builtins:
                      token, fold_case = Name.Builtin.Pseudo, True
                  else:
                      token = Name.Namespace
              elif value in self.modules:
                  token = Name.Namespace
              elif value in self.types:
                  token = Name.Class
              elif value in self.procedures:
                  token = Name.Function
              elif value in self.variables:
                  token = Name.Variable
              elif value in self.constants:
                  token = Name.Constant

              # only keywords and builtin-like names are lowercased
              if fold_case and self.algol_publication_mode:
                  value = value.lower()

          elif token in Number:
              if self.dialect in r10_family:
                  # M2 R10 and ObjM2 reject base-8 literals, suffix base-16
                  # literals and real numbers written with E
                  if (token is Number.Oct
                          or (token is Number.Hex and 'H' in value)
                          or (token is Number.Float and 'E' in value)):
                      token = Error
              elif self.dialect != 'unknown':
                  # PIM and ISO dialects reject prefix number literals
                  if ("'" in value
                          or value[0:2] in prefixed_literal_starts):
                      token = Error

          elif token in Comment:
              if token is Comment.Single:
                  # single line comments are errors for PIM and ISO dialects
                  if (self.dialect != 'unknown'
                          and self.dialect not in r10_family):
                      token = Error
              elif token is Comment.Preproc:
                  pim_dialect = self.dialect.startswith('m2pim')
                  if value.startswith('<*') and pim_dialect:
                      # ISO pragma is an error for PIM dialects
                      token = Error
                  elif (value.startswith('(*$')
                          and self.dialect != 'unknown'
                          and not pim_dialect):
                      # PIM pragma is an ordinary comment for other dialects
                      token = Comment.Multiline

          else:
              # token is neither Name, Number nor Comment
              if value in self.lexemes_to_reject:
                  token = Error

              # substitute lexemes when in Algol mode
              if self.algol_publication_mode:
                  value = algol_glyphs.get(value, value)

          yield index, token, value
  def analyse_text(text):
      """Rate how likely ``text`` is Modula-2 source.

      Modula-2 is Pascal-like but uses PROCEDURE where Pascal has FUNCTION.

      Returns ``None`` when the text lacks any of ``(*``, ``*)`` or ``:=``,
      ``0.0`` when the word FUNCTION occurs, ``0.6`` when PROCEDURE occurs
      without FUNCTION, and ``0`` otherwise.
      """
      # Not Pascal-like at all: bail out early
      if '(*' not in text or '*)' not in text or ':=' not in text:
          return None

      # FUNCTION is only valid in Pascal, not in Modula-2; it overrides
      # any PROCEDURE match
      if re.search(r'\bFUNCTION\b', text):
          return 0.0

      # PROCEDURE is a Modula-2 keyword
      if re.search(r'\bPROCEDURE\b', text):
          return 0.6

      return 0