oberon.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. """
  2. pygments.lexers.oberon
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Oberon family languages.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, words
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation
# Public API of this module: only the Component Pascal lexer is exported.
__all__ = ['ComponentPascalLexer']
  13. class ComponentPascalLexer(RegexLexer):
  14. """
  15. For `Component Pascal <http://www.oberon.ch/pdf/CP-Lang.pdf>`_ source code.
  16. .. versionadded:: 2.1
  17. """
  18. name = 'Component Pascal'
  19. aliases = ['componentpascal', 'cp']
  20. filenames = ['*.cp', '*.cps']
  21. mimetypes = ['text/x-component-pascal']
  22. flags = re.MULTILINE | re.DOTALL
  23. tokens = {
  24. 'root': [
  25. include('whitespace'),
  26. include('comments'),
  27. include('punctuation'),
  28. include('numliterals'),
  29. include('strings'),
  30. include('operators'),
  31. include('builtins'),
  32. include('identifiers'),
  33. ],
  34. 'whitespace': [
  35. (r'\n+', Text), # blank lines
  36. (r'\s+', Text), # whitespace
  37. ],
  38. 'comments': [
  39. (r'\(\*([^$].*?)\*\)', Comment.Multiline),
  40. # TODO: nested comments (* (* ... *) ... (* ... *) *) not supported!
  41. ],
  42. 'punctuation': [
  43. (r'[()\[\]{},.:;|]', Punctuation),
  44. ],
  45. 'numliterals': [
  46. (r'[0-9A-F]+X\b', Number.Hex), # char code
  47. (r'[0-9A-F]+[HL]\b', Number.Hex), # hexadecimal number
  48. (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number
  49. (r'[0-9]+\.[0-9]+', Number.Float), # real number
  50. (r'[0-9]+', Number.Integer), # decimal whole number
  51. ],
  52. 'strings': [
  53. (r"'[^\n']*'", String), # single quoted string
  54. (r'"[^\n"]*"', String), # double quoted string
  55. ],
  56. 'operators': [
  57. # Arithmetic Operators
  58. (r'[+-]', Operator),
  59. (r'[*/]', Operator),
  60. # Relational Operators
  61. (r'[=#<>]', Operator),
  62. # Dereferencing Operator
  63. (r'\^', Operator),
  64. # Logical AND Operator
  65. (r'&', Operator),
  66. # Logical NOT Operator
  67. (r'~', Operator),
  68. # Assignment Symbol
  69. (r':=', Operator),
  70. # Range Constructor
  71. (r'\.\.', Operator),
  72. (r'\$', Operator),
  73. ],
  74. 'identifiers': [
  75. (r'([a-zA-Z_$][\w$]*)', Name),
  76. ],
  77. 'builtins': [
  78. (words((
  79. 'ANYPTR', 'ANYREC', 'BOOLEAN', 'BYTE', 'CHAR', 'INTEGER', 'LONGINT',
  80. 'REAL', 'SET', 'SHORTCHAR', 'SHORTINT', 'SHORTREAL'
  81. ), suffix=r'\b'), Keyword.Type),
  82. (words((
  83. 'ABS', 'ABSTRACT', 'ARRAY', 'ASH', 'ASSERT', 'BEGIN', 'BITS', 'BY',
  84. 'CAP', 'CASE', 'CHR', 'CLOSE', 'CONST', 'DEC', 'DIV', 'DO', 'ELSE',
  85. 'ELSIF', 'EMPTY', 'END', 'ENTIER', 'EXCL', 'EXIT', 'EXTENSIBLE', 'FOR',
  86. 'HALT', 'IF', 'IMPORT', 'IN', 'INC', 'INCL', 'IS', 'LEN', 'LIMITED',
  87. 'LONG', 'LOOP', 'MAX', 'MIN', 'MOD', 'MODULE', 'NEW', 'ODD', 'OF',
  88. 'OR', 'ORD', 'OUT', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
  89. 'SHORT', 'SHORTCHAR', 'SHORTINT', 'SIZE', 'THEN', 'TYPE', 'TO', 'UNTIL',
  90. 'VAR', 'WHILE', 'WITH'
  91. ), suffix=r'\b'), Keyword.Reserved),
  92. (r'(TRUE|FALSE|NIL|INF)\b', Keyword.Constant),
  93. ]
  94. }
  95. def analyse_text(text):
  96. """The only other lexer using .cp is the C++ one, so we check if for
  97. a few common Pascal keywords here. Those are unfortunately quite
  98. common across various business languages as well."""
  99. result = 0
  100. if 'BEGIN' in text:
  101. result += 0.01
  102. if 'END' in text:
  103. result += 0.01
  104. if 'PROCEDURE' in text:
  105. result += 0.01
  106. if 'END' in text:
  107. result += 0.01
  108. return result