ezhil.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. """
  2. pygments.lexers.ezhil
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Pygments lexers for Ezhil language.
  5. :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, words
  10. from pygments.token import Keyword, Text, Comment, Name
  11. from pygments.token import String, Number, Punctuation, Operator
  12. __all__ = ['EzhilLexer']
  13. class EzhilLexer(RegexLexer):
  14. """
  15. Lexer for `Ezhil, a Tamil script-based programming language <http://ezhillang.org>`_
  16. .. versionadded:: 2.1
  17. """
  18. name = 'Ezhil'
  19. aliases = ['ezhil']
  20. filenames = ['*.n']
  21. mimetypes = ['text/x-ezhil']
  22. flags = re.MULTILINE | re.UNICODE
  23. # Refer to tamil.utf8.tamil_letters from open-tamil for a stricter version of this.
  24. # This much simpler version is close enough, and includes combining marks.
  25. _TALETTERS = '[a-zA-Z_]|[\u0b80-\u0bff]'
  26. tokens = {
  27. 'root': [
  28. include('keywords'),
  29. (r'#.*\n', Comment.Single),
  30. (r'[@+/*,^\-%]|[!<>=]=?|&&?|\|\|?', Operator),
  31. ('இல்', Operator.Word),
  32. (words(('assert', 'max', 'min',
  33. 'நீளம்', 'சரம்_இடமாற்று', 'சரம்_கண்டுபிடி',
  34. 'பட்டியல்', 'பின்இணை', 'வரிசைப்படுத்து',
  35. 'எடு', 'தலைகீழ்', 'நீட்டிக்க', 'நுழைக்க', 'வை',
  36. 'கோப்பை_திற', 'கோப்பை_எழுது', 'கோப்பை_மூடு',
  37. 'pi', 'sin', 'cos', 'tan', 'sqrt', 'hypot', 'pow',
  38. 'exp', 'log', 'log10', 'exit',
  39. ), suffix=r'\b'), Name.Builtin),
  40. (r'(True|False)\b', Keyword.Constant),
  41. (r'[^\S\n]+', Text),
  42. include('identifier'),
  43. include('literal'),
  44. (r'[(){}\[\]:;.]', Punctuation),
  45. ],
  46. 'keywords': [
  47. ('பதிப்பி|தேர்ந்தெடு|தேர்வு|ஏதேனில்|ஆனால்|இல்லைஆனால்|இல்லை|ஆக|ஒவ்வொன்றாக|இல்|வரை|செய்|முடியேனில்|பின்கொடு|முடி|நிரல்பாகம்|தொடர்|நிறுத்து|நிரல்பாகம்', Keyword),
  48. ],
  49. 'identifier': [
  50. ('(?:'+_TALETTERS+')(?:[0-9]|'+_TALETTERS+')*', Name),
  51. ],
  52. 'literal': [
  53. (r'".*?"', String),
  54. (r'(?u)\d+((\.\d*)?[eE][+-]?\d+|\.\d*)', Number.Float),
  55. (r'(?u)\d+', Number.Integer),
  56. ]
  57. }
  58. def analyse_text(text):
  59. """This language uses Tamil-script. We'll assume that if there's a
  60. decent amount of Tamil-characters, it's this language. This assumption
  61. is obviously horribly off if someone uses string literals in tamil
  62. in another language."""
  63. if len(re.findall(r'[\u0b80-\u0bff]', text)) > 10:
  64. return 0.25
  65. def __init__(self, **options):
  66. super().__init__(**options)
  67. self.encoding = options.get('encoding', 'utf-8')