# lexer.py (3.3 KB)

  1. from fontTools.voltLib.error import VoltLibError
  2. class Lexer(object):
  3. NUMBER = "NUMBER"
  4. STRING = "STRING"
  5. NAME = "NAME"
  6. NEWLINE = "NEWLINE"
  7. CHAR_WHITESPACE_ = " \t"
  8. CHAR_NEWLINE_ = "\r\n"
  9. CHAR_DIGIT_ = "0123456789"
  10. CHAR_UC_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  11. CHAR_LC_LETTER_ = "abcdefghijklmnopqrstuvwxyz"
  12. CHAR_UNDERSCORE_ = "_"
  13. CHAR_PERIOD_ = "."
  14. CHAR_NAME_START_ = (
  15. CHAR_UC_LETTER_ + CHAR_LC_LETTER_ + CHAR_PERIOD_ + CHAR_UNDERSCORE_
  16. )
  17. CHAR_NAME_CONTINUATION_ = CHAR_NAME_START_ + CHAR_DIGIT_
  18. def __init__(self, text, filename):
  19. self.filename_ = filename
  20. self.line_ = 1
  21. self.pos_ = 0
  22. self.line_start_ = 0
  23. self.text_ = text
  24. self.text_length_ = len(text)
  25. def __iter__(self):
  26. return self
  27. def next(self): # Python 2
  28. return self.__next__()
  29. def __next__(self): # Python 3
  30. while True:
  31. token_type, token, location = self.next_()
  32. if token_type not in {Lexer.NEWLINE}:
  33. return (token_type, token, location)
  34. def location_(self):
  35. column = self.pos_ - self.line_start_ + 1
  36. return (self.filename_ or "<volt>", self.line_, column)
  37. def next_(self):
  38. self.scan_over_(Lexer.CHAR_WHITESPACE_)
  39. location = self.location_()
  40. start = self.pos_
  41. text = self.text_
  42. limit = len(text)
  43. if start >= limit:
  44. raise StopIteration()
  45. cur_char = text[start]
  46. next_char = text[start + 1] if start + 1 < limit else None
  47. if cur_char == "\n":
  48. self.pos_ += 1
  49. self.line_ += 1
  50. self.line_start_ = self.pos_
  51. return (Lexer.NEWLINE, None, location)
  52. if cur_char == "\r":
  53. self.pos_ += 2 if next_char == "\n" else 1
  54. self.line_ += 1
  55. self.line_start_ = self.pos_
  56. return (Lexer.NEWLINE, None, location)
  57. if cur_char == '"':
  58. self.pos_ += 1
  59. self.scan_until_('"\r\n')
  60. if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
  61. self.pos_ += 1
  62. return (Lexer.STRING, text[start + 1 : self.pos_ - 1], location)
  63. else:
  64. raise VoltLibError("Expected '\"' to terminate string", location)
  65. if cur_char in Lexer.CHAR_NAME_START_:
  66. self.pos_ += 1
  67. self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
  68. token = text[start : self.pos_]
  69. return (Lexer.NAME, token, location)
  70. if cur_char in Lexer.CHAR_DIGIT_:
  71. self.scan_over_(Lexer.CHAR_DIGIT_)
  72. return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
  73. if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:
  74. self.pos_ += 1
  75. self.scan_over_(Lexer.CHAR_DIGIT_)
  76. return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
  77. raise VoltLibError("Unexpected character: '%s'" % cur_char, location)
  78. def scan_over_(self, valid):
  79. p = self.pos_
  80. while p < self.text_length_ and self.text_[p] in valid:
  81. p += 1
  82. self.pos_ = p
  83. def scan_until_(self, stop_at):
  84. p = self.pos_
  85. while p < self.text_length_ and self.text_[p] not in stop_at:
  86. p += 1
  87. self.pos_ = p