# lexer.py
  1. from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
  2. from fontTools.feaLib.location import FeatureLibLocation
  3. import re
  4. import os
  5. try:
  6. import cython
  7. except ImportError:
  8. # if cython not installed, use mock module with no-op decorators and types
  9. from fontTools.misc import cython
  10. class Lexer(object):
  11. NUMBER = "NUMBER"
  12. HEXADECIMAL = "HEXADECIMAL"
  13. OCTAL = "OCTAL"
  14. NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
  15. FLOAT = "FLOAT"
  16. STRING = "STRING"
  17. NAME = "NAME"
  18. FILENAME = "FILENAME"
  19. GLYPHCLASS = "GLYPHCLASS"
  20. CID = "CID"
  21. SYMBOL = "SYMBOL"
  22. COMMENT = "COMMENT"
  23. NEWLINE = "NEWLINE"
  24. ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"
  25. CHAR_WHITESPACE_ = " \t"
  26. CHAR_NEWLINE_ = "\r\n"
  27. CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
  28. CHAR_DIGIT_ = "0123456789"
  29. CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
  30. CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
  31. CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
  32. CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"
  33. RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")
  34. MODE_NORMAL_ = "NORMAL"
  35. MODE_FILENAME_ = "FILENAME"
  36. def __init__(self, text, filename):
  37. self.filename_ = filename
  38. self.line_ = 1
  39. self.pos_ = 0
  40. self.line_start_ = 0
  41. self.text_ = text
  42. self.text_length_ = len(text)
  43. self.mode_ = Lexer.MODE_NORMAL_
  44. def __iter__(self):
  45. return self
  46. def next(self): # Python 2
  47. return self.__next__()
  48. def __next__(self): # Python 3
  49. while True:
  50. token_type, token, location = self.next_()
  51. if token_type != Lexer.NEWLINE:
  52. return (token_type, token, location)
  53. def location_(self):
  54. column = self.pos_ - self.line_start_ + 1
  55. return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)
  56. def next_(self):
  57. self.scan_over_(Lexer.CHAR_WHITESPACE_)
  58. location = self.location_()
  59. start = self.pos_
  60. text = self.text_
  61. limit = len(text)
  62. if start >= limit:
  63. raise StopIteration()
  64. cur_char = text[start]
  65. next_char = text[start + 1] if start + 1 < limit else None
  66. if cur_char == "\n":
  67. self.pos_ += 1
  68. self.line_ += 1
  69. self.line_start_ = self.pos_
  70. return (Lexer.NEWLINE, None, location)
  71. if cur_char == "\r":
  72. self.pos_ += 2 if next_char == "\n" else 1
  73. self.line_ += 1
  74. self.line_start_ = self.pos_
  75. return (Lexer.NEWLINE, None, location)
  76. if cur_char == "#":
  77. self.scan_until_(Lexer.CHAR_NEWLINE_)
  78. return (Lexer.COMMENT, text[start : self.pos_], location)
  79. if self.mode_ is Lexer.MODE_FILENAME_:
  80. if cur_char != "(":
  81. raise FeatureLibError("Expected '(' before file name", location)
  82. self.scan_until_(")")
  83. cur_char = text[self.pos_] if self.pos_ < limit else None
  84. if cur_char != ")":
  85. raise FeatureLibError("Expected ')' after file name", location)
  86. self.pos_ += 1
  87. self.mode_ = Lexer.MODE_NORMAL_
  88. return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)
  89. if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_:
  90. self.pos_ += 1
  91. self.scan_over_(Lexer.CHAR_DIGIT_)
  92. return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
  93. if cur_char == "@":
  94. self.pos_ += 1
  95. self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
  96. glyphclass = text[start + 1 : self.pos_]
  97. if len(glyphclass) < 1:
  98. raise FeatureLibError("Expected glyph class name", location)
  99. if len(glyphclass) > 63:
  100. raise FeatureLibError(
  101. "Glyph class names must not be longer than 63 characters", location
  102. )
  103. if not Lexer.RE_GLYPHCLASS.match(glyphclass):
  104. raise FeatureLibError(
  105. "Glyph class names must consist of letters, digits, "
  106. "underscore, period or hyphen",
  107. location,
  108. )
  109. return (Lexer.GLYPHCLASS, glyphclass, location)
  110. if cur_char in Lexer.CHAR_NAME_START_:
  111. self.pos_ += 1
  112. self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
  113. token = text[start : self.pos_]
  114. if token == "include":
  115. self.mode_ = Lexer.MODE_FILENAME_
  116. return (Lexer.NAME, token, location)
  117. if cur_char == "0" and next_char in "xX":
  118. self.pos_ += 2
  119. self.scan_over_(Lexer.CHAR_HEXDIGIT_)
  120. return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
  121. if cur_char == "0" and next_char in Lexer.CHAR_DIGIT_:
  122. self.scan_over_(Lexer.CHAR_DIGIT_)
  123. return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
  124. if cur_char in Lexer.CHAR_DIGIT_:
  125. self.scan_over_(Lexer.CHAR_DIGIT_)
  126. if self.pos_ >= limit or text[self.pos_] != ".":
  127. return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
  128. self.scan_over_(".")
  129. self.scan_over_(Lexer.CHAR_DIGIT_)
  130. return (Lexer.FLOAT, float(text[start : self.pos_]), location)
  131. if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:
  132. self.pos_ += 1
  133. self.scan_over_(Lexer.CHAR_DIGIT_)
  134. if self.pos_ >= limit or text[self.pos_] != ".":
  135. return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
  136. self.scan_over_(".")
  137. self.scan_over_(Lexer.CHAR_DIGIT_)
  138. return (Lexer.FLOAT, float(text[start : self.pos_]), location)
  139. if cur_char in Lexer.CHAR_SYMBOL_:
  140. self.pos_ += 1
  141. return (Lexer.SYMBOL, cur_char, location)
  142. if cur_char == '"':
  143. self.pos_ += 1
  144. self.scan_until_('"')
  145. if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
  146. self.pos_ += 1
  147. # strip newlines embedded within a string
  148. string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
  149. return (Lexer.STRING, string, location)
  150. else:
  151. raise FeatureLibError("Expected '\"' to terminate string", location)
  152. raise FeatureLibError("Unexpected character: %r" % cur_char, location)
  153. def scan_over_(self, valid):
  154. p = self.pos_
  155. while p < self.text_length_ and self.text_[p] in valid:
  156. p += 1
  157. self.pos_ = p
  158. def scan_until_(self, stop_at):
  159. p = self.pos_
  160. while p < self.text_length_ and self.text_[p] not in stop_at:
  161. p += 1
  162. self.pos_ = p
  163. def scan_anonymous_block(self, tag):
  164. location = self.location_()
  165. tag = tag.strip()
  166. self.scan_until_(Lexer.CHAR_NEWLINE_)
  167. self.scan_over_(Lexer.CHAR_NEWLINE_)
  168. regexp = r"}\s*" + tag + r"\s*;"
  169. split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
  170. if len(split) != 2:
  171. raise FeatureLibError(
  172. "Expected '} %s;' to terminate anonymous block" % tag, location
  173. )
  174. self.pos_ += len(split[0])
  175. return (Lexer.ANONYMOUS_BLOCK, split[0], location)
  176. class IncludingLexer(object):
  177. """A Lexer that follows include statements.
  178. The OpenType feature file specification states that due to
  179. historical reasons, relative imports should be resolved in this
  180. order:
  181. 1. If the source font is UFO format, then relative to the UFO's
  182. font directory
  183. 2. relative to the top-level include file
  184. 3. relative to the parent include file
  185. We only support 1 (via includeDir) and 2.
  186. """
  187. def __init__(self, featurefile, *, includeDir=None):
  188. """Initializes an IncludingLexer.
  189. Behavior:
  190. If includeDir is passed, it will be used to determine the top-level
  191. include directory to use for all encountered include statements. If it is
  192. not passed, ``os.path.dirname(featurefile)`` will be considered the
  193. include directory.
  194. """
  195. self.lexers_ = [self.make_lexer_(featurefile)]
  196. self.featurefilepath = self.lexers_[0].filename_
  197. self.includeDir = includeDir
  198. def __iter__(self):
  199. return self
  200. def next(self): # Python 2
  201. return self.__next__()
  202. def __next__(self): # Python 3
  203. while self.lexers_:
  204. lexer = self.lexers_[-1]
  205. try:
  206. token_type, token, location = next(lexer)
  207. except StopIteration:
  208. self.lexers_.pop()
  209. continue
  210. if token_type is Lexer.NAME and token == "include":
  211. fname_type, fname_token, fname_location = lexer.next()
  212. if fname_type is not Lexer.FILENAME:
  213. raise FeatureLibError("Expected file name", fname_location)
  214. # semi_type, semi_token, semi_location = lexer.next()
  215. # if semi_type is not Lexer.SYMBOL or semi_token != ";":
  216. # raise FeatureLibError("Expected ';'", semi_location)
  217. if os.path.isabs(fname_token):
  218. path = fname_token
  219. else:
  220. if self.includeDir is not None:
  221. curpath = self.includeDir
  222. elif self.featurefilepath is not None:
  223. curpath = os.path.dirname(self.featurefilepath)
  224. else:
  225. # if the IncludingLexer was initialized from an in-memory
  226. # file-like stream, it doesn't have a 'name' pointing to
  227. # its filesystem path, therefore we fall back to using the
  228. # current working directory to resolve relative includes
  229. curpath = os.getcwd()
  230. path = os.path.join(curpath, fname_token)
  231. if len(self.lexers_) >= 5:
  232. raise FeatureLibError("Too many recursive includes", fname_location)
  233. try:
  234. self.lexers_.append(self.make_lexer_(path))
  235. except FileNotFoundError as err:
  236. raise IncludedFeaNotFound(fname_token, fname_location) from err
  237. else:
  238. return (token_type, token, location)
  239. raise StopIteration()
  240. @staticmethod
  241. def make_lexer_(file_or_path):
  242. if hasattr(file_or_path, "read"):
  243. fileobj, closing = file_or_path, False
  244. else:
  245. filename, closing = file_or_path, True
  246. fileobj = open(filename, "r", encoding="utf-8")
  247. data = fileobj.read()
  248. filename = getattr(fileobj, "name", None)
  249. if closing:
  250. fileobj.close()
  251. return Lexer(data, filename)
  252. def scan_anonymous_block(self, tag):
  253. return self.lexers_[-1].scan_anonymous_block(tag)
  254. class NonIncludingLexer(IncludingLexer):
  255. """Lexer that does not follow `include` statements, emits them as-is."""
  256. def __next__(self): # Python 3
  257. return next(self.lexers_[0])