from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
from fontTools.feaLib.location import FeatureLibLocation
import re
import os

try:
    import cython
except ImportError:
    # if cython not installed, use mock module with no-op decorators and types
    from fontTools.misc import cython


class Lexer(object):
    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"
    def __init__(self, text, filename):
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)
    def next_(self):
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ is Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_char in Lexer.CHAR_DIGIT_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if len(glyphclass) > 63:
                raise FeatureLibError(
                    "Glyph class names must not be longer than 63 characters", location
                )
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char == "-" and next_char in Lexer.CHAR_DIGIT_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)
    def scan_over_(self, valid):
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p
    def scan_anonymous_block(self, tag):
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        regexp = r"}\s*" + tag + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
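

# A minimal usage sketch (not part of the upstream module): iterating over a
# Lexer yields (token_type, token, location) tuples, with NEWLINE tokens
# filtered out by __next__. The feature text and the _example_tokenize name
# below are made-up illustrations, not fontTools API.
def _example_tokenize():
    text = "feature liga { sub f i by f_i; } liga;"
    # Yields e.g. ("NAME", "feature"), ("NAME", "liga"), ("SYMBOL", "{"), ...
    return [(token_type, token) for token_type, token, _ in Lexer(text, "<example>")]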


class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that, for historical
    reasons, relative include paths should be resolved in this order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Behavior:
            If includeDir is passed, it is used as the top-level include
            directory for all encountered include statements. If it is not
            passed, ``os.path.dirname(featurefile)`` is used as the include
            directory.
        """
        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir
    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                self.lexers_.pop()
                continue
            if token_type is Lexer.NAME and token == "include":
                fname_type, fname_token, fname_location = lexer.next()
                if fname_type is not Lexer.FILENAME:
                    raise FeatureLibError("Expected file name", fname_location)
                # semi_type, semi_token, semi_location = lexer.next()
                # if semi_type is not Lexer.SYMBOL or semi_token != ";":
                #     raise FeatureLibError("Expected ';'", semi_location)
                if os.path.isabs(fname_token):
                    path = fname_token
                else:
                    if self.includeDir is not None:
                        curpath = self.includeDir
                    elif self.featurefilepath is not None:
                        curpath = os.path.dirname(self.featurefilepath)
                    else:
                        # if the IncludingLexer was initialized from an in-memory
                        # file-like stream, it doesn't have a 'name' pointing to
                        # its filesystem path, therefore we fall back to using the
                        # current working directory to resolve relative includes
                        curpath = os.getcwd()
                    path = os.path.join(curpath, fname_token)
                if len(self.lexers_) >= 5:
                    raise FeatureLibError(
                        "Too many recursive includes", fname_location
                    )
                try:
                    self.lexers_.append(self.make_lexer_(path))
                except FileNotFoundError as err:
                    raise IncludedFeaNotFound(fname_token, fname_location) from err
            else:
                return (token_type, token, location)
        raise StopIteration()
    @staticmethod
    def make_lexer_(file_or_path):
        if hasattr(file_or_path, "read"):
            fileobj, closing = file_or_path, False
        else:
            filename, closing = file_or_path, True
            fileobj = open(filename, "r", encoding="utf-8")
        data = fileobj.read()
        filename = getattr(fileobj, "name", None)
        if closing:
            fileobj.close()
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        return self.lexers_[-1].scan_anonymous_block(tag)
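

# A minimal sketch (not part of the upstream module) of how a relative include
# is resolved against the top-level feature file's directory when no includeDir
# is given. The file names, feature text, and the _example_follow_includes name
# are made-up illustrations; temporary files keep the sketch self-contained.
def _example_follow_includes():
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "common.fea"), "w", encoding="utf-8") as f:
            f.write("languagesystem DFLT dflt;\n")
        main = os.path.join(tmpdir, "main.fea")
        with open(main, "w", encoding="utf-8") as f:
            f.write("include(common.fea);\nfeature kern {\n} kern;\n")
        # Tokens from common.fea appear inline where the include statement was.
        return [(token_type, token) for token_type, token, _ in IncludingLexer(main)]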


class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements but emits them as-is."""

    def __next__(self):  # Python 3
        return next(self.lexers_[0])
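

# A minimal sketch (not part of the upstream module): NonIncludingLexer keeps
# the include statement in the token stream instead of reading the referenced
# file, so it also works for in-memory sources. The feature text and the
# _example_keep_includes name are made-up illustrations.
def _example_keep_includes():
    from io import StringIO

    lexer = NonIncludingLexer(StringIO("include(common.fea);\n"))
    # expected: [("NAME", "include"), ("FILENAME", "common.fea"), ("SYMBOL", ";")]
    return [(token_type, token) for token_type, token, _ in lexer]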