123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656 |
- import fontTools.voltLib.ast as ast
- from fontTools.voltLib.lexer import Lexer
- from fontTools.voltLib.error import VoltLibError
- from io import open
- PARSE_FUNCS = {
- "DEF_GLYPH": "parse_def_glyph_",
- "DEF_GROUP": "parse_def_group_",
- "DEF_SCRIPT": "parse_def_script_",
- "DEF_LOOKUP": "parse_def_lookup_",
- "DEF_ANCHOR": "parse_def_anchor_",
- "GRID_PPEM": "parse_ppem_",
- "PRESENTATION_PPEM": "parse_ppem_",
- "PPOSITIONING_PPEM": "parse_ppem_",
- "COMPILER_USEEXTENSIONLOOKUPS": "parse_noarg_option_",
- "COMPILER_USEPAIRPOSFORMAT2": "parse_noarg_option_",
- "CMAP_FORMAT": "parse_cmap_format",
- "DO_NOT_TOUCH_CMAP": "parse_noarg_option_",
- }
- class Parser(object):
- def __init__(self, path):
- self.doc_ = ast.VoltFile()
- self.glyphs_ = OrderedSymbolTable()
- self.groups_ = SymbolTable()
- self.anchors_ = {} # dictionary of SymbolTable() keyed by glyph
- self.scripts_ = SymbolTable()
- self.langs_ = SymbolTable()
- self.lookups_ = SymbolTable()
- self.next_token_type_, self.next_token_ = (None, None)
- self.next_token_location_ = None
- self.make_lexer_(path)
- self.advance_lexer_()
- def make_lexer_(self, file_or_path):
- if hasattr(file_or_path, "read"):
- filename = getattr(file_or_path, "name", None)
- data = file_or_path.read()
- else:
- filename = file_or_path
- with open(file_or_path, "r") as f:
- data = f.read()
- self.lexer_ = Lexer(data, filename)
- def parse(self):
- statements = self.doc_.statements
- while self.next_token_type_ is not None:
- self.advance_lexer_()
- if self.cur_token_ in PARSE_FUNCS.keys():
- func = getattr(self, PARSE_FUNCS[self.cur_token_])
- statements.append(func())
- elif self.is_cur_keyword_("END"):
- break
- else:
- raise VoltLibError(
- "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
- self.cur_token_location_,
- )
- return self.doc_
- def parse_def_glyph_(self):
- assert self.is_cur_keyword_("DEF_GLYPH")
- location = self.cur_token_location_
- name = self.expect_string_()
- self.expect_keyword_("ID")
- gid = self.expect_number_()
- if gid < 0:
- raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
- gunicode = None
- if self.next_token_ == "UNICODE":
- self.expect_keyword_("UNICODE")
- gunicode = [self.expect_number_()]
- if gunicode[0] < 0:
- raise VoltLibError("Invalid glyph UNICODE", self.cur_token_location_)
- elif self.next_token_ == "UNICODEVALUES":
- self.expect_keyword_("UNICODEVALUES")
- gunicode = self.parse_unicode_values_()
- gtype = None
- if self.next_token_ == "TYPE":
- self.expect_keyword_("TYPE")
- gtype = self.expect_name_()
- assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
- components = None
- if self.next_token_ == "COMPONENTS":
- self.expect_keyword_("COMPONENTS")
- components = self.expect_number_()
- self.expect_keyword_("END_GLYPH")
- if self.glyphs_.resolve(name) is not None:
- raise VoltLibError(
- 'Glyph "%s" (gid %i) already defined' % (name, gid), location
- )
- def_glyph = ast.GlyphDefinition(
- name, gid, gunicode, gtype, components, location=location
- )
- self.glyphs_.define(name, def_glyph)
- return def_glyph
- def parse_def_group_(self):
- assert self.is_cur_keyword_("DEF_GROUP")
- location = self.cur_token_location_
- name = self.expect_string_()
- enum = None
- if self.next_token_ == "ENUM":
- enum = self.parse_enum_()
- self.expect_keyword_("END_GROUP")
- if self.groups_.resolve(name) is not None:
- raise VoltLibError(
- 'Glyph group "%s" already defined, '
- "group names are case insensitive" % name,
- location,
- )
- def_group = ast.GroupDefinition(name, enum, location=location)
- self.groups_.define(name, def_group)
- return def_group
- def parse_def_script_(self):
- assert self.is_cur_keyword_("DEF_SCRIPT")
- location = self.cur_token_location_
- name = None
- if self.next_token_ == "NAME":
- self.expect_keyword_("NAME")
- name = self.expect_string_()
- self.expect_keyword_("TAG")
- tag = self.expect_string_()
- if self.scripts_.resolve(tag) is not None:
- raise VoltLibError(
- 'Script "%s" already defined, '
- "script tags are case insensitive" % tag,
- location,
- )
- self.langs_.enter_scope()
- langs = []
- while self.next_token_ != "END_SCRIPT":
- self.advance_lexer_()
- lang = self.parse_langsys_()
- self.expect_keyword_("END_LANGSYS")
- if self.langs_.resolve(lang.tag) is not None:
- raise VoltLibError(
- 'Language "%s" already defined in script "%s", '
- "language tags are case insensitive" % (lang.tag, tag),
- location,
- )
- self.langs_.define(lang.tag, lang)
- langs.append(lang)
- self.expect_keyword_("END_SCRIPT")
- self.langs_.exit_scope()
- def_script = ast.ScriptDefinition(name, tag, langs, location=location)
- self.scripts_.define(tag, def_script)
- return def_script
- def parse_langsys_(self):
- assert self.is_cur_keyword_("DEF_LANGSYS")
- location = self.cur_token_location_
- name = None
- if self.next_token_ == "NAME":
- self.expect_keyword_("NAME")
- name = self.expect_string_()
- self.expect_keyword_("TAG")
- tag = self.expect_string_()
- features = []
- while self.next_token_ != "END_LANGSYS":
- self.advance_lexer_()
- feature = self.parse_feature_()
- self.expect_keyword_("END_FEATURE")
- features.append(feature)
- def_langsys = ast.LangSysDefinition(name, tag, features, location=location)
- return def_langsys
- def parse_feature_(self):
- assert self.is_cur_keyword_("DEF_FEATURE")
- location = self.cur_token_location_
- self.expect_keyword_("NAME")
- name = self.expect_string_()
- self.expect_keyword_("TAG")
- tag = self.expect_string_()
- lookups = []
- while self.next_token_ != "END_FEATURE":
- # self.advance_lexer_()
- self.expect_keyword_("LOOKUP")
- lookup = self.expect_string_()
- lookups.append(lookup)
- feature = ast.FeatureDefinition(name, tag, lookups, location=location)
- return feature
- def parse_def_lookup_(self):
- assert self.is_cur_keyword_("DEF_LOOKUP")
- location = self.cur_token_location_
- name = self.expect_string_()
- if not name[0].isalpha():
- raise VoltLibError(
- 'Lookup name "%s" must start with a letter' % name, location
- )
- if self.lookups_.resolve(name) is not None:
- raise VoltLibError(
- 'Lookup "%s" already defined, '
- "lookup names are case insensitive" % name,
- location,
- )
- process_base = True
- if self.next_token_ == "PROCESS_BASE":
- self.advance_lexer_()
- elif self.next_token_ == "SKIP_BASE":
- self.advance_lexer_()
- process_base = False
- process_marks = True
- mark_glyph_set = None
- if self.next_token_ == "PROCESS_MARKS":
- self.advance_lexer_()
- if self.next_token_ == "MARK_GLYPH_SET":
- self.advance_lexer_()
- mark_glyph_set = self.expect_string_()
- elif self.next_token_ == "ALL":
- self.advance_lexer_()
- elif self.next_token_ == "NONE":
- self.advance_lexer_()
- process_marks = False
- elif self.next_token_type_ == Lexer.STRING:
- process_marks = self.expect_string_()
- else:
- raise VoltLibError(
- "Expected ALL, NONE, MARK_GLYPH_SET or an ID. "
- "Got %s" % (self.next_token_type_),
- location,
- )
- elif self.next_token_ == "SKIP_MARKS":
- self.advance_lexer_()
- process_marks = False
- direction = None
- if self.next_token_ == "DIRECTION":
- self.expect_keyword_("DIRECTION")
- direction = self.expect_name_()
- assert direction in ("LTR", "RTL")
- reversal = None
- if self.next_token_ == "REVERSAL":
- self.expect_keyword_("REVERSAL")
- reversal = True
- comments = None
- if self.next_token_ == "COMMENTS":
- self.expect_keyword_("COMMENTS")
- comments = self.expect_string_().replace(r"\n", "\n")
- context = []
- while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
- context = self.parse_context_()
- as_pos_or_sub = self.expect_name_()
- sub = None
- pos = None
- if as_pos_or_sub == "AS_SUBSTITUTION":
- sub = self.parse_substitution_(reversal)
- elif as_pos_or_sub == "AS_POSITION":
- pos = self.parse_position_()
- else:
- raise VoltLibError(
- "Expected AS_SUBSTITUTION or AS_POSITION. " "Got %s" % (as_pos_or_sub),
- location,
- )
- def_lookup = ast.LookupDefinition(
- name,
- process_base,
- process_marks,
- mark_glyph_set,
- direction,
- reversal,
- comments,
- context,
- sub,
- pos,
- location=location,
- )
- self.lookups_.define(name, def_lookup)
- return def_lookup
- def parse_context_(self):
- location = self.cur_token_location_
- contexts = []
- while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
- side = None
- coverage = None
- ex_or_in = self.expect_name_()
- # side_contexts = [] # XXX
- if self.next_token_ != "END_CONTEXT":
- left = []
- right = []
- while self.next_token_ in ("LEFT", "RIGHT"):
- side = self.expect_name_()
- coverage = self.parse_coverage_()
- if side == "LEFT":
- left.append(coverage)
- else:
- right.append(coverage)
- self.expect_keyword_("END_CONTEXT")
- context = ast.ContextDefinition(
- ex_or_in, left, right, location=location
- )
- contexts.append(context)
- else:
- self.expect_keyword_("END_CONTEXT")
- return contexts
- def parse_substitution_(self, reversal):
- assert self.is_cur_keyword_("AS_SUBSTITUTION")
- location = self.cur_token_location_
- src = []
- dest = []
- if self.next_token_ != "SUB":
- raise VoltLibError("Expected SUB", location)
- while self.next_token_ == "SUB":
- self.expect_keyword_("SUB")
- src.append(self.parse_coverage_())
- self.expect_keyword_("WITH")
- dest.append(self.parse_coverage_())
- self.expect_keyword_("END_SUB")
- self.expect_keyword_("END_SUBSTITUTION")
- max_src = max([len(cov) for cov in src])
- max_dest = max([len(cov) for cov in dest])
- # many to many or mixed is invalid
- if (max_src > 1 and max_dest > 1) or (
- reversal and (max_src > 1 or max_dest > 1)
- ):
- raise VoltLibError("Invalid substitution type", location)
- mapping = dict(zip(tuple(src), tuple(dest)))
- if max_src == 1 and max_dest == 1:
- if reversal:
- sub = ast.SubstitutionReverseChainingSingleDefinition(
- mapping, location=location
- )
- else:
- sub = ast.SubstitutionSingleDefinition(mapping, location=location)
- elif max_src == 1 and max_dest > 1:
- sub = ast.SubstitutionMultipleDefinition(mapping, location=location)
- elif max_src > 1 and max_dest == 1:
- sub = ast.SubstitutionLigatureDefinition(mapping, location=location)
- return sub
- def parse_position_(self):
- assert self.is_cur_keyword_("AS_POSITION")
- location = self.cur_token_location_
- pos_type = self.expect_name_()
- if pos_type not in ("ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
- raise VoltLibError(
- "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE", location
- )
- if pos_type == "ATTACH":
- position = self.parse_attach_()
- elif pos_type == "ATTACH_CURSIVE":
- position = self.parse_attach_cursive_()
- elif pos_type == "ADJUST_PAIR":
- position = self.parse_adjust_pair_()
- elif pos_type == "ADJUST_SINGLE":
- position = self.parse_adjust_single_()
- self.expect_keyword_("END_POSITION")
- return position
- def parse_attach_(self):
- assert self.is_cur_keyword_("ATTACH")
- location = self.cur_token_location_
- coverage = self.parse_coverage_()
- coverage_to = []
- self.expect_keyword_("TO")
- while self.next_token_ != "END_ATTACH":
- cov = self.parse_coverage_()
- self.expect_keyword_("AT")
- self.expect_keyword_("ANCHOR")
- anchor_name = self.expect_string_()
- coverage_to.append((cov, anchor_name))
- self.expect_keyword_("END_ATTACH")
- position = ast.PositionAttachDefinition(
- coverage, coverage_to, location=location
- )
- return position
- def parse_attach_cursive_(self):
- assert self.is_cur_keyword_("ATTACH_CURSIVE")
- location = self.cur_token_location_
- coverages_exit = []
- coverages_enter = []
- while self.next_token_ != "ENTER":
- self.expect_keyword_("EXIT")
- coverages_exit.append(self.parse_coverage_())
- while self.next_token_ != "END_ATTACH":
- self.expect_keyword_("ENTER")
- coverages_enter.append(self.parse_coverage_())
- self.expect_keyword_("END_ATTACH")
- position = ast.PositionAttachCursiveDefinition(
- coverages_exit, coverages_enter, location=location
- )
- return position
- def parse_adjust_pair_(self):
- assert self.is_cur_keyword_("ADJUST_PAIR")
- location = self.cur_token_location_
- coverages_1 = []
- coverages_2 = []
- adjust_pair = {}
- while self.next_token_ == "FIRST":
- self.advance_lexer_()
- coverage_1 = self.parse_coverage_()
- coverages_1.append(coverage_1)
- while self.next_token_ == "SECOND":
- self.advance_lexer_()
- coverage_2 = self.parse_coverage_()
- coverages_2.append(coverage_2)
- while self.next_token_ != "END_ADJUST":
- id_1 = self.expect_number_()
- id_2 = self.expect_number_()
- self.expect_keyword_("BY")
- pos_1 = self.parse_pos_()
- pos_2 = self.parse_pos_()
- adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
- self.expect_keyword_("END_ADJUST")
- position = ast.PositionAdjustPairDefinition(
- coverages_1, coverages_2, adjust_pair, location=location
- )
- return position
- def parse_adjust_single_(self):
- assert self.is_cur_keyword_("ADJUST_SINGLE")
- location = self.cur_token_location_
- adjust_single = []
- while self.next_token_ != "END_ADJUST":
- coverages = self.parse_coverage_()
- self.expect_keyword_("BY")
- pos = self.parse_pos_()
- adjust_single.append((coverages, pos))
- self.expect_keyword_("END_ADJUST")
- position = ast.PositionAdjustSingleDefinition(adjust_single, location=location)
- return position
- def parse_def_anchor_(self):
- assert self.is_cur_keyword_("DEF_ANCHOR")
- location = self.cur_token_location_
- name = self.expect_string_()
- self.expect_keyword_("ON")
- gid = self.expect_number_()
- self.expect_keyword_("GLYPH")
- glyph_name = self.expect_name_()
- self.expect_keyword_("COMPONENT")
- component = self.expect_number_()
- # check for duplicate anchor names on this glyph
- if glyph_name in self.anchors_:
- anchor = self.anchors_[glyph_name].resolve(name)
- if anchor is not None and anchor.component == component:
- raise VoltLibError(
- 'Anchor "%s" already defined, '
- "anchor names are case insensitive" % name,
- location,
- )
- if self.next_token_ == "LOCKED":
- locked = True
- self.advance_lexer_()
- else:
- locked = False
- self.expect_keyword_("AT")
- pos = self.parse_pos_()
- self.expect_keyword_("END_ANCHOR")
- anchor = ast.AnchorDefinition(
- name, gid, glyph_name, component, locked, pos, location=location
- )
- if glyph_name not in self.anchors_:
- self.anchors_[glyph_name] = SymbolTable()
- self.anchors_[glyph_name].define(name, anchor)
- return anchor
- def parse_adjust_by_(self):
- self.advance_lexer_()
- assert self.is_cur_keyword_("ADJUST_BY")
- adjustment = self.expect_number_()
- self.expect_keyword_("AT")
- size = self.expect_number_()
- return adjustment, size
- def parse_pos_(self):
- # VOLT syntax doesn't seem to take device Y advance
- self.advance_lexer_()
- location = self.cur_token_location_
- assert self.is_cur_keyword_("POS"), location
- adv = None
- dx = None
- dy = None
- adv_adjust_by = {}
- dx_adjust_by = {}
- dy_adjust_by = {}
- if self.next_token_ == "ADV":
- self.advance_lexer_()
- adv = self.expect_number_()
- while self.next_token_ == "ADJUST_BY":
- adjustment, size = self.parse_adjust_by_()
- adv_adjust_by[size] = adjustment
- if self.next_token_ == "DX":
- self.advance_lexer_()
- dx = self.expect_number_()
- while self.next_token_ == "ADJUST_BY":
- adjustment, size = self.parse_adjust_by_()
- dx_adjust_by[size] = adjustment
- if self.next_token_ == "DY":
- self.advance_lexer_()
- dy = self.expect_number_()
- while self.next_token_ == "ADJUST_BY":
- adjustment, size = self.parse_adjust_by_()
- dy_adjust_by[size] = adjustment
- self.expect_keyword_("END_POS")
- return ast.Pos(adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)
- def parse_unicode_values_(self):
- location = self.cur_token_location_
- try:
- unicode_values = self.expect_string_().split(",")
- unicode_values = [int(uni[2:], 16) for uni in unicode_values if uni != ""]
- except ValueError as err:
- raise VoltLibError(str(err), location)
- return unicode_values if unicode_values != [] else None
- def parse_enum_(self):
- self.expect_keyword_("ENUM")
- location = self.cur_token_location_
- enum = ast.Enum(self.parse_coverage_(), location=location)
- self.expect_keyword_("END_ENUM")
- return enum
- def parse_coverage_(self):
- coverage = []
- location = self.cur_token_location_
- while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
- if self.next_token_ == "ENUM":
- enum = self.parse_enum_()
- coverage.append(enum)
- elif self.next_token_ == "GLYPH":
- self.expect_keyword_("GLYPH")
- name = self.expect_string_()
- coverage.append(ast.GlyphName(name, location=location))
- elif self.next_token_ == "GROUP":
- self.expect_keyword_("GROUP")
- name = self.expect_string_()
- coverage.append(ast.GroupName(name, self, location=location))
- elif self.next_token_ == "RANGE":
- self.expect_keyword_("RANGE")
- start = self.expect_string_()
- self.expect_keyword_("TO")
- end = self.expect_string_()
- coverage.append(ast.Range(start, end, self, location=location))
- return tuple(coverage)
- def resolve_group(self, group_name):
- return self.groups_.resolve(group_name)
- def glyph_range(self, start, end):
- return self.glyphs_.range(start, end)
- def parse_ppem_(self):
- location = self.cur_token_location_
- ppem_name = self.cur_token_
- value = self.expect_number_()
- setting = ast.SettingDefinition(ppem_name, value, location=location)
- return setting
- def parse_noarg_option_(self):
- location = self.cur_token_location_
- name = self.cur_token_
- value = True
- setting = ast.SettingDefinition(name, value, location=location)
- return setting
- def parse_cmap_format(self):
- location = self.cur_token_location_
- name = self.cur_token_
- value = (self.expect_number_(), self.expect_number_(), self.expect_number_())
- setting = ast.SettingDefinition(name, value, location=location)
- return setting
- def is_cur_keyword_(self, k):
- return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)
- def expect_string_(self):
- self.advance_lexer_()
- if self.cur_token_type_ is not Lexer.STRING:
- raise VoltLibError("Expected a string", self.cur_token_location_)
- return self.cur_token_
- def expect_keyword_(self, keyword):
- self.advance_lexer_()
- if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
- return self.cur_token_
- raise VoltLibError('Expected "%s"' % keyword, self.cur_token_location_)
- def expect_name_(self):
- self.advance_lexer_()
- if self.cur_token_type_ is Lexer.NAME:
- return self.cur_token_
- raise VoltLibError("Expected a name", self.cur_token_location_)
- def expect_number_(self):
- self.advance_lexer_()
- if self.cur_token_type_ is not Lexer.NUMBER:
- raise VoltLibError("Expected a number", self.cur_token_location_)
- return self.cur_token_
- def advance_lexer_(self):
- self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
- self.next_token_type_,
- self.next_token_,
- self.next_token_location_,
- )
- try:
- if self.is_cur_keyword_("END"):
- raise StopIteration
- (
- self.next_token_type_,
- self.next_token_,
- self.next_token_location_,
- ) = self.lexer_.next()
- except StopIteration:
- self.next_token_type_, self.next_token_ = (None, None)
- class SymbolTable(object):
- def __init__(self):
- self.scopes_ = [{}]
- def enter_scope(self):
- self.scopes_.append({})
- def exit_scope(self):
- self.scopes_.pop()
- def define(self, name, item):
- self.scopes_[-1][name] = item
- def resolve(self, name, case_insensitive=True):
- for scope in reversed(self.scopes_):
- item = scope.get(name)
- if item:
- return item
- if case_insensitive:
- for key in scope:
- if key.lower() == name.lower():
- return scope[key]
- return None
- class OrderedSymbolTable(SymbolTable):
- def __init__(self):
- self.scopes_ = [{}]
- def enter_scope(self):
- self.scopes_.append({})
- def resolve(self, name, case_insensitive=False):
- SymbolTable.resolve(self, name, case_insensitive=case_insensitive)
- def range(self, start, end):
- for scope in reversed(self.scopes_):
- if start in scope and end in scope:
- start_idx = list(scope.keys()).index(start)
- end_idx = list(scope.keys()).index(end)
- return list(scope.keys())[start_idx : end_idx + 1]
- return None
|