123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727 |
- from fontTools.misc.roundTools import noRound, otRound
- from fontTools.misc.intTools import bit_count
- from fontTools.ttLib.tables import otTables as ot
- from fontTools.varLib.models import supportScalar
- from fontTools.varLib.builder import (
- buildVarRegionList,
- buildVarStore,
- buildVarRegion,
- buildVarData,
- )
- from functools import partial
- from collections import defaultdict
- from heapq import heappush, heappop
- NO_VARIATION_INDEX = ot.NO_VARIATION_INDEX
- ot.VarStore.NO_VARIATION_INDEX = NO_VARIATION_INDEX
- def _getLocationKey(loc):
- return tuple(sorted(loc.items(), key=lambda kv: kv[0]))
- class OnlineVarStoreBuilder(object):
- def __init__(self, axisTags):
- self._axisTags = axisTags
- self._regionMap = {}
- self._regionList = buildVarRegionList([], axisTags)
- self._store = buildVarStore(self._regionList, [])
- self._data = None
- self._model = None
- self._supports = None
- self._varDataIndices = {}
- self._varDataCaches = {}
- self._cache = {}
- def setModel(self, model):
- self.setSupports(model.supports)
- self._model = model
- def setSupports(self, supports):
- self._model = None
- self._supports = list(supports)
- if not self._supports[0]:
- del self._supports[0] # Drop base master support
- self._cache = {}
- self._data = None
- def finish(self, optimize=True):
- self._regionList.RegionCount = len(self._regionList.Region)
- self._store.VarDataCount = len(self._store.VarData)
- for data in self._store.VarData:
- data.ItemCount = len(data.Item)
- data.calculateNumShorts(optimize=optimize)
- return self._store
- def _add_VarData(self):
- regionMap = self._regionMap
- regionList = self._regionList
- regions = self._supports
- regionIndices = []
- for region in regions:
- key = _getLocationKey(region)
- idx = regionMap.get(key)
- if idx is None:
- varRegion = buildVarRegion(region, self._axisTags)
- idx = regionMap[key] = len(regionList.Region)
- regionList.Region.append(varRegion)
- regionIndices.append(idx)
- # Check if we have one already...
- key = tuple(regionIndices)
- varDataIdx = self._varDataIndices.get(key)
- if varDataIdx is not None:
- self._outer = varDataIdx
- self._data = self._store.VarData[varDataIdx]
- self._cache = self._varDataCaches[key]
- if len(self._data.Item) == 0xFFFF:
- # This is full. Need new one.
- varDataIdx = None
- if varDataIdx is None:
- self._data = buildVarData(regionIndices, [], optimize=False)
- self._outer = len(self._store.VarData)
- self._store.VarData.append(self._data)
- self._varDataIndices[key] = self._outer
- if key not in self._varDataCaches:
- self._varDataCaches[key] = {}
- self._cache = self._varDataCaches[key]
- def storeMasters(self, master_values, *, round=round):
- deltas = self._model.getDeltas(master_values, round=round)
- base = deltas.pop(0)
- return base, self.storeDeltas(deltas, round=noRound)
- def storeDeltas(self, deltas, *, round=round):
- deltas = [round(d) for d in deltas]
- if len(deltas) == len(self._supports) + 1:
- deltas = tuple(deltas[1:])
- else:
- assert len(deltas) == len(self._supports)
- deltas = tuple(deltas)
- varIdx = self._cache.get(deltas)
- if varIdx is not None:
- return varIdx
- if not self._data:
- self._add_VarData()
- inner = len(self._data.Item)
- if inner == 0xFFFF:
- # Full array. Start new one.
- self._add_VarData()
- return self.storeDeltas(deltas)
- self._data.addItem(deltas, round=noRound)
- varIdx = (self._outer << 16) + inner
- self._cache[deltas] = varIdx
- return varIdx
- def VarData_addItem(self, deltas, *, round=round):
- deltas = [round(d) for d in deltas]
- countUs = self.VarRegionCount
- countThem = len(deltas)
- if countUs + 1 == countThem:
- deltas = list(deltas[1:])
- else:
- assert countUs == countThem, (countUs, countThem)
- deltas = list(deltas)
- self.Item.append(deltas)
- self.ItemCount = len(self.Item)
- ot.VarData.addItem = VarData_addItem
- def VarRegion_get_support(self, fvar_axes):
- return {
- fvar_axes[i].axisTag: (reg.StartCoord, reg.PeakCoord, reg.EndCoord)
- for i, reg in enumerate(self.VarRegionAxis)
- if reg.PeakCoord != 0
- }
- ot.VarRegion.get_support = VarRegion_get_support
- def VarStore___bool__(self):
- return bool(self.VarData)
- ot.VarStore.__bool__ = VarStore___bool__
- class VarStoreInstancer(object):
- def __init__(self, varstore, fvar_axes, location={}):
- self.fvar_axes = fvar_axes
- assert varstore is None or varstore.Format == 1
- self._varData = varstore.VarData if varstore else []
- self._regions = varstore.VarRegionList.Region if varstore else []
- self.setLocation(location)
- def setLocation(self, location):
- self.location = dict(location)
- self._clearCaches()
- def _clearCaches(self):
- self._scalars = {}
- def _getScalar(self, regionIdx):
- scalar = self._scalars.get(regionIdx)
- if scalar is None:
- support = self._regions[regionIdx].get_support(self.fvar_axes)
- scalar = supportScalar(self.location, support)
- self._scalars[regionIdx] = scalar
- return scalar
- @staticmethod
- def interpolateFromDeltasAndScalars(deltas, scalars):
- delta = 0.0
- for d, s in zip(deltas, scalars):
- if not s:
- continue
- delta += d * s
- return delta
- def __getitem__(self, varidx):
- major, minor = varidx >> 16, varidx & 0xFFFF
- if varidx == NO_VARIATION_INDEX:
- return 0.0
- varData = self._varData
- scalars = [self._getScalar(ri) for ri in varData[major].VarRegionIndex]
- deltas = varData[major].Item[minor]
- return self.interpolateFromDeltasAndScalars(deltas, scalars)
- def interpolateFromDeltas(self, varDataIndex, deltas):
- varData = self._varData
- scalars = [self._getScalar(ri) for ri in varData[varDataIndex].VarRegionIndex]
- return self.interpolateFromDeltasAndScalars(deltas, scalars)
- #
- # Optimizations
- #
- # retainFirstMap - If true, major 0 mappings are retained. Deltas for unused indices are zeroed
- # advIdxes - Set of major 0 indices for advance deltas to be listed first. Other major 0 indices follow.
- def VarStore_subset_varidxes(
- self, varIdxes, optimize=True, retainFirstMap=False, advIdxes=set()
- ):
- # Sort out used varIdxes by major/minor.
- used = {}
- for varIdx in varIdxes:
- if varIdx == NO_VARIATION_INDEX:
- continue
- major = varIdx >> 16
- minor = varIdx & 0xFFFF
- d = used.get(major)
- if d is None:
- d = used[major] = set()
- d.add(minor)
- del varIdxes
- #
- # Subset VarData
- #
- varData = self.VarData
- newVarData = []
- varDataMap = {NO_VARIATION_INDEX: NO_VARIATION_INDEX}
- for major, data in enumerate(varData):
- usedMinors = used.get(major)
- if usedMinors is None:
- continue
- newMajor = len(newVarData)
- newVarData.append(data)
- items = data.Item
- newItems = []
- if major == 0 and retainFirstMap:
- for minor in range(len(items)):
- newItems.append(
- items[minor] if minor in usedMinors else [0] * len(items[minor])
- )
- varDataMap[minor] = minor
- else:
- if major == 0:
- minors = sorted(advIdxes) + sorted(usedMinors - advIdxes)
- else:
- minors = sorted(usedMinors)
- for minor in minors:
- newMinor = len(newItems)
- newItems.append(items[minor])
- varDataMap[(major << 16) + minor] = (newMajor << 16) + newMinor
- data.Item = newItems
- data.ItemCount = len(data.Item)
- data.calculateNumShorts(optimize=optimize)
- self.VarData = newVarData
- self.VarDataCount = len(self.VarData)
- self.prune_regions()
- return varDataMap
- ot.VarStore.subset_varidxes = VarStore_subset_varidxes
- def VarStore_prune_regions(self):
- """Remove unused VarRegions."""
- #
- # Subset VarRegionList
- #
- # Collect.
- usedRegions = set()
- for data in self.VarData:
- usedRegions.update(data.VarRegionIndex)
- # Subset.
- regionList = self.VarRegionList
- regions = regionList.Region
- newRegions = []
- regionMap = {}
- for i in sorted(usedRegions):
- regionMap[i] = len(newRegions)
- newRegions.append(regions[i])
- regionList.Region = newRegions
- regionList.RegionCount = len(regionList.Region)
- # Map.
- for data in self.VarData:
- data.VarRegionIndex = [regionMap[i] for i in data.VarRegionIndex]
- ot.VarStore.prune_regions = VarStore_prune_regions
- def _visit(self, func):
- """Recurse down from self, if type of an object is ot.Device,
- call func() on it. Works on otData-style classes."""
- if type(self) == ot.Device:
- func(self)
- elif isinstance(self, list):
- for that in self:
- _visit(that, func)
- elif hasattr(self, "getConverters") and not hasattr(self, "postRead"):
- for conv in self.getConverters():
- that = getattr(self, conv.name, None)
- if that is not None:
- _visit(that, func)
- elif isinstance(self, ot.ValueRecord):
- for that in self.__dict__.values():
- _visit(that, func)
- def _Device_recordVarIdx(self, s):
- """Add VarIdx in this Device table (if any) to the set s."""
- if self.DeltaFormat == 0x8000:
- s.add((self.StartSize << 16) + self.EndSize)
- def Object_collect_device_varidxes(self, varidxes):
- adder = partial(_Device_recordVarIdx, s=varidxes)
- _visit(self, adder)
- ot.GDEF.collect_device_varidxes = Object_collect_device_varidxes
- ot.GPOS.collect_device_varidxes = Object_collect_device_varidxes
- def _Device_mapVarIdx(self, mapping, done):
- """Map VarIdx in this Device table (if any) through mapping."""
- if id(self) in done:
- return
- done.add(id(self))
- if self.DeltaFormat == 0x8000:
- varIdx = mapping[(self.StartSize << 16) + self.EndSize]
- self.StartSize = varIdx >> 16
- self.EndSize = varIdx & 0xFFFF
- def Object_remap_device_varidxes(self, varidxes_map):
- mapper = partial(_Device_mapVarIdx, mapping=varidxes_map, done=set())
- _visit(self, mapper)
- ot.GDEF.remap_device_varidxes = Object_remap_device_varidxes
- ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes
- class _Encoding(object):
- def __init__(self, chars):
- self.chars = chars
- self.width = bit_count(chars)
- self.columns = self._columns(chars)
- self.overhead = self._characteristic_overhead(self.columns)
- self.items = set()
- def append(self, row):
- self.items.add(row)
- def extend(self, lst):
- self.items.update(lst)
- def get_room(self):
- """Maximum number of bytes that can be added to characteristic
- while still being beneficial to merge it into another one."""
- count = len(self.items)
- return max(0, (self.overhead - 1) // count - self.width)
- room = property(get_room)
- def get_gain(self):
- """Maximum possible byte gain from merging this into another
- characteristic."""
- count = len(self.items)
- return max(0, self.overhead - count)
- gain = property(get_gain)
- def gain_sort_key(self):
- return self.gain, self.chars
- def width_sort_key(self):
- return self.width, self.chars
- @staticmethod
- def _characteristic_overhead(columns):
- """Returns overhead in bytes of encoding this characteristic
- as a VarData."""
- c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header
- c += bit_count(columns) * 2
- return c
- @staticmethod
- def _columns(chars):
- cols = 0
- i = 1
- while chars:
- if chars & 0b1111:
- cols |= i
- chars >>= 4
- i <<= 1
- return cols
- def gain_from_merging(self, other_encoding):
- combined_chars = other_encoding.chars | self.chars
- combined_width = bit_count(combined_chars)
- combined_columns = self.columns | other_encoding.columns
- combined_overhead = _Encoding._characteristic_overhead(combined_columns)
- combined_gain = (
- +self.overhead
- + other_encoding.overhead
- - combined_overhead
- - (combined_width - self.width) * len(self.items)
- - (combined_width - other_encoding.width) * len(other_encoding.items)
- )
- return combined_gain
- class _EncodingDict(dict):
- def __missing__(self, chars):
- r = self[chars] = _Encoding(chars)
- return r
- def add_row(self, row):
- chars = self._row_characteristics(row)
- self[chars].append(row)
- @staticmethod
- def _row_characteristics(row):
- """Returns encoding characteristics for a row."""
- longWords = False
- chars = 0
- i = 1
- for v in row:
- if v:
- chars += i
- if not (-128 <= v <= 127):
- chars += i * 0b0010
- if not (-32768 <= v <= 32767):
- longWords = True
- break
- i <<= 4
- if longWords:
- # Redo; only allow 2byte/4byte encoding
- chars = 0
- i = 1
- for v in row:
- if v:
- chars += i * 0b0011
- if not (-32768 <= v <= 32767):
- chars += i * 0b1100
- i <<= 4
- return chars
- def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1):
- """Optimize storage. Returns mapping from old VarIdxes to new ones."""
- # Overview:
- #
- # For each VarData row, we first extend it with zeroes to have
- # one column per region in VarRegionList. We then group the
- # rows into _Encoding objects, by their "characteristic" bitmap.
- # The characteristic bitmap is a binary number representing how
- # many bytes each column of the data takes up to encode. Each
- # column is encoded in four bits. For example, if a column has
- # only values in the range -128..127, it would only have a single
- # bit set in the characteristic bitmap for that column. If it has
- # values in the range -32768..32767, it would have two bits set.
- # The number of ones in the characteristic bitmap is the "width"
- # of the encoding.
- #
- # Each encoding as such has a number of "active" (ie. non-zero)
- # columns. The overhead of encoding the characteristic bitmap
- # is 10 bytes, plus 2 bytes per active column.
- #
- # When an encoding is merged into another one, if the characteristic
- # of the old encoding is a subset of the new one, then the overhead
- # of the old encoding is completely eliminated. However, each row
- # now would require more bytes to encode, to the tune of one byte
- # per characteristic bit that is active in the new encoding but not
- # in the old one. The number of bits that can be added to an encoding
- # while still beneficial to merge it into another encoding is called
- # the "room" for that encoding.
- #
- # The "gain" of an encodings is the maximum number of bytes we can
- # save by merging it into another encoding. The "gain" of merging
- # two encodings is how many bytes we save by doing so.
- #
- # High-level algorithm:
- #
- # - Each encoding has a minimal way to encode it. However, because
- # of the overhead of encoding the characteristic bitmap, it may
- # be beneficial to merge two encodings together, if there is
- # gain in doing so. As such, we need to search for the best
- # such successive merges.
- #
- # Algorithm:
- #
- # - Put all encodings into a "todo" list.
- #
- # - Sort todo list by decreasing gain (for stability).
- #
- # - Make a priority-queue of the gain from combining each two
- # encodings in the todo list. The priority queue is sorted by
- # decreasing gain. Only positive gains are included.
- #
- # - While priority queue is not empty:
- # - Pop the first item from the priority queue,
- # - Merge the two encodings it represents,
- # - Remove the two encodings from the todo list,
- # - Insert positive gains from combining the new encoding with
- # all existing todo list items into the priority queue,
- # - If a todo list item with the same characteristic bitmap as
- # the new encoding exists, remove it from the todo list and
- # merge it into the new encoding.
- # - Insert the new encoding into the todo list,
- #
- # - Encode all remaining items in the todo list.
- #
- # The output is then sorted for stability, in the following way:
- # - The VarRegionList of the input is kept intact.
- # - All encodings are sorted before the main algorithm, by
- # gain_key_sort(), which is a tuple of the following items:
- # * The gain of the encoding.
- # * The characteristic bitmap of the encoding, with higher-numbered
- # columns compared first.
- # - The VarData is sorted by width_sort_key(), which is a tuple
- # of the following items:
- # * The "width" of the encoding.
- # * The characteristic bitmap of the encoding, with higher-numbered
- # columns compared first.
- # - Within each VarData, the items are sorted as vectors of numbers.
- #
- # Finally, each VarData is optimized to remove the empty columns and
- # reorder columns as needed.
- # TODO
- # Check that no two VarRegions are the same; if they are, fold them.
- n = len(self.VarRegionList.Region) # Number of columns
- zeroes = [0] * n
- front_mapping = {} # Map from old VarIdxes to full row tuples
- encodings = _EncodingDict()
- # Collect all items into a set of full rows (with lots of zeroes.)
- for major, data in enumerate(self.VarData):
- regionIndices = data.VarRegionIndex
- for minor, item in enumerate(data.Item):
- row = list(zeroes)
- if quantization == 1:
- for regionIdx, v in zip(regionIndices, item):
- row[regionIdx] += v
- else:
- for regionIdx, v in zip(regionIndices, item):
- row[regionIdx] += (
- round(v / quantization) * quantization
- ) # TODO https://github.com/fonttools/fonttools/pull/3126#discussion_r1205439785
- row = tuple(row)
- if use_NO_VARIATION_INDEX and not any(row):
- front_mapping[(major << 16) + minor] = None
- continue
- encodings.add_row(row)
- front_mapping[(major << 16) + minor] = row
- # Prepare for the main algorithm.
- todo = sorted(encodings.values(), key=_Encoding.gain_sort_key)
- del encodings
- # Repeatedly pick two best encodings to combine, and combine them.
- heap = []
- for i, encoding in enumerate(todo):
- for j in range(i + 1, len(todo)):
- other_encoding = todo[j]
- combining_gain = encoding.gain_from_merging(other_encoding)
- if combining_gain > 0:
- heappush(heap, (-combining_gain, i, j))
- while heap:
- _, i, j = heappop(heap)
- if todo[i] is None or todo[j] is None:
- continue
- encoding, other_encoding = todo[i], todo[j]
- todo[i], todo[j] = None, None
- # Combine the two encodings
- combined_chars = other_encoding.chars | encoding.chars
- combined_encoding = _Encoding(combined_chars)
- combined_encoding.extend(encoding.items)
- combined_encoding.extend(other_encoding.items)
- for k, enc in enumerate(todo):
- if enc is None:
- continue
- # In the unlikely event that the same encoding exists already,
- # combine it.
- if enc.chars == combined_chars:
- combined_encoding.extend(enc.items)
- todo[k] = None
- continue
- combining_gain = combined_encoding.gain_from_merging(enc)
- if combining_gain > 0:
- heappush(heap, (-combining_gain, k, len(todo)))
- todo.append(combined_encoding)
- encodings = [encoding for encoding in todo if encoding is not None]
- # Assemble final store.
- back_mapping = {} # Mapping from full rows to new VarIdxes
- encodings.sort(key=_Encoding.width_sort_key)
- self.VarData = []
- for encoding in encodings:
- items = sorted(encoding.items)
- while items:
- major = len(self.VarData)
- data = ot.VarData()
- self.VarData.append(data)
- data.VarRegionIndex = range(n)
- data.VarRegionCount = len(data.VarRegionIndex)
- # Each major can only encode up to 0xFFFF entries.
- data.Item, items = items[:0xFFFF], items[0xFFFF:]
- for minor, item in enumerate(data.Item):
- back_mapping[item] = (major << 16) + minor
- # Compile final mapping.
- varidx_map = {NO_VARIATION_INDEX: NO_VARIATION_INDEX}
- for k, v in front_mapping.items():
- varidx_map[k] = back_mapping[v] if v is not None else NO_VARIATION_INDEX
- # Recalculate things and go home.
- self.VarRegionList.RegionCount = len(self.VarRegionList.Region)
- self.VarDataCount = len(self.VarData)
- for data in self.VarData:
- data.ItemCount = len(data.Item)
- data.optimize()
- # Remove unused regions.
- self.prune_regions()
- return varidx_map
- ot.VarStore.optimize = VarStore_optimize
- def main(args=None):
- """Optimize a font's GDEF variation store"""
- from argparse import ArgumentParser
- from fontTools import configLogger
- from fontTools.ttLib import TTFont
- from fontTools.ttLib.tables.otBase import OTTableWriter
- parser = ArgumentParser(prog="varLib.varStore", description=main.__doc__)
- parser.add_argument("--quantization", type=int, default=1)
- parser.add_argument("fontfile")
- parser.add_argument("outfile", nargs="?")
- options = parser.parse_args(args)
- # TODO: allow user to configure logging via command-line options
- configLogger(level="INFO")
- quantization = options.quantization
- fontfile = options.fontfile
- outfile = options.outfile
- font = TTFont(fontfile)
- gdef = font["GDEF"]
- store = gdef.table.VarStore
- writer = OTTableWriter()
- store.compile(writer, font)
- size = len(writer.getAllData())
- print("Before: %7d bytes" % size)
- varidx_map = store.optimize(quantization=quantization)
- writer = OTTableWriter()
- store.compile(writer, font)
- size = len(writer.getAllData())
- print("After: %7d bytes" % size)
- if outfile is not None:
- gdef.table.remap_device_varidxes(varidx_map)
- if "GPOS" in font:
- font["GPOS"].table.remap_device_varidxes(varidx_map)
- font.save(outfile)
- if __name__ == "__main__":
- import sys
- if len(sys.argv) > 1:
- sys.exit(main())
- import doctest
- sys.exit(doctest.testmod().failed)
|