gpos.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. import logging
  2. import os
  3. from collections import defaultdict, namedtuple
  4. from functools import reduce
  5. from itertools import chain
  6. from math import log2
  7. from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
  8. from fontTools.config import OPTIONS
  9. from fontTools.misc.intTools import bit_count, bit_indices
  10. from fontTools.ttLib import TTFont
  11. from fontTools.ttLib.tables import otBase, otTables
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Option object registered in the fontTools config under
# "<this module's name>:COMPRESSION_LEVEL"; its ``default`` attribute is read below.
COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]

# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead
# https://github.com/fonttools/fonttools/issues/2592
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
# String form of the option's default, used when the environment variable is unset.
GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)
  18. def _compression_level_from_env() -> int:
  19. env_level = GPOS_COMPACT_MODE_DEFAULT
  20. if GPOS_COMPACT_MODE_ENV_KEY in os.environ:
  21. import warnings
  22. warnings.warn(
  23. f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
  24. "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
  25. "in TTFont.cfg.",
  26. DeprecationWarning,
  27. )
  28. env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]
  29. if len(env_level) == 1 and env_level in "0123456789":
  30. return int(env_level)
  31. raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}")
  32. def compact(font: TTFont, level: int) -> TTFont:
  33. # Ideal plan:
  34. # 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
  35. # https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
  36. # 2. Extract glyph-glyph kerning and class-kerning from all present subtables
  37. # 3. Regroup into different subtable arrangements
  38. # 4. Put back into the lookup
  39. #
  40. # Actual implementation:
  41. # 2. Only class kerning is optimized currently
  42. # 3. If the input kerning is already in several subtables, the subtables
  43. # are not grouped together first; instead each subtable is treated
  44. # independently, so currently this step is:
  45. # Split existing subtables into more smaller subtables
  46. gpos = font["GPOS"]
  47. for lookup in gpos.table.LookupList.Lookup:
  48. if lookup.LookupType == 2:
  49. compact_lookup(font, level, lookup)
  50. elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
  51. compact_ext_lookup(font, level, lookup)
  52. return font
  53. def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
  54. new_subtables = compact_pair_pos(font, level, lookup.SubTable)
  55. lookup.SubTable = new_subtables
  56. lookup.SubTableCount = len(new_subtables)
  57. def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
  58. new_subtables = compact_pair_pos(
  59. font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
  60. )
  61. new_ext_subtables = []
  62. for subtable in new_subtables:
  63. ext_subtable = otTables.ExtensionPos()
  64. ext_subtable.Format = 1
  65. ext_subtable.ExtSubTable = subtable
  66. new_ext_subtables.append(ext_subtable)
  67. lookup.SubTable = new_ext_subtables
  68. lookup.SubTableCount = len(new_ext_subtables)
  69. def compact_pair_pos(
  70. font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
  71. ) -> Sequence[otTables.PairPos]:
  72. new_subtables = []
  73. for subtable in subtables:
  74. if subtable.Format == 1:
  75. # Not doing anything to Format 1 (yet?)
  76. new_subtables.append(subtable)
  77. elif subtable.Format == 2:
  78. new_subtables.extend(compact_class_pairs(font, level, subtable))
  79. return new_subtables
  80. def compact_class_pairs(
  81. font: TTFont, level: int, subtable: otTables.PairPos
  82. ) -> List[otTables.PairPos]:
  83. from fontTools.otlLib.builder import buildPairPosClassesSubtable
  84. subtables = []
  85. classes1: DefaultDict[int, List[str]] = defaultdict(list)
  86. for g in subtable.Coverage.glyphs:
  87. classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
  88. classes2: DefaultDict[int, List[str]] = defaultdict(list)
  89. for g, i in subtable.ClassDef2.classDefs.items():
  90. classes2[i].append(g)
  91. all_pairs = {}
  92. for i, class1 in enumerate(subtable.Class1Record):
  93. for j, class2 in enumerate(class1.Class2Record):
  94. if is_really_zero(class2):
  95. continue
  96. all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
  97. getattr(class2, "Value1", None),
  98. getattr(class2, "Value2", None),
  99. )
  100. grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
  101. for pairs in grouped_pairs:
  102. subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()))
  103. return subtables
  104. def is_really_zero(class2: otTables.Class2Record) -> bool:
  105. v1 = getattr(class2, "Value1", None)
  106. v2 = getattr(class2, "Value2", None)
  107. return (v1 is None or v1.getEffectiveFormat() == 0) and (
  108. v2 is None or v2.getEffectiveFormat() == 0
  109. )
# Kerning pairs of one subtable: the key is (class1, class2), each class being
# a tuple of glyph names; the value is the (Value1, Value2) pair of value
# records. compact_class_pairs builds the value records with
# getattr(..., None), so either may be None.
Pairs = Dict[
    Tuple[Tuple[str, ...], Tuple[str, ...]],
    Tuple[otBase.ValueRecord, otBase.ValueRecord],
]
  114. # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
  115. def _getClassRanges(glyphIDs: Iterable[int]):
  116. glyphIDs = sorted(glyphIDs)
  117. last = glyphIDs[0]
  118. ranges = [[last]]
  119. for glyphID in glyphIDs[1:]:
  120. if glyphID != last + 1:
  121. ranges[-1].append(last)
  122. ranges.append([glyphID])
  123. last = glyphID
  124. ranges[-1].append(last)
  125. return ranges, glyphIDs[0], glyphIDs[-1]
  126. # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
  127. def _classDef_bytes(
  128. class_data: List[Tuple[List[Tuple[int, int]], int, int]],
  129. class_ids: List[int],
  130. coverage=False,
  131. ):
  132. if not class_ids:
  133. return 0
  134. first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
  135. range_count = len(first_ranges)
  136. for i in class_ids[1:]:
  137. data = class_data[i]
  138. range_count += len(data[0])
  139. min_glyph_id = min(min_glyph_id, data[1])
  140. max_glyph_id = max(max_glyph_id, data[2])
  141. glyphCount = max_glyph_id - min_glyph_id + 1
  142. # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
  143. format1_bytes = 6 + glyphCount * 2
  144. # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
  145. format2_bytes = 4 + range_count * 6
  146. return min(format1_bytes, format2_bytes)
# Data shared by every Cluster built for one subtable; filled in by
# cluster_pairs_by_class2_coverage_custom_cost.
ClusteringContext = namedtuple(
    "ClusteringContext",
    [
        # One int bitmask per Class1: bit j is set when a pair exists with Class2 j
        "lines",
        # Sorted list of Class1 classes, each a tuple of glyph names
        "all_class1",
        # _getClassRanges() result for each Class1, same order as all_class1
        "all_class1_data",
        # _getClassRanges() result for each Class2
        "all_class2_data",
        # Bytes taken by one Value1 / Value2 record (2 bytes per format bit set)
        "valueFormat1_bytes",
        "valueFormat2_bytes",
    ],
)
  158. class Cluster:
  159. # TODO(Python 3.7): Turn this into a dataclass
  160. # ctx: ClusteringContext
  161. # indices: int
  162. # Caches
  163. # TODO(Python 3.8): use functools.cached_property instead of the
  164. # manually cached properties, and remove the cache fields listed below.
  165. # _indices: Optional[List[int]] = None
  166. # _column_indices: Optional[List[int]] = None
  167. # _cost: Optional[int] = None
  168. __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
  169. def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
  170. self.ctx = ctx
  171. self.indices_bitmask = indices_bitmask
  172. self._indices = None
  173. self._column_indices = None
  174. self._cost = None
  175. @property
  176. def indices(self):
  177. if self._indices is None:
  178. self._indices = bit_indices(self.indices_bitmask)
  179. return self._indices
  180. @property
  181. def column_indices(self):
  182. if self._column_indices is None:
  183. # Indices of columns that have a 1 in at least 1 line
  184. # => binary OR all the lines
  185. bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
  186. self._column_indices = bit_indices(bitmask)
  187. return self._column_indices
  188. @property
  189. def width(self):
  190. # Add 1 because Class2=0 cannot be used but needs to be encoded.
  191. return len(self.column_indices) + 1
  192. @property
  193. def cost(self):
  194. if self._cost is None:
  195. self._cost = (
  196. # 2 bytes to store the offset to this subtable in the Lookup table above
  197. 2
  198. # Contents of the subtable
  199. # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
  200. # uint16 posFormat Format identifier: format = 2
  201. + 2
  202. # Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable.
  203. + 2
  204. + self.coverage_bytes
  205. # uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero).
  206. + 2
  207. # uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero).
  208. + 2
  209. # Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
  210. + 2
  211. + self.classDef1_bytes
  212. # Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
  213. + 2
  214. + self.classDef2_bytes
  215. # uint16 class1Count Number of classes in classDef1 table — includes Class 0.
  216. + 2
  217. # uint16 class2Count Number of classes in classDef2 table — includes Class 0.
  218. + 2
  219. # Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1.
  220. + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
  221. * len(self.indices)
  222. * self.width
  223. )
  224. return self._cost
  225. @property
  226. def coverage_bytes(self):
  227. format1_bytes = (
  228. # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
  229. # uint16 coverageFormat Format identifier — format = 1
  230. # uint16 glyphCount Number of glyphs in the glyph array
  231. 4
  232. # uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order
  233. + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
  234. )
  235. ranges = sorted(
  236. chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
  237. )
  238. merged_range_count = 0
  239. last = None
  240. for start, end in ranges:
  241. if last is not None and start != last + 1:
  242. merged_range_count += 1
  243. last = end
  244. format2_bytes = (
  245. # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
  246. # uint16 coverageFormat Format identifier — format = 2
  247. # uint16 rangeCount Number of RangeRecords
  248. 4
  249. # RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID.
  250. # uint16 startGlyphID First glyph ID in the range
  251. # uint16 endGlyphID Last glyph ID in the range
  252. # uint16 startCoverageIndex Coverage Index of first glyph ID in range
  253. + merged_range_count * 6
  254. )
  255. return min(format1_bytes, format2_bytes)
  256. @property
  257. def classDef1_bytes(self):
  258. # We can skip encoding one of the Class1 definitions, and use
  259. # Class1=0 to represent it instead, because Class1 is gated by the
  260. # Coverage definition. Use Class1=0 for the highest byte savings.
  261. # Going through all options takes too long, pick the biggest class
  262. # = what happens in otlLib.builder.ClassDefBuilder.classes()
  263. biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
  264. return _classDef_bytes(
  265. self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
  266. )
  267. @property
  268. def classDef2_bytes(self):
  269. # All Class2 need to be encoded because we can't use Class2=0
  270. return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
  271. def cluster_pairs_by_class2_coverage_custom_cost(
  272. font: TTFont,
  273. pairs: Pairs,
  274. compression: int = 5,
  275. ) -> List[Pairs]:
  276. if not pairs:
  277. # The subtable was actually empty?
  278. return [pairs]
  279. # Sorted for reproducibility/determinism
  280. all_class1 = sorted(set(pair[0] for pair in pairs))
  281. all_class2 = sorted(set(pair[1] for pair in pairs))
  282. # Use Python's big ints for binary vectors representing each line
  283. lines = [
  284. sum(
  285. 1 << i if (class1, class2) in pairs else 0
  286. for i, class2 in enumerate(all_class2)
  287. )
  288. for class1 in all_class1
  289. ]
  290. # Map glyph names to ids and work with ints throughout for ClassDef formats
  291. name_to_id = font.getReverseGlyphMap()
  292. # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
  293. all_class1_data = [
  294. _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
  295. ]
  296. all_class2_data = [
  297. _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
  298. ]
  299. format1 = 0
  300. format2 = 0
  301. for pair, value in pairs.items():
  302. format1 |= value[0].getEffectiveFormat() if value[0] else 0
  303. format2 |= value[1].getEffectiveFormat() if value[1] else 0
  304. valueFormat1_bytes = bit_count(format1) * 2
  305. valueFormat2_bytes = bit_count(format2) * 2
  306. ctx = ClusteringContext(
  307. lines,
  308. all_class1,
  309. all_class1_data,
  310. all_class2_data,
  311. valueFormat1_bytes,
  312. valueFormat2_bytes,
  313. )
  314. cluster_cache: Dict[int, Cluster] = {}
  315. def make_cluster(indices: int) -> Cluster:
  316. cluster = cluster_cache.get(indices, None)
  317. if cluster is not None:
  318. return cluster
  319. cluster = Cluster(ctx, indices)
  320. cluster_cache[indices] = cluster
  321. return cluster
  322. def merge(cluster: Cluster, other: Cluster) -> Cluster:
  323. return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
  324. # Agglomerative clustering by hand, checking the cost gain of the new
  325. # cluster against the previously separate clusters
  326. # Start with 1 cluster per line
  327. # cluster = set of lines = new subtable
  328. clusters = [make_cluster(1 << i) for i in range(len(lines))]
  329. # Cost of 1 cluster with everything
  330. # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
  331. cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
  332. log.debug(f" len(clusters) = {len(clusters)}")
  333. while len(clusters) > 1:
  334. lowest_cost_change = None
  335. best_cluster_index = None
  336. best_other_index = None
  337. best_merged = None
  338. for i, cluster in enumerate(clusters):
  339. for j, other in enumerate(clusters[i + 1 :]):
  340. merged = merge(cluster, other)
  341. cost_change = merged.cost - cluster.cost - other.cost
  342. if lowest_cost_change is None or cost_change < lowest_cost_change:
  343. lowest_cost_change = cost_change
  344. best_cluster_index = i
  345. best_other_index = i + 1 + j
  346. best_merged = merged
  347. assert lowest_cost_change is not None
  348. assert best_cluster_index is not None
  349. assert best_other_index is not None
  350. assert best_merged is not None
  351. # If the best merge we found is still taking down the file size, then
  352. # there's no question: we must do it, because it's beneficial in both
  353. # ways (lower file size and lower number of subtables). However, if the
  354. # best merge we found is not reducing file size anymore, then we need to
  355. # look at the other stop criteria = the compression factor.
  356. if lowest_cost_change > 0:
  357. # Stop critera: check whether we should keep merging.
  358. # Compute size reduction brought by splitting
  359. cost_after_splitting = sum(c.cost for c in clusters)
  360. # size_reduction so that after = before * (1 - size_reduction)
  361. # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
  362. size_reduction = 1 - cost_after_splitting / cost_before_splitting
  363. # Force more merging by taking into account the compression number.
  364. # Target behaviour: compression number = 1 to 9, default 5 like gzip
  365. # - 1 = accept to add 1 subtable to reduce size by 50%
  366. # - 5 = accept to add 5 subtables to reduce size by 50%
  367. # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
  368. # Given the size reduction we have achieved so far, compute how many
  369. # new subtables are acceptable.
  370. max_new_subtables = -log2(1 - size_reduction) * compression
  371. log.debug(
  372. f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}",
  373. )
  374. if compression == 9:
  375. # Override level 9 to mean: create any number of subtables
  376. max_new_subtables = len(clusters)
  377. # If we have managed to take the number of new subtables below the
  378. # threshold, then we can stop.
  379. if len(clusters) <= max_new_subtables + 1:
  380. break
  381. # No reason to stop yet, do the merge and move on to the next.
  382. del clusters[best_other_index]
  383. clusters[best_cluster_index] = best_merged
  384. # All clusters are final; turn bitmasks back into the "Pairs" format
  385. pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
  386. for pair, values in pairs.items():
  387. pairs_by_class1[pair[0]][pair] = values
  388. pairs_groups: List[Pairs] = []
  389. for cluster in clusters:
  390. pairs_group: Pairs = dict()
  391. for i in cluster.indices:
  392. class1 = all_class1[i]
  393. pairs_group.update(pairs_by_class1[class1])
  394. pairs_groups.append(pairs_group)
  395. return pairs_groups