# svg.py (9.2 KB)
  1. from __future__ import annotations
  2. import re
  3. from functools import lru_cache
  4. from itertools import chain, count
  5. from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
  6. try:
  7. from lxml import etree
  8. except ImportError:
  9. # lxml is required for subsetting SVG, but we prefer to delay the import error
  10. # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
  11. etree = None
  12. from fontTools import ttLib
  13. from fontTools.subset.util import _add_method
  14. from fontTools.ttLib.tables.S_V_G_ import SVGDocument
# Names exported by a star-import of this module.
__all__ = ["subset_glyphs"]
  16. GID_RE = re.compile(r"^glyph(\d+)$")
  17. NAMESPACES = {
  18. "svg": "http://www.w3.org/2000/svg",
  19. "xlink": "http://www.w3.org/1999/xlink",
  20. }
  21. XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'
  22. # TODO(antrotype): Replace with functools.cache once we are 3.9+
  23. @lru_cache(maxsize=None)
  24. def xpath(path):
  25. # compile XPath upfront, caching result to reuse on multiple elements
  26. return etree.XPath(path, namespaces=NAMESPACES)
  27. def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
  28. # select all svg elements with 'id' attribute no matter where they are
  29. # including the root element itself:
  30. # https://github.com/fonttools/fonttools/issues/2548
  31. return {el.attrib["id"]: el for el in xpath("//svg:*[@id]")(tree)}
  32. def parse_css_declarations(style_attr: str) -> Dict[str, str]:
  33. # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
  34. # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
  35. result = {}
  36. for declaration in style_attr.split(";"):
  37. if declaration.count(":") == 1:
  38. property_name, value = declaration.split(":")
  39. property_name = property_name.strip()
  40. result[property_name] = value.strip()
  41. elif declaration.strip():
  42. raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
  43. return result
  44. def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
  45. # Yield all the ids that can be reached via references from this element tree.
  46. # We currently support xlink:href (as used by <use> and gradient templates),
  47. # and local url(#...) links found in fill or clip-path attributes
  48. # TODO(anthrotype): Check we aren't missing other supported kinds of reference
  49. find_svg_elements_with_references = xpath(
  50. ".//svg:*[ "
  51. "starts-with(@xlink:href, '#') "
  52. "or starts-with(@fill, 'url(#') "
  53. "or starts-with(@clip-path, 'url(#') "
  54. "or contains(@style, ':url(#') "
  55. "]",
  56. )
  57. for el in chain([tree], find_svg_elements_with_references(tree)):
  58. ref_id = href_local_target(el)
  59. if ref_id is not None:
  60. yield ref_id
  61. attrs = el.attrib
  62. if "style" in attrs:
  63. attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])}
  64. for attr in ("fill", "clip-path"):
  65. if attr in attrs:
  66. value = attrs[attr]
  67. if value.startswith("url(#") and value.endswith(")"):
  68. ref_id = value[5:-1]
  69. assert ref_id
  70. yield ref_id
  71. def closure_element_ids(
  72. elements: Dict[str, etree.Element], element_ids: Set[str]
  73. ) -> None:
  74. # Expand the initial subset of element ids to include ids that can be reached
  75. # via references from the initial set.
  76. unvisited = element_ids
  77. while unvisited:
  78. referenced: Set[str] = set()
  79. for el_id in unvisited:
  80. if el_id not in elements:
  81. # ignore dangling reference; not our job to validate svg
  82. continue
  83. referenced.update(iter_referenced_ids(elements[el_id]))
  84. referenced -= element_ids
  85. element_ids.update(referenced)
  86. unvisited = referenced
  87. def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
  88. # Keep elements if their id is in the subset, or any of their children's id is.
  89. # Drop elements whose id is not in the subset, and either have no children,
  90. # or all their children are being dropped.
  91. if el.attrib.get("id") in retained_ids:
  92. # if id is in the set, don't recurse; keep whole subtree
  93. return True
  94. # recursively subset all the children; we use a list comprehension instead
  95. # of a parentheses-less generator expression because we don't want any() to
  96. # short-circuit, as our function has a side effect of dropping empty elements.
  97. if any([subset_elements(e, retained_ids) for e in el]):
  98. return True
  99. assert len(el) == 0
  100. parent = el.getparent()
  101. if parent is not None:
  102. parent.remove(el)
  103. return False
  104. def remap_glyph_ids(
  105. svg: etree.Element, glyph_index_map: Dict[int, int]
  106. ) -> Dict[str, str]:
  107. # Given {old_gid: new_gid} map, rename all elements containing id="glyph{gid}"
  108. # special attributes
  109. elements = group_elements_by_id(svg)
  110. id_map = {}
  111. for el_id, el in elements.items():
  112. m = GID_RE.match(el_id)
  113. if not m:
  114. continue
  115. old_index = int(m.group(1))
  116. new_index = glyph_index_map.get(old_index)
  117. if new_index is not None:
  118. if old_index == new_index:
  119. continue
  120. new_id = f"glyph{new_index}"
  121. else:
  122. # If the old index is missing, the element correspond to a glyph that was
  123. # excluded from the font's subset.
  124. # We rename it to avoid clashes with the new GIDs or other element ids.
  125. new_id = f".{el_id}"
  126. n = count(1)
  127. while new_id in elements:
  128. new_id = f"{new_id}.{next(n)}"
  129. id_map[el_id] = new_id
  130. el.attrib["id"] = new_id
  131. return id_map
  132. def href_local_target(el: etree.Element) -> Optional[str]:
  133. if XLINK_HREF in el.attrib:
  134. href = el.attrib[XLINK_HREF]
  135. if href.startswith("#") and len(href) > 1:
  136. return href[1:] # drop the leading #
  137. return None
  138. def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
  139. # update all xlink:href="#glyph..." attributes to point to the new glyph ids
  140. for el in xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")(svg):
  141. old_id = href_local_target(el)
  142. assert old_id is not None
  143. if old_id in id_map:
  144. new_id = id_map[old_id]
  145. el.attrib[XLINK_HREF] = f"#{new_id}"
  146. def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
  147. # Yield sorted, non-overlapping (min, max) ranges of consecutive integers
  148. sorted_ints = iter(sorted(set(ints)))
  149. try:
  150. start = end = next(sorted_ints)
  151. except StopIteration:
  152. return
  153. for v in sorted_ints:
  154. if v - 1 == end:
  155. end = v
  156. else:
  157. yield (start, end)
  158. start = end = v
  159. yield (start, end)
  160. @_add_method(ttLib.getTableClass("SVG "))
  161. def subset_glyphs(self, s) -> bool:
  162. if etree is None:
  163. raise ImportError("No module named 'lxml', required to subset SVG")
  164. # glyph names (before subsetting)
  165. glyph_order: List[str] = s.orig_glyph_order
  166. # map from glyph names to original glyph indices
  167. rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
  168. # map from original to new glyph indices (after subsetting)
  169. glyph_index_map: Dict[int, int] = s.glyph_index_map
  170. new_docs: List[SVGDocument] = []
  171. for doc in self.docList:
  172. glyphs = {
  173. glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
  174. }.intersection(s.glyphs)
  175. if not glyphs:
  176. # no intersection: we can drop the whole record
  177. continue
  178. svg = etree.fromstring(
  179. # encode because fromstring dislikes xml encoding decl if input is str.
  180. # SVG xml encoding must be utf-8 as per OT spec.
  181. doc.data.encode("utf-8"),
  182. parser=etree.XMLParser(
  183. # Disable libxml2 security restrictions to support very deep trees.
  184. # Without this we would get an error like this:
  185. # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
  186. # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
  187. huge_tree=True,
  188. # ignore blank text as it's not meaningful in OT-SVG; it also prevents
  189. # dangling tail text after removing an element when pretty_print=True
  190. remove_blank_text=True,
  191. # don't replace entities; we don't expect any in OT-SVG and they may
  192. # be abused for XXE attacks
  193. resolve_entities=False,
  194. ),
  195. )
  196. elements = group_elements_by_id(svg)
  197. gids = {rev_orig_glyph_map[g] for g in glyphs}
  198. element_ids = {f"glyph{i}" for i in gids}
  199. closure_element_ids(elements, element_ids)
  200. if not subset_elements(svg, element_ids):
  201. continue
  202. if not s.options.retain_gids:
  203. id_map = remap_glyph_ids(svg, glyph_index_map)
  204. update_glyph_href_links(svg, id_map)
  205. new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode("utf-8")
  206. new_gids = (glyph_index_map[i] for i in gids)
  207. for start, end in ranges(new_gids):
  208. new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))
  209. self.docList = new_docs
  210. return bool(self.docList)