_mathtext.py 105 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851
  1. """
  2. Implementation details for :mod:`.mathtext`.
  3. """
  4. from __future__ import annotations
  5. import abc
  6. import copy
  7. import enum
  8. import functools
  9. import logging
  10. import os
  11. import re
  12. import types
  13. import unicodedata
  14. import string
  15. import typing as T
  16. from typing import NamedTuple
  17. import numpy as np
  18. from pyparsing import (
  19. Empty, Forward, Literal, NotAny, oneOf, OneOrMore, Optional,
  20. ParseBaseException, ParseException, ParseExpression, ParseFatalException,
  21. ParserElement, ParseResults, QuotedString, Regex, StringEnd, ZeroOrMore,
  22. pyparsing_common, Group)
  23. import matplotlib as mpl
  24. from . import cbook
  25. from ._mathtext_data import (
  26. latex_to_bakoma, stix_glyph_fixes, stix_virtual_fonts, tex2uni)
  27. from .font_manager import FontProperties, findfont, get_font
  28. from .ft2font import FT2Font, FT2Image, KERNING_DEFAULT
  29. from packaging.version import parse as parse_version
  30. from pyparsing import __version__ as pyparsing_version
  31. if parse_version(pyparsing_version).major < 3:
  32. from pyparsing import nestedExpr as nested_expr
  33. else:
  34. from pyparsing import nested_expr
  35. if T.TYPE_CHECKING:
  36. from collections.abc import Iterable
  37. from .ft2font import Glyph
  38. ParserElement.enablePackrat()
  39. _log = logging.getLogger("matplotlib.mathtext")
  40. ##############################################################################
  41. # FONTS
  42. def get_unicode_index(symbol: str) -> int: # Publicly exported.
  43. r"""
  44. Return the integer index (from the Unicode table) of *symbol*.
  45. Parameters
  46. ----------
  47. symbol : str
  48. A single (Unicode) character, a TeX command (e.g. r'\pi') or a Type1
  49. symbol name (e.g. 'phi').
  50. """
  51. try: # This will succeed if symbol is a single Unicode char
  52. return ord(symbol)
  53. except TypeError:
  54. pass
  55. try: # Is symbol a TeX symbol (i.e. \alpha)
  56. return tex2uni[symbol.strip("\\")]
  57. except KeyError as err:
  58. raise ValueError(
  59. f"{symbol!r} is not a valid Unicode character or TeX/Type1 symbol"
  60. ) from err
  61. class VectorParse(NamedTuple):
  62. """
  63. The namedtuple type returned by ``MathTextParser("path").parse(...)``.
  64. Attributes
  65. ----------
  66. width, height, depth : float
  67. The global metrics.
  68. glyphs : list
  69. The glyphs including their positions.
  70. rect : list
  71. The list of rectangles.
  72. """
  73. width: float
  74. height: float
  75. depth: float
  76. glyphs: list[tuple[FT2Font, float, int, float, float]]
  77. rects: list[tuple[float, float, float, float]]
  78. VectorParse.__module__ = "matplotlib.mathtext"
  79. class RasterParse(NamedTuple):
  80. """
  81. The namedtuple type returned by ``MathTextParser("agg").parse(...)``.
  82. Attributes
  83. ----------
  84. ox, oy : float
  85. The offsets are always zero.
  86. width, height, depth : float
  87. The global metrics.
  88. image : FT2Image
  89. A raster image.
  90. """
  91. ox: float
  92. oy: float
  93. width: float
  94. height: float
  95. depth: float
  96. image: FT2Image
  97. RasterParse.__module__ = "matplotlib.mathtext"
  98. class Output:
  99. r"""
  100. Result of `ship`\ping a box: lists of positioned glyphs and rectangles.
  101. This class is not exposed to end users, but converted to a `VectorParse` or
  102. a `RasterParse` by `.MathTextParser.parse`.
  103. """
  104. def __init__(self, box: Box):
  105. self.box = box
  106. self.glyphs: list[tuple[float, float, FontInfo]] = [] # (ox, oy, info)
  107. self.rects: list[tuple[float, float, float, float]] = [] # (x1, y1, x2, y2)
  108. def to_vector(self) -> VectorParse:
  109. w, h, d = map(
  110. np.ceil, [self.box.width, self.box.height, self.box.depth])
  111. gs = [(info.font, info.fontsize, info.num, ox, h - oy + info.offset)
  112. for ox, oy, info in self.glyphs]
  113. rs = [(x1, h - y2, x2 - x1, y2 - y1)
  114. for x1, y1, x2, y2 in self.rects]
  115. return VectorParse(w, h + d, d, gs, rs)
  116. def to_raster(self, *, antialiased: bool) -> RasterParse:
  117. # Metrics y's and mathtext y's are oriented in opposite directions,
  118. # hence the switch between ymin and ymax.
  119. xmin = min([*[ox + info.metrics.xmin for ox, oy, info in self.glyphs],
  120. *[x1 for x1, y1, x2, y2 in self.rects], 0]) - 1
  121. ymin = min([*[oy - info.metrics.ymax for ox, oy, info in self.glyphs],
  122. *[y1 for x1, y1, x2, y2 in self.rects], 0]) - 1
  123. xmax = max([*[ox + info.metrics.xmax for ox, oy, info in self.glyphs],
  124. *[x2 for x1, y1, x2, y2 in self.rects], 0]) + 1
  125. ymax = max([*[oy - info.metrics.ymin for ox, oy, info in self.glyphs],
  126. *[y2 for x1, y1, x2, y2 in self.rects], 0]) + 1
  127. w = xmax - xmin
  128. h = ymax - ymin - self.box.depth
  129. d = ymax - ymin - self.box.height
  130. image = FT2Image(np.ceil(w), np.ceil(h + max(d, 0)))
  131. # Ideally, we could just use self.glyphs and self.rects here, shifting
  132. # their coordinates by (-xmin, -ymin), but this yields slightly
  133. # different results due to floating point slop; shipping twice is the
  134. # old approach and keeps baseline images backcompat.
  135. shifted = ship(self.box, (-xmin, -ymin))
  136. for ox, oy, info in shifted.glyphs:
  137. info.font.draw_glyph_to_bitmap(
  138. image, ox, oy - info.metrics.iceberg, info.glyph,
  139. antialiased=antialiased)
  140. for x1, y1, x2, y2 in shifted.rects:
  141. height = max(int(y2 - y1) - 1, 0)
  142. if height == 0:
  143. center = (y2 + y1) / 2
  144. y = int(center - (height + 1) / 2)
  145. else:
  146. y = int(y1)
  147. image.draw_rect_filled(int(x1), y, np.ceil(x2), y + height)
  148. return RasterParse(0, 0, w, h + d, d, image)
  149. class FontMetrics(NamedTuple):
  150. """
  151. Metrics of a font.
  152. Attributes
  153. ----------
  154. advance : float
  155. The advance distance (in points) of the glyph.
  156. height : float
  157. The height of the glyph in points.
  158. width : float
  159. The width of the glyph in points.
  160. xmin, xmax, ymin, ymax : float
  161. The ink rectangle of the glyph.
  162. iceberg : float
  163. The distance from the baseline to the top of the glyph. (This corresponds to
  164. TeX's definition of "height".)
  165. slanted : bool
  166. Whether the glyph should be considered as "slanted" (currently used for kerning
  167. sub/superscripts).
  168. """
  169. advance: float
  170. height: float
  171. width: float
  172. xmin: float
  173. xmax: float
  174. ymin: float
  175. ymax: float
  176. iceberg: float
  177. slanted: bool
  178. class FontInfo(NamedTuple):
  179. font: FT2Font
  180. fontsize: float
  181. postscript_name: str
  182. metrics: FontMetrics
  183. num: int
  184. glyph: Glyph
  185. offset: float
  186. class Fonts(abc.ABC):
  187. """
  188. An abstract base class for a system of fonts to use for mathtext.
  189. The class must be able to take symbol keys and font file names and
  190. return the character metrics. It also delegates to a backend class
  191. to do the actual drawing.
  192. """
  193. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  194. """
  195. Parameters
  196. ----------
  197. default_font_prop : `~.font_manager.FontProperties`
  198. The default non-math font, or the base font for Unicode (generic)
  199. font rendering.
  200. load_glyph_flags : int
  201. Flags passed to the glyph loader (e.g. ``FT_Load_Glyph`` and
  202. ``FT_Load_Char`` for FreeType-based fonts).
  203. """
  204. self.default_font_prop = default_font_prop
  205. self.load_glyph_flags = load_glyph_flags
  206. def get_kern(self, font1: str, fontclass1: str, sym1: str, fontsize1: float,
  207. font2: str, fontclass2: str, sym2: str, fontsize2: float,
  208. dpi: float) -> float:
  209. """
  210. Get the kerning distance for font between *sym1* and *sym2*.
  211. See `~.Fonts.get_metrics` for a detailed description of the parameters.
  212. """
  213. return 0.
  214. def _get_font(self, font: str) -> FT2Font:
  215. raise NotImplementedError
  216. def _get_info(self, font: str, font_class: str, sym: str, fontsize: float,
  217. dpi: float) -> FontInfo:
  218. raise NotImplementedError
  219. def get_metrics(self, font: str, font_class: str, sym: str, fontsize: float,
  220. dpi: float) -> FontMetrics:
  221. r"""
  222. Parameters
  223. ----------
  224. font : str
  225. One of the TeX font names: "tt", "it", "rm", "cal", "sf", "bf",
  226. "default", "regular", "bb", "frak", "scr". "default" and "regular"
  227. are synonyms and use the non-math font.
  228. font_class : str
  229. One of the TeX font names (as for *font*), but **not** "bb",
  230. "frak", or "scr". This is used to combine two font classes. The
  231. only supported combination currently is ``get_metrics("frak", "bf",
  232. ...)``.
  233. sym : str
  234. A symbol in raw TeX form, e.g., "1", "x", or "\sigma".
  235. fontsize : float
  236. Font size in points.
  237. dpi : float
  238. Rendering dots-per-inch.
  239. Returns
  240. -------
  241. FontMetrics
  242. """
  243. info = self._get_info(font, font_class, sym, fontsize, dpi)
  244. return info.metrics
  245. def render_glyph(self, output: Output, ox: float, oy: float, font: str,
  246. font_class: str, sym: str, fontsize: float, dpi: float) -> None:
  247. """
  248. At position (*ox*, *oy*), draw the glyph specified by the remaining
  249. parameters (see `get_metrics` for their detailed description).
  250. """
  251. info = self._get_info(font, font_class, sym, fontsize, dpi)
  252. output.glyphs.append((ox, oy, info))
  253. def render_rect_filled(self, output: Output,
  254. x1: float, y1: float, x2: float, y2: float) -> None:
  255. """
  256. Draw a filled rectangle from (*x1*, *y1*) to (*x2*, *y2*).
  257. """
  258. output.rects.append((x1, y1, x2, y2))
  259. def get_xheight(self, font: str, fontsize: float, dpi: float) -> float:
  260. """
  261. Get the xheight for the given *font* and *fontsize*.
  262. """
  263. raise NotImplementedError()
  264. def get_underline_thickness(self, font: str, fontsize: float, dpi: float) -> float:
  265. """
  266. Get the line thickness that matches the given font. Used as a
  267. base unit for drawing lines such as in a fraction or radical.
  268. """
  269. raise NotImplementedError()
  270. def get_sized_alternatives_for_symbol(self, fontname: str,
  271. sym: str) -> list[tuple[str, str]]:
  272. """
  273. Override if your font provides multiple sizes of the same
  274. symbol. Should return a list of symbols matching *sym* in
  275. various sizes. The expression renderer will select the most
  276. appropriate size for a given situation from this list.
  277. """
  278. return [(fontname, sym)]
  279. class TruetypeFonts(Fonts, metaclass=abc.ABCMeta):
  280. """
  281. A generic base class for all font setups that use Truetype fonts
  282. (through FT2Font).
  283. """
  284. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  285. super().__init__(default_font_prop, load_glyph_flags)
  286. # Per-instance cache.
  287. self._get_info = functools.cache(self._get_info) # type: ignore[method-assign]
  288. self._fonts = {}
  289. self.fontmap: dict[str | int, str] = {}
  290. filename = findfont(self.default_font_prop)
  291. default_font = get_font(filename)
  292. self._fonts['default'] = default_font
  293. self._fonts['regular'] = default_font
  294. def _get_font(self, font: str | int) -> FT2Font:
  295. if font in self.fontmap:
  296. basename = self.fontmap[font]
  297. else:
  298. # NOTE: An int is only passed by subclasses which have placed int keys into
  299. # `self.fontmap`, so we must cast this to confirm it to typing.
  300. basename = T.cast(str, font)
  301. cached_font = self._fonts.get(basename)
  302. if cached_font is None and os.path.exists(basename):
  303. cached_font = get_font(basename)
  304. self._fonts[basename] = cached_font
  305. self._fonts[cached_font.postscript_name] = cached_font
  306. self._fonts[cached_font.postscript_name.lower()] = cached_font
  307. return T.cast(FT2Font, cached_font) # FIXME: Not sure this is guaranteed.
  308. def _get_offset(self, font: FT2Font, glyph: Glyph, fontsize: float,
  309. dpi: float) -> float:
  310. if font.postscript_name == 'Cmex10':
  311. return (glyph.height / 64 / 2) + (fontsize/3 * dpi/72)
  312. return 0.
  313. def _get_glyph(self, fontname: str, font_class: str,
  314. sym: str) -> tuple[FT2Font, int, bool]:
  315. raise NotImplementedError
  316. # The return value of _get_info is cached per-instance.
  317. def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
  318. dpi: float) -> FontInfo:
  319. font, num, slanted = self._get_glyph(fontname, font_class, sym)
  320. font.set_size(fontsize, dpi)
  321. glyph = font.load_char(num, flags=self.load_glyph_flags)
  322. xmin, ymin, xmax, ymax = [val/64.0 for val in glyph.bbox]
  323. offset = self._get_offset(font, glyph, fontsize, dpi)
  324. metrics = FontMetrics(
  325. advance = glyph.linearHoriAdvance/65536.0,
  326. height = glyph.height/64.0,
  327. width = glyph.width/64.0,
  328. xmin = xmin,
  329. xmax = xmax,
  330. ymin = ymin+offset,
  331. ymax = ymax+offset,
  332. # iceberg is the equivalent of TeX's "height"
  333. iceberg = glyph.horiBearingY/64.0 + offset,
  334. slanted = slanted
  335. )
  336. return FontInfo(
  337. font = font,
  338. fontsize = fontsize,
  339. postscript_name = font.postscript_name,
  340. metrics = metrics,
  341. num = num,
  342. glyph = glyph,
  343. offset = offset
  344. )
  345. def get_xheight(self, fontname: str, fontsize: float, dpi: float) -> float:
  346. font = self._get_font(fontname)
  347. font.set_size(fontsize, dpi)
  348. pclt = font.get_sfnt_table('pclt')
  349. if pclt is None:
  350. # Some fonts don't store the xHeight, so we do a poor man's xHeight
  351. metrics = self.get_metrics(
  352. fontname, mpl.rcParams['mathtext.default'], 'x', fontsize, dpi)
  353. return metrics.iceberg
  354. xHeight = (pclt['xHeight'] / 64.0) * (fontsize / 12.0) * (dpi / 100.0)
  355. return xHeight
  356. def get_underline_thickness(self, font: str, fontsize: float, dpi: float) -> float:
  357. # This function used to grab underline thickness from the font
  358. # metrics, but that information is just too un-reliable, so it
  359. # is now hardcoded.
  360. return ((0.75 / 12.0) * fontsize * dpi) / 72.0
  361. def get_kern(self, font1: str, fontclass1: str, sym1: str, fontsize1: float,
  362. font2: str, fontclass2: str, sym2: str, fontsize2: float,
  363. dpi: float) -> float:
  364. if font1 == font2 and fontsize1 == fontsize2:
  365. info1 = self._get_info(font1, fontclass1, sym1, fontsize1, dpi)
  366. info2 = self._get_info(font2, fontclass2, sym2, fontsize2, dpi)
  367. font = info1.font
  368. return font.get_kerning(info1.num, info2.num, KERNING_DEFAULT) / 64
  369. return super().get_kern(font1, fontclass1, sym1, fontsize1,
  370. font2, fontclass2, sym2, fontsize2, dpi)
  371. class BakomaFonts(TruetypeFonts):
  372. """
  373. Use the Bakoma TrueType fonts for rendering.
  374. Symbols are strewn about a number of font files, each of which has
  375. its own proprietary 8-bit encoding.
  376. """
  377. _fontmap = {
  378. 'cal': 'cmsy10',
  379. 'rm': 'cmr10',
  380. 'tt': 'cmtt10',
  381. 'it': 'cmmi10',
  382. 'bf': 'cmb10',
  383. 'sf': 'cmss10',
  384. 'ex': 'cmex10',
  385. }
  386. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  387. self._stix_fallback = StixFonts(default_font_prop, load_glyph_flags)
  388. super().__init__(default_font_prop, load_glyph_flags)
  389. for key, val in self._fontmap.items():
  390. fullpath = findfont(val)
  391. self.fontmap[key] = fullpath
  392. self.fontmap[val] = fullpath
  393. _slanted_symbols = set(r"\int \oint".split())
  394. def _get_glyph(self, fontname: str, font_class: str,
  395. sym: str) -> tuple[FT2Font, int, bool]:
  396. font = None
  397. if fontname in self.fontmap and sym in latex_to_bakoma:
  398. basename, num = latex_to_bakoma[sym]
  399. slanted = (basename == "cmmi10") or sym in self._slanted_symbols
  400. font = self._get_font(basename)
  401. elif len(sym) == 1:
  402. slanted = (fontname == "it")
  403. font = self._get_font(fontname)
  404. if font is not None:
  405. num = ord(sym)
  406. if font is not None and font.get_char_index(num) != 0:
  407. return font, num, slanted
  408. else:
  409. return self._stix_fallback._get_glyph(fontname, font_class, sym)
  410. # The Bakoma fonts contain many pre-sized alternatives for the
  411. # delimiters. The AutoSizedChar class will use these alternatives
  412. # and select the best (closest sized) glyph.
  413. _size_alternatives = {
  414. '(': [('rm', '('), ('ex', '\xa1'), ('ex', '\xb3'),
  415. ('ex', '\xb5'), ('ex', '\xc3')],
  416. ')': [('rm', ')'), ('ex', '\xa2'), ('ex', '\xb4'),
  417. ('ex', '\xb6'), ('ex', '\x21')],
  418. '{': [('cal', '{'), ('ex', '\xa9'), ('ex', '\x6e'),
  419. ('ex', '\xbd'), ('ex', '\x28')],
  420. '}': [('cal', '}'), ('ex', '\xaa'), ('ex', '\x6f'),
  421. ('ex', '\xbe'), ('ex', '\x29')],
  422. # The fourth size of '[' is mysteriously missing from the BaKoMa
  423. # font, so I've omitted it for both '[' and ']'
  424. '[': [('rm', '['), ('ex', '\xa3'), ('ex', '\x68'),
  425. ('ex', '\x22')],
  426. ']': [('rm', ']'), ('ex', '\xa4'), ('ex', '\x69'),
  427. ('ex', '\x23')],
  428. r'\lfloor': [('ex', '\xa5'), ('ex', '\x6a'),
  429. ('ex', '\xb9'), ('ex', '\x24')],
  430. r'\rfloor': [('ex', '\xa6'), ('ex', '\x6b'),
  431. ('ex', '\xba'), ('ex', '\x25')],
  432. r'\lceil': [('ex', '\xa7'), ('ex', '\x6c'),
  433. ('ex', '\xbb'), ('ex', '\x26')],
  434. r'\rceil': [('ex', '\xa8'), ('ex', '\x6d'),
  435. ('ex', '\xbc'), ('ex', '\x27')],
  436. r'\langle': [('ex', '\xad'), ('ex', '\x44'),
  437. ('ex', '\xbf'), ('ex', '\x2a')],
  438. r'\rangle': [('ex', '\xae'), ('ex', '\x45'),
  439. ('ex', '\xc0'), ('ex', '\x2b')],
  440. r'\__sqrt__': [('ex', '\x70'), ('ex', '\x71'),
  441. ('ex', '\x72'), ('ex', '\x73')],
  442. r'\backslash': [('ex', '\xb2'), ('ex', '\x2f'),
  443. ('ex', '\xc2'), ('ex', '\x2d')],
  444. r'/': [('rm', '/'), ('ex', '\xb1'), ('ex', '\x2e'),
  445. ('ex', '\xcb'), ('ex', '\x2c')],
  446. r'\widehat': [('rm', '\x5e'), ('ex', '\x62'), ('ex', '\x63'),
  447. ('ex', '\x64')],
  448. r'\widetilde': [('rm', '\x7e'), ('ex', '\x65'), ('ex', '\x66'),
  449. ('ex', '\x67')],
  450. r'<': [('cal', 'h'), ('ex', 'D')],
  451. r'>': [('cal', 'i'), ('ex', 'E')]
  452. }
  453. for alias, target in [(r'\leftparen', '('),
  454. (r'\rightparent', ')'),
  455. (r'\leftbrace', '{'),
  456. (r'\rightbrace', '}'),
  457. (r'\leftbracket', '['),
  458. (r'\rightbracket', ']'),
  459. (r'\{', '{'),
  460. (r'\}', '}'),
  461. (r'\[', '['),
  462. (r'\]', ']')]:
  463. _size_alternatives[alias] = _size_alternatives[target]
  464. def get_sized_alternatives_for_symbol(self, fontname: str,
  465. sym: str) -> list[tuple[str, str]]:
  466. return self._size_alternatives.get(sym, [(fontname, sym)])
  467. class UnicodeFonts(TruetypeFonts):
  468. """
  469. An abstract base class for handling Unicode fonts.
  470. While some reasonably complete Unicode fonts (such as DejaVu) may
  471. work in some situations, the only Unicode font I'm aware of with a
  472. complete set of math symbols is STIX.
  473. This class will "fallback" on the Bakoma fonts when a required
  474. symbol cannot be found in the font.
  475. """
  476. # Some glyphs are not present in the `cmr10` font, and must be brought in
  477. # from `cmsy10`. Map the Unicode indices of those glyphs to the indices at
  478. # which they are found in `cmsy10`.
  479. _cmr10_substitutions = {
  480. 0x00D7: 0x00A3, # Multiplication sign.
  481. 0x2212: 0x00A1, # Minus sign.
  482. }
  483. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  484. # This must come first so the backend's owner is set correctly
  485. fallback_rc = mpl.rcParams['mathtext.fallback']
  486. font_cls: type[TruetypeFonts] | None = {
  487. 'stix': StixFonts,
  488. 'stixsans': StixSansFonts,
  489. 'cm': BakomaFonts
  490. }.get(fallback_rc)
  491. self._fallback_font = (font_cls(default_font_prop, load_glyph_flags)
  492. if font_cls else None)
  493. super().__init__(default_font_prop, load_glyph_flags)
  494. for texfont in "cal rm tt it bf sf bfit".split():
  495. prop = mpl.rcParams['mathtext.' + texfont]
  496. font = findfont(prop)
  497. self.fontmap[texfont] = font
  498. prop = FontProperties('cmex10')
  499. font = findfont(prop)
  500. self.fontmap['ex'] = font
  501. # include STIX sized alternatives for glyphs if fallback is STIX
  502. if isinstance(self._fallback_font, StixFonts):
  503. stixsizedaltfonts = {
  504. 0: 'STIXGeneral',
  505. 1: 'STIXSizeOneSym',
  506. 2: 'STIXSizeTwoSym',
  507. 3: 'STIXSizeThreeSym',
  508. 4: 'STIXSizeFourSym',
  509. 5: 'STIXSizeFiveSym'}
  510. for size, name in stixsizedaltfonts.items():
  511. fullpath = findfont(name)
  512. self.fontmap[size] = fullpath
  513. self.fontmap[name] = fullpath
  514. _slanted_symbols = set(r"\int \oint".split())
  515. def _map_virtual_font(self, fontname: str, font_class: str,
  516. uniindex: int) -> tuple[str, int]:
  517. return fontname, uniindex
  518. def _get_glyph(self, fontname: str, font_class: str,
  519. sym: str) -> tuple[FT2Font, int, bool]:
  520. try:
  521. uniindex = get_unicode_index(sym)
  522. found_symbol = True
  523. except ValueError:
  524. uniindex = ord('?')
  525. found_symbol = False
  526. _log.warning("No TeX to Unicode mapping for %a.", sym)
  527. fontname, uniindex = self._map_virtual_font(
  528. fontname, font_class, uniindex)
  529. new_fontname = fontname
  530. # Only characters in the "Letter" class should be italicized in 'it'
  531. # mode. Greek capital letters should be Roman.
  532. if found_symbol:
  533. if fontname == 'it' and uniindex < 0x10000:
  534. char = chr(uniindex)
  535. if (unicodedata.category(char)[0] != "L"
  536. or unicodedata.name(char).startswith("GREEK CAPITAL")):
  537. new_fontname = 'rm'
  538. slanted = (new_fontname == 'it') or sym in self._slanted_symbols
  539. found_symbol = False
  540. font = self._get_font(new_fontname)
  541. if font is not None:
  542. if (uniindex in self._cmr10_substitutions
  543. and font.family_name == "cmr10"):
  544. font = get_font(
  545. cbook._get_data_path("fonts/ttf/cmsy10.ttf"))
  546. uniindex = self._cmr10_substitutions[uniindex]
  547. glyphindex = font.get_char_index(uniindex)
  548. if glyphindex != 0:
  549. found_symbol = True
  550. if not found_symbol:
  551. if self._fallback_font:
  552. if (fontname in ('it', 'regular')
  553. and isinstance(self._fallback_font, StixFonts)):
  554. fontname = 'rm'
  555. g = self._fallback_font._get_glyph(fontname, font_class, sym)
  556. family = g[0].family_name
  557. if family in list(BakomaFonts._fontmap.values()):
  558. family = "Computer Modern"
  559. _log.info("Substituting symbol %s from %s", sym, family)
  560. return g
  561. else:
  562. if (fontname in ('it', 'regular')
  563. and isinstance(self, StixFonts)):
  564. return self._get_glyph('rm', font_class, sym)
  565. _log.warning("Font %r does not have a glyph for %a [U+%x], "
  566. "substituting with a dummy symbol.",
  567. new_fontname, sym, uniindex)
  568. font = self._get_font('rm')
  569. uniindex = 0xA4 # currency char, for lack of anything better
  570. slanted = False
  571. return font, uniindex, slanted
  572. def get_sized_alternatives_for_symbol(self, fontname: str,
  573. sym: str) -> list[tuple[str, str]]:
  574. if self._fallback_font:
  575. return self._fallback_font.get_sized_alternatives_for_symbol(
  576. fontname, sym)
  577. return [(fontname, sym)]
  578. class DejaVuFonts(UnicodeFonts, metaclass=abc.ABCMeta):
  579. _fontmap: dict[str | int, str] = {}
  580. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  581. # This must come first so the backend's owner is set correctly
  582. if isinstance(self, DejaVuSerifFonts):
  583. self._fallback_font = StixFonts(default_font_prop, load_glyph_flags)
  584. else:
  585. self._fallback_font = StixSansFonts(default_font_prop, load_glyph_flags)
  586. self.bakoma = BakomaFonts(default_font_prop, load_glyph_flags)
  587. TruetypeFonts.__init__(self, default_font_prop, load_glyph_flags)
  588. # Include Stix sized alternatives for glyphs
  589. self._fontmap.update({
  590. 1: 'STIXSizeOneSym',
  591. 2: 'STIXSizeTwoSym',
  592. 3: 'STIXSizeThreeSym',
  593. 4: 'STIXSizeFourSym',
  594. 5: 'STIXSizeFiveSym',
  595. })
  596. for key, name in self._fontmap.items():
  597. fullpath = findfont(name)
  598. self.fontmap[key] = fullpath
  599. self.fontmap[name] = fullpath
  600. def _get_glyph(self, fontname: str, font_class: str,
  601. sym: str) -> tuple[FT2Font, int, bool]:
  602. # Override prime symbol to use Bakoma.
  603. if sym == r'\prime':
  604. return self.bakoma._get_glyph(fontname, font_class, sym)
  605. else:
  606. # check whether the glyph is available in the display font
  607. uniindex = get_unicode_index(sym)
  608. font = self._get_font('ex')
  609. if font is not None:
  610. glyphindex = font.get_char_index(uniindex)
  611. if glyphindex != 0:
  612. return super()._get_glyph('ex', font_class, sym)
  613. # otherwise return regular glyph
  614. return super()._get_glyph(fontname, font_class, sym)
class DejaVuSerifFonts(DejaVuFonts):
    """
    A font handling class for the DejaVu Serif fonts.

    If a glyph is not found it will fall back to the STIX fonts
    (`DejaVuFonts.__init__` selects `StixFonts` for this class).
    """
    # Font name for each TeX font key.  The integer key 0 sits alongside the
    # keys 1-5 that `DejaVuFonts.__init__` adds for the STIX sized alternatives.
    _fontmap = {
        'rm': 'DejaVu Serif',
        'it': 'DejaVu Serif:italic',
        'bf': 'DejaVu Serif:weight=bold',
        'bfit': 'DejaVu Serif:italic:bold',
        'sf': 'DejaVu Sans',
        'tt': 'DejaVu Sans Mono',
        'ex': 'DejaVu Serif Display',
        0: 'DejaVu Serif',
    }
class DejaVuSansFonts(DejaVuFonts):
    """
    A font handling class for the DejaVu Sans fonts.

    If a glyph is not found it will fall back to the STIX sans fonts
    (`DejaVuFonts.__init__` selects `StixSansFonts` for this class).
    """
    # Font name for each TeX font key.  The integer key 0 sits alongside the
    # keys 1-5 that `DejaVuFonts.__init__` adds for the STIX sized alternatives.
    _fontmap = {
        'rm': 'DejaVu Sans',
        'it': 'DejaVu Sans:italic',
        'bf': 'DejaVu Sans:weight=bold',
        'bfit': 'DejaVu Sans:italic:bold',
        'sf': 'DejaVu Sans',
        'tt': 'DejaVu Sans Mono',
        'ex': 'DejaVu Sans Display',
        0: 'DejaVu Sans',
    }
  645. class StixFonts(UnicodeFonts):
  646. """
  647. A font handling class for the STIX fonts.
  648. In addition to what UnicodeFonts provides, this class:
  649. - supports "virtual fonts" which are complete alpha numeric
  650. character sets with different font styles at special Unicode
  651. code points, such as "Blackboard".
  652. - handles sized alternative characters for the STIXSizeX fonts.
  653. """
  654. _fontmap: dict[str | int, str] = {
  655. 'rm': 'STIXGeneral',
  656. 'it': 'STIXGeneral:italic',
  657. 'bf': 'STIXGeneral:weight=bold',
  658. 'bfit': 'STIXGeneral:italic:bold',
  659. 'nonunirm': 'STIXNonUnicode',
  660. 'nonuniit': 'STIXNonUnicode:italic',
  661. 'nonunibf': 'STIXNonUnicode:weight=bold',
  662. 0: 'STIXGeneral',
  663. 1: 'STIXSizeOneSym',
  664. 2: 'STIXSizeTwoSym',
  665. 3: 'STIXSizeThreeSym',
  666. 4: 'STIXSizeFourSym',
  667. 5: 'STIXSizeFiveSym',
  668. }
  669. _fallback_font = None
  670. _sans = False
  671. def __init__(self, default_font_prop: FontProperties, load_glyph_flags: int):
  672. TruetypeFonts.__init__(self, default_font_prop, load_glyph_flags)
  673. for key, name in self._fontmap.items():
  674. fullpath = findfont(name)
  675. self.fontmap[key] = fullpath
  676. self.fontmap[name] = fullpath
  677. def _map_virtual_font(self, fontname: str, font_class: str,
  678. uniindex: int) -> tuple[str, int]:
  679. # Handle these "fonts" that are actually embedded in
  680. # other fonts.
  681. font_mapping = stix_virtual_fonts.get(fontname)
  682. if (self._sans and font_mapping is None
  683. and fontname not in ('regular', 'default')):
  684. font_mapping = stix_virtual_fonts['sf']
  685. doing_sans_conversion = True
  686. else:
  687. doing_sans_conversion = False
  688. if isinstance(font_mapping, dict):
  689. try:
  690. mapping = font_mapping[font_class]
  691. except KeyError:
  692. mapping = font_mapping['rm']
  693. elif isinstance(font_mapping, list):
  694. mapping = font_mapping
  695. else:
  696. mapping = None
  697. if mapping is not None:
  698. # Binary search for the source glyph
  699. lo = 0
  700. hi = len(mapping)
  701. while lo < hi:
  702. mid = (lo+hi)//2
  703. range = mapping[mid]
  704. if uniindex < range[0]:
  705. hi = mid
  706. elif uniindex <= range[1]:
  707. break
  708. else:
  709. lo = mid + 1
  710. if range[0] <= uniindex <= range[1]:
  711. uniindex = uniindex - range[0] + range[3]
  712. fontname = range[2]
  713. elif not doing_sans_conversion:
  714. # This will generate a dummy character
  715. uniindex = 0x1
  716. fontname = mpl.rcParams['mathtext.default']
  717. # Fix some incorrect glyphs.
  718. if fontname in ('rm', 'it'):
  719. uniindex = stix_glyph_fixes.get(uniindex, uniindex)
  720. # Handle private use area glyphs
  721. if fontname in ('it', 'rm', 'bf', 'bfit') and 0xe000 <= uniindex <= 0xf8ff:
  722. fontname = 'nonuni' + fontname
  723. return fontname, uniindex
  724. @functools.cache
  725. def get_sized_alternatives_for_symbol( # type: ignore[override]
  726. self,
  727. fontname: str,
  728. sym: str) -> list[tuple[str, str]] | list[tuple[int, str]]:
  729. fixes = {
  730. '\\{': '{', '\\}': '}', '\\[': '[', '\\]': ']',
  731. '<': '\N{MATHEMATICAL LEFT ANGLE BRACKET}',
  732. '>': '\N{MATHEMATICAL RIGHT ANGLE BRACKET}',
  733. }
  734. sym = fixes.get(sym, sym)
  735. try:
  736. uniindex = get_unicode_index(sym)
  737. except ValueError:
  738. return [(fontname, sym)]
  739. alternatives = [(i, chr(uniindex)) for i in range(6)
  740. if self._get_font(i).get_char_index(uniindex) != 0]
  741. # The largest size of the radical symbol in STIX has incorrect
  742. # metrics that cause it to be disconnected from the stem.
  743. if sym == r'\__sqrt__':
  744. alternatives = alternatives[:-1]
  745. return alternatives
class StixSansFonts(StixFonts):
    """
    A font handling class for the STIX fonts (that uses sans-serif
    characters by default).
    """
    # Makes `StixFonts._map_virtual_font` apply the 'sf' virtual font mapping
    # to font names that have no virtual mapping of their own (other than
    # 'regular' and 'default').
    _sans = True
  752. ##############################################################################
  753. # TeX-LIKE BOX MODEL
  754. # The following is based directly on the document 'woven' from the
  755. # TeX82 source code. This information is also available in printed
  756. # form:
  757. #
  758. # Knuth, Donald E.. 1986. Computers and Typesetting, Volume B:
  759. # TeX: The Program. Addison-Wesley Professional.
  760. #
  761. # The most relevant "chapters" are:
  762. # Data structures for boxes and their friends
  763. # Shipping pages out (ship())
  764. # Packaging (hpack() and vpack())
  765. # Data structures for math mode
  766. # Subroutines for math mode
  767. # Typesetting math formulas
  768. #
  769. # Many of the docstrings below refer to a numbered "node" in that
  770. # book, e.g., node123
  771. #
  772. # Note that (as TeX) y increases downward, unlike many other parts of
  773. # matplotlib.
# How much text shrinks when going to the next-smallest level: the ``shrink``
# methods below multiply their dimensions by this factor.
SHRINK_FACTOR = 0.7
# The number of different sizes of chars to use, beyond which they will not
# get any smaller: ``shrink`` only rescales while ``size < NUM_SIZE_LEVELS``.
NUM_SIZE_LEVELS = 6
class FontConstantsBase:
    """
    A set of constants that controls how certain things, such as sub-
    and superscripts are laid out.  These are all metrics that can't
    be reliably retrieved from the font metrics in the font itself.

    These are the default values; the subclasses below override some or all
    of them for specific font families.
    """
    # Percentage of x-height of additional horiz. space after sub/superscripts
    script_space: T.ClassVar[float] = 0.05

    # Percentage of x-height that sub/superscripts drop below the baseline
    subdrop: T.ClassVar[float] = 0.4

    # Percentage of x-height that superscripts are raised from the baseline
    sup1: T.ClassVar[float] = 0.7

    # Percentage of x-height that subscripts drop below the baseline
    sub1: T.ClassVar[float] = 0.3

    # Percentage of x-height that subscripts drop below the baseline when a
    # superscript is present
    sub2: T.ClassVar[float] = 0.5

    # Percentage of x-height that sub/superscripts are offset relative to the
    # nucleus edge for non-slanted nuclei
    delta: T.ClassVar[float] = 0.025

    # Additional percentage of last character height above 2/3 of the
    # x-height that superscripts are offset relative to the subscript
    # for slanted nuclei
    delta_slanted: T.ClassVar[float] = 0.2

    # Percentage of x-height that superscripts and subscripts are offset for
    # integrals
    delta_integral: T.ClassVar[float] = 0.1
class ComputerModernFontConstants(FontConstantsBase):
    """
    Layout constants for the Computer Modern fonts (the ``cm*10`` families in
    ``_font_constant_mapping``).  Every base value is overridden.
    """
    script_space = 0.075
    subdrop = 0.2
    sup1 = 0.45
    sub1 = 0.2
    sub2 = 0.3
    delta = 0.075
    delta_slanted = 0.3
    delta_integral = 0.3
class STIXFontConstants(FontConstantsBase):
    """
    Layout constants for the STIX fonts.  ``subdrop`` and ``sub1`` keep the
    `FontConstantsBase` values.
    """
    script_space = 0.1
    sup1 = 0.8
    sub2 = 0.6
    delta = 0.05
    delta_slanted = 0.3
    delta_integral = 0.3
class STIXSansFontConstants(FontConstantsBase):
    """
    Layout constants for the sans-serif flavour of the STIX fonts.

    Not listed in ``_font_constant_mapping``; `_get_font_constant_set` returns
    it in place of `STIXFontConstants` when the font set is `StixSansFonts`.
    """
    script_space = 0.05
    sup1 = 0.8
    delta_slanted = 0.6
    delta_integral = 0.3
class DejaVuSerifFontConstants(FontConstantsBase):
    """Layout constants for DejaVu Serif; all `FontConstantsBase` defaults."""
    pass
class DejaVuSansFontConstants(FontConstantsBase):
    """Layout constants for DejaVu Sans; all `FontConstantsBase` defaults."""
    pass
# Maps font family names to the FontConstantsBase subclass to use.  Families
# that are not listed get FontConstantsBase itself (see
# _get_font_constant_set).
_font_constant_mapping = {
    'DejaVu Sans': DejaVuSansFontConstants,
    'DejaVu Sans Mono': DejaVuSansFontConstants,
    'DejaVu Serif': DejaVuSerifFontConstants,
    'cmb10': ComputerModernFontConstants,
    'cmex10': ComputerModernFontConstants,
    'cmmi10': ComputerModernFontConstants,
    'cmr10': ComputerModernFontConstants,
    'cmss10': ComputerModernFontConstants,
    'cmsy10': ComputerModernFontConstants,
    'cmtt10': ComputerModernFontConstants,
    'STIXGeneral': STIXFontConstants,
    'STIXNonUnicode': STIXFontConstants,
    'STIXSizeFiveSym': STIXFontConstants,
    'STIXSizeFourSym': STIXFontConstants,
    'STIXSizeThreeSym': STIXFontConstants,
    'STIXSizeTwoSym': STIXFontConstants,
    'STIXSizeOneSym': STIXFontConstants,
    # Map the fonts we used to ship, just for good measure
    'Bitstream Vera Sans': DejaVuSansFontConstants,
    'Bitstream Vera': DejaVuSansFontConstants,
}
  854. def _get_font_constant_set(state: ParserState) -> type[FontConstantsBase]:
  855. constants = _font_constant_mapping.get(
  856. state.fontset._get_font(state.font).family_name, FontConstantsBase)
  857. # STIX sans isn't really its own fonts, just different code points
  858. # in the STIX fonts, so we have to detect this one separately.
  859. if (constants is STIXFontConstants and
  860. isinstance(state.fontset, StixSansFonts)):
  861. return STIXSansFontConstants
  862. return constants
  863. class Node:
  864. """A node in the TeX box model."""
  865. def __init__(self) -> None:
  866. self.size = 0
  867. def __repr__(self) -> str:
  868. return type(self).__name__
  869. def get_kerning(self, next: Node | None) -> float:
  870. return 0.0
  871. def shrink(self) -> None:
  872. """
  873. Shrinks one level smaller. There are only three levels of
  874. sizes, after which things will no longer get smaller.
  875. """
  876. self.size += 1
  877. def render(self, output: Output, x: float, y: float) -> None:
  878. """Render this node."""
  879. class Box(Node):
  880. """A node with a physical location."""
  881. def __init__(self, width: float, height: float, depth: float) -> None:
  882. super().__init__()
  883. self.width = width
  884. self.height = height
  885. self.depth = depth
  886. def shrink(self) -> None:
  887. super().shrink()
  888. if self.size < NUM_SIZE_LEVELS:
  889. self.width *= SHRINK_FACTOR
  890. self.height *= SHRINK_FACTOR
  891. self.depth *= SHRINK_FACTOR
  892. def render(self, output: Output, # type: ignore[override]
  893. x1: float, y1: float, x2: float, y2: float) -> None:
  894. pass
  895. class Vbox(Box):
  896. """A box with only height (zero width)."""
  897. def __init__(self, height: float, depth: float):
  898. super().__init__(0., height, depth)
  899. class Hbox(Box):
  900. """A box with only width (zero height and depth)."""
  901. def __init__(self, width: float):
  902. super().__init__(width, 0., 0.)
  903. class Char(Node):
  904. """
  905. A single character.
  906. Unlike TeX, the font information and metrics are stored with each `Char`
  907. to make it easier to lookup the font metrics when needed. Note that TeX
  908. boxes have a width, height, and depth, unlike Type1 and TrueType which use
  909. a full bounding box and an advance in the x-direction. The metrics must
  910. be converted to the TeX model, and the advance (if different from width)
  911. must be converted into a `Kern` node when the `Char` is added to its parent
  912. `Hlist`.
  913. """
  914. def __init__(self, c: str, state: ParserState):
  915. super().__init__()
  916. self.c = c
  917. self.fontset = state.fontset
  918. self.font = state.font
  919. self.font_class = state.font_class
  920. self.fontsize = state.fontsize
  921. self.dpi = state.dpi
  922. # The real width, height and depth will be set during the
  923. # pack phase, after we know the real fontsize
  924. self._update_metrics()
  925. def __repr__(self) -> str:
  926. return '`%s`' % self.c
  927. def _update_metrics(self) -> None:
  928. metrics = self._metrics = self.fontset.get_metrics(
  929. self.font, self.font_class, self.c, self.fontsize, self.dpi)
  930. if self.c == ' ':
  931. self.width = metrics.advance
  932. else:
  933. self.width = metrics.width
  934. self.height = metrics.iceberg
  935. self.depth = -(metrics.iceberg - metrics.height)
  936. def is_slanted(self) -> bool:
  937. return self._metrics.slanted
  938. def get_kerning(self, next: Node | None) -> float:
  939. """
  940. Return the amount of kerning between this and the given character.
  941. This method is called when characters are strung together into `Hlist`
  942. to create `Kern` nodes.
  943. """
  944. advance = self._metrics.advance - self.width
  945. kern = 0.
  946. if isinstance(next, Char):
  947. kern = self.fontset.get_kern(
  948. self.font, self.font_class, self.c, self.fontsize,
  949. next.font, next.font_class, next.c, next.fontsize,
  950. self.dpi)
  951. return advance + kern
  952. def render(self, output: Output, x: float, y: float) -> None:
  953. self.fontset.render_glyph(
  954. output, x, y,
  955. self.font, self.font_class, self.c, self.fontsize, self.dpi)
  956. def shrink(self) -> None:
  957. super().shrink()
  958. if self.size < NUM_SIZE_LEVELS:
  959. self.fontsize *= SHRINK_FACTOR
  960. self.width *= SHRINK_FACTOR
  961. self.height *= SHRINK_FACTOR
  962. self.depth *= SHRINK_FACTOR
  963. class Accent(Char):
  964. """
  965. The font metrics need to be dealt with differently for accents,
  966. since they are already offset correctly from the baseline in
  967. TrueType fonts.
  968. """
  969. def _update_metrics(self) -> None:
  970. metrics = self._metrics = self.fontset.get_metrics(
  971. self.font, self.font_class, self.c, self.fontsize, self.dpi)
  972. self.width = metrics.xmax - metrics.xmin
  973. self.height = metrics.ymax - metrics.ymin
  974. self.depth = 0
  975. def shrink(self) -> None:
  976. super().shrink()
  977. self._update_metrics()
  978. def render(self, output: Output, x: float, y: float) -> None:
  979. self.fontset.render_glyph(
  980. output, x - self._metrics.xmin, y + self._metrics.ymin,
  981. self.font, self.font_class, self.c, self.fontsize, self.dpi)
  982. class List(Box):
  983. """A list of nodes (either horizontal or vertical)."""
  984. def __init__(self, elements: T.Sequence[Node]):
  985. super().__init__(0., 0., 0.)
  986. self.shift_amount = 0. # An arbitrary offset
  987. self.children = [*elements] # The child nodes of this list
  988. # The following parameters are set in the vpack and hpack functions
  989. self.glue_set = 0. # The glue setting of this list
  990. self.glue_sign = 0 # 0: normal, -1: shrinking, 1: stretching
  991. self.glue_order = 0 # The order of infinity (0 - 3) for the glue
  992. def __repr__(self) -> str:
  993. return '{}<w={:.02f} h={:.02f} d={:.02f} s={:.02f}>[{}]'.format(
  994. super().__repr__(),
  995. self.width, self.height,
  996. self.depth, self.shift_amount,
  997. ', '.join([repr(x) for x in self.children]))
  998. def _set_glue(self, x: float, sign: int, totals: list[float],
  999. error_type: str) -> None:
  1000. self.glue_order = o = next(
  1001. # Highest order of glue used by the members of this list.
  1002. (i for i in range(len(totals))[::-1] if totals[i] != 0), 0)
  1003. self.glue_sign = sign
  1004. if totals[o] != 0.:
  1005. self.glue_set = x / totals[o]
  1006. else:
  1007. self.glue_sign = 0
  1008. self.glue_ratio = 0.
  1009. if o == 0:
  1010. if len(self.children):
  1011. _log.warning("%s %s: %r",
  1012. error_type, type(self).__name__, self)
  1013. def shrink(self) -> None:
  1014. for child in self.children:
  1015. child.shrink()
  1016. super().shrink()
  1017. if self.size < NUM_SIZE_LEVELS:
  1018. self.shift_amount *= SHRINK_FACTOR
  1019. self.glue_set *= SHRINK_FACTOR
  1020. class Hlist(List):
  1021. """A horizontal list of boxes."""
  1022. def __init__(self, elements: T.Sequence[Node], w: float = 0.0,
  1023. m: T.Literal['additional', 'exactly'] = 'additional',
  1024. do_kern: bool = True):
  1025. super().__init__(elements)
  1026. if do_kern:
  1027. self.kern()
  1028. self.hpack(w=w, m=m)
  1029. def kern(self) -> None:
  1030. """
  1031. Insert `Kern` nodes between `Char` nodes to set kerning.
  1032. The `Char` nodes themselves determine the amount of kerning they need
  1033. (in `~Char.get_kerning`), and this function just creates the correct
  1034. linked list.
  1035. """
  1036. new_children = []
  1037. num_children = len(self.children)
  1038. if num_children:
  1039. for i in range(num_children):
  1040. elem = self.children[i]
  1041. if i < num_children - 1:
  1042. next = self.children[i + 1]
  1043. else:
  1044. next = None
  1045. new_children.append(elem)
  1046. kerning_distance = elem.get_kerning(next)
  1047. if kerning_distance != 0.:
  1048. kern = Kern(kerning_distance)
  1049. new_children.append(kern)
  1050. self.children = new_children
  1051. def hpack(self, w: float = 0.0,
  1052. m: T.Literal['additional', 'exactly'] = 'additional') -> None:
  1053. r"""
  1054. Compute the dimensions of the resulting boxes, and adjust the glue if
  1055. one of those dimensions is pre-specified. The computed sizes normally
  1056. enclose all of the material inside the new box; but some items may
  1057. stick out if negative glue is used, if the box is overfull, or if a
  1058. ``\vbox`` includes other boxes that have been shifted left.
  1059. Parameters
  1060. ----------
  1061. w : float, default: 0
  1062. A width.
  1063. m : {'exactly', 'additional'}, default: 'additional'
  1064. Whether to produce a box whose width is 'exactly' *w*; or a box
  1065. with the natural width of the contents, plus *w* ('additional').
  1066. Notes
  1067. -----
  1068. The defaults produce a box with the natural width of the contents.
  1069. """
  1070. # I don't know why these get reset in TeX. Shift_amount is pretty
  1071. # much useless if we do.
  1072. # self.shift_amount = 0.
  1073. h = 0.
  1074. d = 0.
  1075. x = 0.
  1076. total_stretch = [0.] * 4
  1077. total_shrink = [0.] * 4
  1078. for p in self.children:
  1079. if isinstance(p, Char):
  1080. x += p.width
  1081. h = max(h, p.height)
  1082. d = max(d, p.depth)
  1083. elif isinstance(p, Box):
  1084. x += p.width
  1085. if not np.isinf(p.height) and not np.isinf(p.depth):
  1086. s = getattr(p, 'shift_amount', 0.)
  1087. h = max(h, p.height - s)
  1088. d = max(d, p.depth + s)
  1089. elif isinstance(p, Glue):
  1090. glue_spec = p.glue_spec
  1091. x += glue_spec.width
  1092. total_stretch[glue_spec.stretch_order] += glue_spec.stretch
  1093. total_shrink[glue_spec.shrink_order] += glue_spec.shrink
  1094. elif isinstance(p, Kern):
  1095. x += p.width
  1096. self.height = h
  1097. self.depth = d
  1098. if m == 'additional':
  1099. w += x
  1100. self.width = w
  1101. x = w - x
  1102. if x == 0.:
  1103. self.glue_sign = 0
  1104. self.glue_order = 0
  1105. self.glue_ratio = 0.
  1106. return
  1107. if x > 0.:
  1108. self._set_glue(x, 1, total_stretch, "Overful")
  1109. else:
  1110. self._set_glue(x, -1, total_shrink, "Underful")
  1111. class Vlist(List):
  1112. """A vertical list of boxes."""
  1113. def __init__(self, elements: T.Sequence[Node], h: float = 0.0,
  1114. m: T.Literal['additional', 'exactly'] = 'additional'):
  1115. super().__init__(elements)
  1116. self.vpack(h=h, m=m)
  1117. def vpack(self, h: float = 0.0,
  1118. m: T.Literal['additional', 'exactly'] = 'additional',
  1119. l: float = np.inf) -> None:
  1120. """
  1121. Compute the dimensions of the resulting boxes, and to adjust the glue
  1122. if one of those dimensions is pre-specified.
  1123. Parameters
  1124. ----------
  1125. h : float, default: 0
  1126. A height.
  1127. m : {'exactly', 'additional'}, default: 'additional'
  1128. Whether to produce a box whose height is 'exactly' *h*; or a box
  1129. with the natural height of the contents, plus *h* ('additional').
  1130. l : float, default: np.inf
  1131. The maximum height.
  1132. Notes
  1133. -----
  1134. The defaults produce a box with the natural height of the contents.
  1135. """
  1136. # I don't know why these get reset in TeX. Shift_amount is pretty
  1137. # much useless if we do.
  1138. # self.shift_amount = 0.
  1139. w = 0.
  1140. d = 0.
  1141. x = 0.
  1142. total_stretch = [0.] * 4
  1143. total_shrink = [0.] * 4
  1144. for p in self.children:
  1145. if isinstance(p, Box):
  1146. x += d + p.height
  1147. d = p.depth
  1148. if not np.isinf(p.width):
  1149. s = getattr(p, 'shift_amount', 0.)
  1150. w = max(w, p.width + s)
  1151. elif isinstance(p, Glue):
  1152. x += d
  1153. d = 0.
  1154. glue_spec = p.glue_spec
  1155. x += glue_spec.width
  1156. total_stretch[glue_spec.stretch_order] += glue_spec.stretch
  1157. total_shrink[glue_spec.shrink_order] += glue_spec.shrink
  1158. elif isinstance(p, Kern):
  1159. x += d + p.width
  1160. d = 0.
  1161. elif isinstance(p, Char):
  1162. raise RuntimeError(
  1163. "Internal mathtext error: Char node found in Vlist")
  1164. self.width = w
  1165. if d > l:
  1166. x += d - l
  1167. self.depth = l
  1168. else:
  1169. self.depth = d
  1170. if m == 'additional':
  1171. h += x
  1172. self.height = h
  1173. x = h - x
  1174. if x == 0:
  1175. self.glue_sign = 0
  1176. self.glue_order = 0
  1177. self.glue_ratio = 0.
  1178. return
  1179. if x > 0.:
  1180. self._set_glue(x, 1, total_stretch, "Overful")
  1181. else:
  1182. self._set_glue(x, -1, total_shrink, "Underful")
  1183. class Rule(Box):
  1184. """
  1185. A solid black rectangle.
  1186. It has *width*, *depth*, and *height* fields just as in an `Hlist`.
  1187. However, if any of these dimensions is inf, the actual value will be
  1188. determined by running the rule up to the boundary of the innermost
  1189. enclosing box. This is called a "running dimension". The width is never
  1190. running in an `Hlist`; the height and depth are never running in a `Vlist`.
  1191. """
  1192. def __init__(self, width: float, height: float, depth: float, state: ParserState):
  1193. super().__init__(width, height, depth)
  1194. self.fontset = state.fontset
  1195. def render(self, output: Output, # type: ignore[override]
  1196. x: float, y: float, w: float, h: float) -> None:
  1197. self.fontset.render_rect_filled(output, x, y, x + w, y + h)
  1198. class Hrule(Rule):
  1199. """Convenience class to create a horizontal rule."""
  1200. def __init__(self, state: ParserState, thickness: float | None = None):
  1201. if thickness is None:
  1202. thickness = state.get_current_underline_thickness()
  1203. height = depth = thickness * 0.5
  1204. super().__init__(np.inf, height, depth, state)
  1205. class Vrule(Rule):
  1206. """Convenience class to create a vertical rule."""
  1207. def __init__(self, state: ParserState):
  1208. thickness = state.get_current_underline_thickness()
  1209. super().__init__(thickness, np.inf, np.inf, state)
  1210. class _GlueSpec(NamedTuple):
  1211. width: float
  1212. stretch: float
  1213. stretch_order: int
  1214. shrink: float
  1215. shrink_order: int
  1216. _GlueSpec._named = { # type: ignore[attr-defined]
  1217. 'fil': _GlueSpec(0., 1., 1, 0., 0),
  1218. 'fill': _GlueSpec(0., 1., 2, 0., 0),
  1219. 'filll': _GlueSpec(0., 1., 3, 0., 0),
  1220. 'neg_fil': _GlueSpec(0., 0., 0, 1., 1),
  1221. 'neg_fill': _GlueSpec(0., 0., 0, 1., 2),
  1222. 'neg_filll': _GlueSpec(0., 0., 0, 1., 3),
  1223. 'empty': _GlueSpec(0., 0., 0, 0., 0),
  1224. 'ss': _GlueSpec(0., 1., 1, -1., 1),
  1225. }
  1226. class Glue(Node):
  1227. """
  1228. Most of the information in this object is stored in the underlying
  1229. ``_GlueSpec`` class, which is shared between multiple glue objects.
  1230. (This is a memory optimization which probably doesn't matter anymore, but
  1231. it's easier to stick to what TeX does.)
  1232. """
  1233. def __init__(self,
  1234. glue_type: _GlueSpec | T.Literal["fil", "fill", "filll",
  1235. "neg_fil", "neg_fill", "neg_filll",
  1236. "empty", "ss"]):
  1237. super().__init__()
  1238. if isinstance(glue_type, str):
  1239. glue_spec = _GlueSpec._named[glue_type] # type: ignore[attr-defined]
  1240. elif isinstance(glue_type, _GlueSpec):
  1241. glue_spec = glue_type
  1242. else:
  1243. raise ValueError("glue_type must be a glue spec name or instance")
  1244. self.glue_spec = glue_spec
  1245. def shrink(self) -> None:
  1246. super().shrink()
  1247. if self.size < NUM_SIZE_LEVELS:
  1248. g = self.glue_spec
  1249. self.glue_spec = g._replace(width=g.width * SHRINK_FACTOR)
  1250. class HCentered(Hlist):
  1251. """
  1252. A convenience class to create an `Hlist` whose contents are
  1253. centered within its enclosing box.
  1254. """
  1255. def __init__(self, elements: list[Node]):
  1256. super().__init__([Glue('ss'), *elements, Glue('ss')], do_kern=False)
  1257. class VCentered(Vlist):
  1258. """
  1259. A convenience class to create a `Vlist` whose contents are
  1260. centered within its enclosing box.
  1261. """
  1262. def __init__(self, elements: list[Node]):
  1263. super().__init__([Glue('ss'), *elements, Glue('ss')])
  1264. class Kern(Node):
  1265. """
  1266. A `Kern` node has a width field to specify a (normally
  1267. negative) amount of spacing. This spacing correction appears in
  1268. horizontal lists between letters like A and V when the font
  1269. designer said that it looks better to move them closer together or
  1270. further apart. A kern node can also appear in a vertical list,
  1271. when its *width* denotes additional spacing in the vertical
  1272. direction.
  1273. """
  1274. height = 0
  1275. depth = 0
  1276. def __init__(self, width: float):
  1277. super().__init__()
  1278. self.width = width
  1279. def __repr__(self) -> str:
  1280. return "k%.02f" % self.width
  1281. def shrink(self) -> None:
  1282. super().shrink()
  1283. if self.size < NUM_SIZE_LEVELS:
  1284. self.width *= SHRINK_FACTOR
  1285. class AutoHeightChar(Hlist):
  1286. """
  1287. A character as close to the given height and depth as possible.
  1288. When using a font with multiple height versions of some characters (such as
  1289. the BaKoMa fonts), the correct glyph will be selected, otherwise this will
  1290. always just return a scaled version of the glyph.
  1291. """
  1292. def __init__(self, c: str, height: float, depth: float, state: ParserState,
  1293. always: bool = False, factor: float | None = None):
  1294. alternatives = state.fontset.get_sized_alternatives_for_symbol(
  1295. state.font, c)
  1296. xHeight = state.fontset.get_xheight(
  1297. state.font, state.fontsize, state.dpi)
  1298. state = state.copy()
  1299. target_total = height + depth
  1300. for fontname, sym in alternatives:
  1301. state.font = fontname
  1302. char = Char(sym, state)
  1303. # Ensure that size 0 is chosen when the text is regular sized but
  1304. # with descender glyphs by subtracting 0.2 * xHeight
  1305. if char.height + char.depth >= target_total - 0.2 * xHeight:
  1306. break
  1307. shift = 0.0
  1308. if state.font != 0 or len(alternatives) == 1:
  1309. if factor is None:
  1310. factor = target_total / (char.height + char.depth)
  1311. state.fontsize *= factor
  1312. char = Char(sym, state)
  1313. shift = (depth - char.depth)
  1314. super().__init__([char])
  1315. self.shift_amount = shift
  1316. class AutoWidthChar(Hlist):
  1317. """
  1318. A character as close to the given width as possible.
  1319. When using a font with multiple width versions of some characters (such as
  1320. the BaKoMa fonts), the correct glyph will be selected, otherwise this will
  1321. always just return a scaled version of the glyph.
  1322. """
  1323. def __init__(self, c: str, width: float, state: ParserState, always: bool = False,
  1324. char_class: type[Char] = Char):
  1325. alternatives = state.fontset.get_sized_alternatives_for_symbol(
  1326. state.font, c)
  1327. state = state.copy()
  1328. for fontname, sym in alternatives:
  1329. state.font = fontname
  1330. char = char_class(sym, state)
  1331. if char.width >= width:
  1332. break
  1333. factor = width / char.width
  1334. state.fontsize *= factor
  1335. char = char_class(sym, state)
  1336. super().__init__([char])
  1337. self.width = char.width
def ship(box: Box, xy: tuple[float, float] = (0, 0)) -> Output:
    """
    Ship out *box* at offset *xy*, converting it to an `Output`.

    Since boxes can be inside of boxes inside of boxes, the main work of `ship`
    is done by two mutually recursive routines, `hlist_out` and `vlist_out`,
    which traverse the `Hlist` nodes and `Vlist` nodes inside of horizontal
    and vertical boxes.  The global variables used in TeX to store state as it
    processes have become local variables here.

    *box* must be an `Hlist` (this is asserted before traversal starts).  The
    returned `Output` is the one every rendered child was passed to.
    """
    ox, oy = xy
    # Running cursor, relative to the offsets below.
    cur_v = 0.
    cur_h = 0.
    # Fixed offsets added to the cursor whenever something is rendered.
    off_h = ox
    off_v = oy + box.height
    output = Output(box)

    def clamp(value: float) -> float:
        # Limit *value* to the range [-1e9, +1e9].
        return -1e9 if value < -1e9 else +1e9 if value > +1e9 else value

    def hlist_out(box: Hlist) -> None:
        # Walk the children of a horizontal list, advancing cur_h.
        nonlocal cur_v, cur_h, off_h, off_v

        cur_g = 0
        cur_glue = 0.
        glue_order = box.glue_order
        glue_sign = box.glue_sign
        base_line = cur_v
        left_edge = cur_h  # not read again in this function

        for p in box.children:
            if isinstance(p, Char):
                p.render(output, cur_h + off_h, cur_v + off_v)
                cur_h += p.width
            elif isinstance(p, Kern):
                cur_h += p.width
            elif isinstance(p, List):
                # node623
                if len(p.children) == 0:
                    cur_h += p.width
                else:
                    edge = cur_h
                    # Nested lists are drawn shifted off the current baseline.
                    cur_v = base_line + p.shift_amount
                    if isinstance(p, Hlist):
                        hlist_out(p)
                    elif isinstance(p, Vlist):
                        # p.vpack(box.height + box.depth, 'exactly')
                        vlist_out(p)
                    else:
                        assert False, "unreachable code"
                    # Restore the cursor: step past the child, back on the
                    # baseline.
                    cur_h = edge + p.width
                    cur_v = base_line
            elif isinstance(p, Box):
                # node624
                rule_height = p.height
                rule_depth = p.depth
                rule_width = p.width
                # Infinite dimensions take the size of the enclosing box.
                if np.isinf(rule_height):
                    rule_height = box.height
                if np.isinf(rule_depth):
                    rule_depth = box.depth
                if rule_height > 0 and rule_width > 0:
                    cur_v = base_line + rule_depth
                    p.render(output,
                             cur_h + off_h, cur_v + off_v,
                             rule_width, rule_height)
                    cur_v = base_line
                cur_h += rule_width
            elif isinstance(p, Glue):
                # node625
                glue_spec = p.glue_spec
                rule_width = glue_spec.width - cur_g
                if glue_sign != 0:  # normal
                    if glue_sign == 1:  # stretching
                        if glue_spec.stretch_order == glue_order:
                            cur_glue += glue_spec.stretch
                            cur_g = round(clamp(box.glue_set * cur_glue))
                    elif glue_spec.shrink_order == glue_order:
                        cur_glue += glue_spec.shrink
                        cur_g = round(clamp(box.glue_set * cur_glue))
                rule_width += cur_g
                cur_h += rule_width

    def vlist_out(box: Vlist) -> None:
        # Walk the children of a vertical list, advancing cur_v.
        nonlocal cur_v, cur_h, off_h, off_v

        cur_g = 0
        cur_glue = 0.
        glue_order = box.glue_order
        glue_sign = box.glue_sign
        left_edge = cur_h
        # Start from the top of the box.
        cur_v -= box.height
        top_edge = cur_v  # not read again in this function

        for p in box.children:
            if isinstance(p, Kern):
                cur_v += p.width
            elif isinstance(p, List):
                if len(p.children) == 0:
                    cur_v += p.height + p.depth
                else:
                    cur_v += p.height
                    cur_h = left_edge + p.shift_amount
                    save_v = cur_v
                    # The child's width is overwritten with this box's width
                    # before it is shipped.
                    p.width = box.width
                    if isinstance(p, Hlist):
                        hlist_out(p)
                    elif isinstance(p, Vlist):
                        vlist_out(p)
                    else:
                        assert False, "unreachable code"
                    cur_v = save_v + p.depth
                    cur_h = left_edge
            elif isinstance(p, Box):
                rule_height = p.height
                rule_depth = p.depth
                rule_width = p.width
                if np.isinf(rule_width):
                    rule_width = box.width
                rule_height += rule_depth
                # NOTE(review): hlist_out guards on the rule *width*; this
                # guard tests the rule *depth* instead -- confirm that is
                # intended before changing it.
                if rule_height > 0 and rule_depth > 0:
                    cur_v += rule_height
                    p.render(output,
                             cur_h + off_h, cur_v + off_v,
                             rule_width, rule_height)
            elif isinstance(p, Glue):
                glue_spec = p.glue_spec
                rule_height = glue_spec.width - cur_g
                if glue_sign != 0:  # normal
                    if glue_sign == 1:  # stretching
                        if glue_spec.stretch_order == glue_order:
                            cur_glue += glue_spec.stretch
                            cur_g = round(clamp(box.glue_set * cur_glue))
                    elif glue_spec.shrink_order == glue_order:  # shrinking
                        cur_glue += glue_spec.shrink
                        cur_g = round(clamp(box.glue_set * cur_glue))
                rule_height += cur_g
                cur_v += rule_height
            elif isinstance(p, Char):
                raise RuntimeError(
                    "Internal mathtext error: Char node found in vlist")

    assert isinstance(box, Hlist)
    hlist_out(box)
    return output
  1474. ##############################################################################
  1475. # PARSER
def Error(msg: str) -> ParserElement:
    """
    Helper function returning a parser element that raises a parser error.

    The returned element is an ``Empty()`` whose parse action raises
    `ParseFatalException` carrying *msg* at the current location.
    """
    def raise_error(s: str, loc: int, toks: ParseResults) -> T.Any:
        raise ParseFatalException(s, loc, msg)

    return Empty().setParseAction(raise_error)
  1481. class ParserState:
  1482. """
  1483. Parser state.
  1484. States are pushed and popped from a stack as necessary, and the "current"
  1485. state is always at the top of the stack.
  1486. Upon entering and leaving a group { } or math/non-math, the stack is pushed
  1487. and popped accordingly.
  1488. """
  1489. def __init__(self, fontset: Fonts, font: str, font_class: str, fontsize: float,
  1490. dpi: float):
  1491. self.fontset = fontset
  1492. self._font = font
  1493. self.font_class = font_class
  1494. self.fontsize = fontsize
  1495. self.dpi = dpi
  1496. def copy(self) -> ParserState:
  1497. return copy.copy(self)
  1498. @property
  1499. def font(self) -> str:
  1500. return self._font
  1501. @font.setter
  1502. def font(self, name: str) -> None:
  1503. if name in ('rm', 'it', 'bf', 'bfit'):
  1504. self.font_class = name
  1505. self._font = name
  1506. def get_current_underline_thickness(self) -> float:
  1507. """Return the underline thickness for this state."""
  1508. return self.fontset.get_underline_thickness(
  1509. self.font, self.fontsize, self.dpi)
  1510. def cmd(expr: str, args: ParserElement) -> ParserElement:
  1511. r"""
  1512. Helper to define TeX commands.
  1513. ``cmd("\cmd", args)`` is equivalent to
  1514. ``"\cmd" - (args | Error("Expected \cmd{arg}{...}"))`` where the names in
  1515. the error message are taken from element names in *args*. If *expr*
  1516. already includes arguments (e.g. "\cmd{arg}{...}"), then they are stripped
  1517. when constructing the parse element, but kept (and *expr* is used as is) in
  1518. the error message.
  1519. """
  1520. def names(elt: ParserElement) -> T.Generator[str, None, None]:
  1521. if isinstance(elt, ParseExpression):
  1522. for expr in elt.exprs:
  1523. yield from names(expr)
  1524. elif elt.resultsName:
  1525. yield elt.resultsName
  1526. csname = expr.split("{", 1)[0]
  1527. err = (csname + "".join("{%s}" % name for name in names(args))
  1528. if expr == csname else expr)
  1529. return csname - (args | Error(f"Expected {err}"))
  1530. class Parser:
  1531. """
  1532. A pyparsing-based parser for strings containing math expressions.
  1533. Raw text may also appear outside of pairs of ``$``.
  1534. The grammar is based directly on that in TeX, though it cuts a few corners.
  1535. """
class _MathStyle(enum.Enum):
    # The integer values matter: the grammar's ``style_literal`` element
    # matches the string form of each member's value.
    DISPLAYSTYLE = 0
    TEXTSTYLE = 1
    SCRIPTSTYLE = 2
    SCRIPTSCRIPTSTYLE = 3
# Binary operators.  ``symbol`` surrounds these with thin spaces unless they
# appear where a binary operator cannot stand (start of the expression, right
# after "{" or a left delimiter) or inside a sub-/superscript.
# The table is whitespace-separated and de-duplicated by ``set``.
_binary_operators = set(
    '+ * - \N{MINUS SIGN}'
    r'''
\pm \sqcap \rhd
\mp \sqcup \unlhd
\times \vee \unrhd
\div \wedge \oplus
\ast \setminus \ominus
\star \wr \otimes
\circ \diamond \oslash
\bullet \bigtriangleup \odot
\cdot \bigtriangledown \bigcirc
\cap \triangleleft \dagger
\cup \triangleright \ddagger
\uplus \lhd \amalg
\dotplus \dotminus \Cap
\Cup \barwedge \boxdot
\boxminus \boxplus \boxtimes
\curlyvee \curlywedge \divideontimes
\doublebarwedge \leftthreetimes \rightthreetimes
\slash \veebar \barvee
\cupdot \intercal \amalg
\circledcirc \circleddash \circledast
\boxbar \obar \merge
\minuscolon \dotsminusdots
'''.split())
# Relation symbols; merged into ``_spaced_symbols``, so ``symbol`` surrounds
# them with thin spaces.  The table is whitespace-separated and
# de-duplicated by ``set``.
_relation_symbols = set(r'''
= < > :
\leq \geq \equiv \models
\prec \succ \sim \perp
\preceq \succeq \simeq \mid
\ll \gg \asymp \parallel
\subset \supset \approx \bowtie
\subseteq \supseteq \cong \Join
\sqsubset \sqsupset \neq \smile
\sqsubseteq \sqsupseteq \doteq \frown
\in \ni \propto \vdash
\dashv \dots \doteqdot \leqq
\geqq \lneqq \gneqq \lessgtr
\leqslant \geqslant \eqgtr \eqless
\eqslantless \eqslantgtr \lesseqgtr \backsim
\backsimeq \lesssim \gtrsim \precsim
\precnsim \gnsim \lnsim \succsim
\succnsim \nsim \lesseqqgtr \gtreqqless
\gtreqless \subseteqq \supseteqq \subsetneqq
\supsetneqq \lessapprox \approxeq \gtrapprox
\precapprox \succapprox \precnapprox \succnapprox
\npreccurlyeq \nsucccurlyeq \nsqsubseteq \nsqsupseteq
\sqsubsetneq \sqsupsetneq \nlesssim \ngtrsim
\nlessgtr \ngtrless \lnapprox \gnapprox
\napprox \approxeq \approxident \lll
\ggg \nparallel \Vdash \Vvdash
\nVdash \nvdash \vDash \nvDash
\nVDash \oequal \simneqq \triangle
\triangleq \triangleeq \triangleleft
\triangleright \ntriangleleft \ntriangleright
\trianglelefteq \ntrianglelefteq \trianglerighteq
\ntrianglerighteq \blacktriangleleft \blacktriangleright
\equalparallel \measuredrightangle \varlrtriangle
\Doteq \Bumpeq \Subset \Supset
\backepsilon \because \therefore \bot
\top \bumpeq \circeq \coloneq
\curlyeqprec \curlyeqsucc \eqcirc \eqcolon
\eqsim \fallingdotseq \gtrdot \gtrless
\ltimes \rtimes \lessdot \ne
\ncong \nequiv \ngeq \ngtr
\nleq \nless \nmid \notin
\nprec \nsubset \nsubseteq \nsucc
\nsupset \nsupseteq \pitchfork \preccurlyeq
\risingdotseq \subsetneq \succcurlyeq \supsetneq
\varpropto \vartriangleleft \scurel
\vartriangleright \rightangle \equal \backcong
\eqdef \wedgeq \questeq \between
\veeeq \disin \varisins \isins
\isindot \varisinobar \isinobar \isinvb
\isinE \nisd \varnis \nis
\varniobar \niobar \bagmember \ratio
\Equiv \stareq \measeq \arceq
\rightassert \rightModels \smallin \smallowns
\notsmallowns \nsimeq'''.split())
# Arrow symbols; merged into ``_spaced_symbols`` below.  The table is
# whitespace-separated and de-duplicated by ``set``.
_arrow_symbols = set(r"""
\leftarrow \longleftarrow \uparrow \Leftarrow \Longleftarrow
\Uparrow \rightarrow \longrightarrow \downarrow \Rightarrow
\Longrightarrow \Downarrow \leftrightarrow \updownarrow
\longleftrightarrow \updownarrow \Leftrightarrow
\Longleftrightarrow \Updownarrow \mapsto \longmapsto \nearrow
\hookleftarrow \hookrightarrow \searrow \leftharpoonup
\rightharpoonup \swarrow \leftharpoondown \rightharpoondown
\nwarrow \rightleftharpoons \leadsto \dashrightarrow
\dashleftarrow \leftleftarrows \leftrightarrows \Lleftarrow
\Rrightarrow \twoheadleftarrow \leftarrowtail \looparrowleft
\leftrightharpoons \curvearrowleft \circlearrowleft \Lsh
\upuparrows \upharpoonleft \downharpoonleft \multimap
\leftrightsquigarrow \rightrightarrows \rightleftarrows
\rightrightarrows \rightleftarrows \twoheadrightarrow
\rightarrowtail \looparrowright \rightleftharpoons
\curvearrowright \circlearrowright \Rsh \downdownarrows
\upharpoonright \downharpoonright \rightsquigarrow \nleftarrow
\nrightarrow \nLeftarrow \nRightarrow \nleftrightarrow
\nLeftrightarrow \to \Swarrow \Searrow \Nwarrow \Nearrow
\leftsquigarrow \overleftarrow \overleftrightarrow \cwopencirclearrow
\downzigzagarrow \cupleftarrow \rightzigzagarrow \twoheaddownarrow
\updownarrowbar \twoheaduparrow \rightarrowbar \updownarrows
\barleftarrow \mapsfrom \mapsdown \mapsup \Ldsh \Rdsh
""".split())

# Every symbol that ``symbol`` may surround with thin spaces.
_spaced_symbols = _binary_operators | _relation_symbols | _arrow_symbols
  1647. _punctuation_symbols = set(r', ; . ! \ldotp \cdotp'.split())
  1648. _overunder_symbols = set(r'''
  1649. \sum \prod \coprod \bigcap \bigcup \bigsqcup \bigvee
  1650. \bigwedge \bigodot \bigotimes \bigoplus \biguplus
  1651. '''.split())
  1652. _overunder_functions = set("lim liminf limsup sup max min".split())
  1653. _dropsub_symbols = set(r'\int \oint \iint \oiint \iiint \oiiint \iiiint'.split())
  1654. _fontnames = set("rm cal it tt sf bf bfit "
  1655. "default bb frak scr regular".split())
  1656. _function_names = set("""
  1657. arccos csc ker min arcsin deg lg Pr arctan det lim sec arg dim
  1658. liminf sin cos exp limsup sinh cosh gcd ln sup cot hom log tan
  1659. coth inf max tanh""".split())
  1660. _ambi_delims = set(r"""
  1661. | \| / \backslash \uparrow \downarrow \updownarrow \Uparrow
  1662. \Downarrow \Updownarrow . \vert \Vert""".split())
  1663. _left_delims = set(r"""
  1664. ( [ \{ < \lfloor \langle \lceil \lbrace \leftbrace \lbrack \leftparen \lgroup
  1665. """.split())
  1666. _right_delims = set(r"""
  1667. ) ] \} > \rfloor \rangle \rceil \rbrace \rightbrace \rbrack \rightparen \rgroup
  1668. """.split())
  1669. _delims = _left_delims | _right_delims | _ambi_delims
  1670. _small_greek = set([unicodedata.name(chr(i)).split()[-1].lower() for i in
  1671. range(ord('\N{GREEK SMALL LETTER ALPHA}'),
  1672. ord('\N{GREEK SMALL LETTER OMEGA}') + 1)])
  1673. _latin_alphabets = set(string.ascii_letters)
def __init__(self) -> None:
    """
    Build the pyparsing grammar.

    Grammar elements are collected as attributes of a scratch namespace
    *p*; each public element is named after its attribute and, when the
    parser has a method of the same name, that method becomes its parse
    action.  The two entry points are kept as ``self._expression`` (whole
    string, including text outside ``$``) and ``self._math_expression``
    (the contents of one math section).
    """
    p = types.SimpleNamespace()

    def set_names_and_parse_actions() -> None:
        # Name every public element of *p* and attach the parser method of
        # the same name, if there is one, as its parse action.
        for key, val in vars(p).items():
            if not key.startswith('_'):
                # Set names on (almost) everything -- very useful for debugging
                # token, placeable, and auto_delim are forward references which
                # are left without names to ensure useful error messages
                if key not in ("token", "placeable", "auto_delim"):
                    val.setName(key)
                # Set actions
                if hasattr(self, key):
                    val.setParseAction(getattr(self, key))

    # Root definitions.

    # In TeX parlance, a csname is a control sequence name (a "\foo").
    def csnames(group: str, names: Iterable[str]) -> Regex:
        # Build a regex matching a backslash followed by one of *names*,
        # captured in the named group *group*.  Names ending in a letter
        # must not be followed by another letter; names ending in anything
        # else carry no such restriction.
        ends_with_alpha = []
        ends_with_nonalpha = []
        for name in names:
            if name[-1].isalpha():
                ends_with_alpha.append(name)
            else:
                ends_with_nonalpha.append(name)
        return Regex(
            r"\\(?P<{group}>(?:{alpha})(?![A-Za-z]){additional}{nonalpha})".format(
                group=group,
                alpha="|".join(map(re.escape, ends_with_alpha)),
                additional="|" if ends_with_nonalpha else "",
                nonalpha="|".join(map(re.escape, ends_with_nonalpha)),
            )
        )

    p.float_literal = Regex(r"[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)")
    p.space = oneOf(self._space_widths)("space")

    p.style_literal = oneOf(
        [str(e.value) for e in self._MathStyle])("style_literal")

    # A symbol is a single plain character, an escaped special character,
    # or a backslash followed by one of the names in ``tex2uni``.
    p.symbol = Regex(
        r"[a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|\U00000080-\U0001ffff]"
        r"|\\[%${}\[\]_|]"
        + r"|\\(?:{})(?![A-Za-z])".format(
            "|".join(map(re.escape, tex2uni)))
    )("sym").leaveWhitespace()
    p.unknown_symbol = Regex(r"\\[A-Za-z]+")("name")

    p.font = csnames("font", self._fontnames)
    p.start_group = Optional(r"\math" + oneOf(self._fontnames)("font")) + "{"
    p.end_group = Literal("}")

    p.delim = oneOf(self._delims)

    # Mutually recursive definitions.  (Minimizing the number of Forward
    # elements is important for speed.)
    p.auto_delim = Forward()
    p.placeable = Forward()
    p.required_group = Forward()
    p.optional_group = Forward()
    p.token = Forward()

    set_names_and_parse_actions()  # for mutually recursive definitions.

    p.optional_group <<= "{" + ZeroOrMore(p.token)("group") + "}"
    p.required_group <<= "{" + OneOrMore(p.token)("group") + "}"

    p.customspace = cmd(r"\hspace", "{" + p.float_literal("space") + "}")

    p.accent = (
        csnames("accent", [*self._accent_map, *self._wide_accents])
        - p.placeable("sym"))

    p.function = csnames("name", self._function_names)

    p.group = p.start_group + ZeroOrMore(p.token)("group") + p.end_group
    # A group that is still open when the string ends.
    p.unclosed_group = (p.start_group + ZeroOrMore(p.token)("group") + StringEnd())

    p.frac = cmd(r"\frac", p.required_group("num") + p.required_group("den"))
    p.dfrac = cmd(r"\dfrac", p.required_group("num") + p.required_group("den"))
    p.binom = cmd(r"\binom", p.required_group("num") + p.required_group("den"))

    p.genfrac = cmd(
        r"\genfrac",
        "{" + Optional(p.delim)("ldelim") + "}"
        + "{" + Optional(p.delim)("rdelim") + "}"
        + "{" + p.float_literal("rulesize") + "}"
        + "{" + Optional(p.style_literal)("style") + "}"
        + p.required_group("num")
        + p.required_group("den"))

    p.sqrt = cmd(
        r"\sqrt{value}",
        Optional("[" + OneOrMore(NotAny("]") + p.token)("root") + "]")
        + p.required_group("value"))

    p.overline = cmd(r"\overline", p.required_group("body"))

    p.overset = cmd(
        r"\overset",
        p.optional_group("annotation") + p.optional_group("body"))
    p.underset = cmd(
        r"\underset",
        p.optional_group("annotation") + p.optional_group("body"))

    p.text = cmd(r"\text", QuotedString('{', '\\', endQuoteChar="}"))

    p.substack = cmd(r"\substack",
                     nested_expr(opener="{", closer="}",
                                 content=Group(OneOrMore(p.token)) +
                                 ZeroOrMore(Literal("\\\\").suppress()))("parts"))

    p.subsuper = (
        (Optional(p.placeable)("nucleus")
         + OneOrMore(oneOf(["_", "^"]) - p.placeable)("subsuper")
         + Regex("'*")("apostrophes"))
        | Regex("'+")("apostrophes")
        | (p.placeable("nucleus") + Regex("'*")("apostrophes"))
    )

    p.simple = p.space | p.customspace | p.font | p.subsuper

    p.token <<= (
        p.simple
        | p.auto_delim
        | p.unclosed_group
        | p.unknown_symbol  # Must be last
    )

    p.operatorname = cmd(r"\operatorname", "{" + ZeroOrMore(p.simple)("name") + "}")

    p.boldsymbol = cmd(
        r"\boldsymbol", "{" + ZeroOrMore(p.simple)("value") + "}")

    p.placeable <<= (
        p.accent  # Must be before symbol as all accents are symbols
        | p.symbol  # Must be second to catch all named symbols and single
                    # chars not in a group
        | p.function
        | p.operatorname
        | p.group
        | p.frac
        | p.dfrac
        | p.binom
        | p.genfrac
        | p.overset
        | p.underset
        | p.sqrt
        | p.overline
        | p.text
        | p.boldsymbol
        | p.substack
    )

    # Not stored on *p*, so it gets neither a name nor a parse action.
    mdelim = r"\middle" - (p.delim("mdelim") | Error("Expected a delimiter"))
    p.auto_delim <<= (
        r"\left" - (p.delim("left") | Error("Expected a delimiter"))
        + ZeroOrMore(p.simple | p.auto_delim | mdelim)("mid")
        + r"\right" - (p.delim("right") | Error("Expected a delimiter"))
    )

    # Leaf definitions.
    p.math = OneOrMore(p.token)
    p.math_string = QuotedString('$', '\\', unquoteResults=False)
    p.non_math = Regex(r"(?:(?:\\[$])|[^$])*").leaveWhitespace()
    p.main = (
        p.non_math + ZeroOrMore(p.math_string + p.non_math) + StringEnd()
    )
    set_names_and_parse_actions()  # for leaf definitions.

    self._expression = p.main
    self._math_expression = p.math

    # To add space to nucleus operators after sub/superscripts
    self._in_subscript_or_superscript = False
  1818. def parse(self, s: str, fonts_object: Fonts, fontsize: float, dpi: float) -> Hlist:
  1819. """
  1820. Parse expression *s* using the given *fonts_object* for
  1821. output, at the given *fontsize* and *dpi*.
  1822. Returns the parse tree of `Node` instances.
  1823. """
  1824. self._state_stack = [
  1825. ParserState(fonts_object, 'default', 'rm', fontsize, dpi)]
  1826. self._em_width_cache: dict[tuple[str, float, float], float] = {}
  1827. try:
  1828. result = self._expression.parseString(s)
  1829. except ParseBaseException as err:
  1830. # explain becomes a plain method on pyparsing 3 (err.explain(0)).
  1831. raise ValueError("\n" + ParseException.explain(err, 0)) from None
  1832. self._state_stack = []
  1833. self._in_subscript_or_superscript = False
  1834. # prevent operator spacing from leaking into a new expression
  1835. self._em_width_cache = {}
  1836. ParserElement.resetCache()
  1837. return T.cast(Hlist, result[0]) # Known return type from main.
  1838. def get_state(self) -> ParserState:
  1839. """Get the current `State` of the parser."""
  1840. return self._state_stack[-1]
  1841. def pop_state(self) -> None:
  1842. """Pop a `State` off of the stack."""
  1843. self._state_stack.pop()
  1844. def push_state(self) -> None:
  1845. """Push a new `State` onto the stack, copying the current state."""
  1846. self._state_stack.append(self.get_state().copy())
  1847. def main(self, toks: ParseResults) -> list[Hlist]:
  1848. return [Hlist(toks.asList())]
  1849. def math_string(self, toks: ParseResults) -> ParseResults:
  1850. return self._math_expression.parseString(toks[0][1:-1], parseAll=True)
  1851. def math(self, toks: ParseResults) -> T.Any:
  1852. hlist = Hlist(toks.asList())
  1853. self.pop_state()
  1854. return [hlist]
  1855. def non_math(self, toks: ParseResults) -> T.Any:
  1856. s = toks[0].replace(r'\$', '$')
  1857. symbols = [Char(c, self.get_state()) for c in s]
  1858. hlist = Hlist(symbols)
  1859. # We're going into math now, so set font to 'it'
  1860. self.push_state()
  1861. self.get_state().font = mpl.rcParams['mathtext.default']
  1862. return [hlist]
# Parse action for ``float_literal``, wrapped in staticmethod so that it is
# not bound to the parser instance when looked up.
float_literal = staticmethod(pyparsing_common.convertToFloat)
  1864. def text(self, toks: ParseResults) -> T.Any:
  1865. self.push_state()
  1866. state = self.get_state()
  1867. state.font = 'rm'
  1868. hlist = Hlist([Char(c, state) for c in toks[1]])
  1869. self.pop_state()
  1870. return [hlist]
  1871. def _make_space(self, percentage: float) -> Kern:
  1872. # In TeX, an em (the unit usually used to measure horizontal lengths)
  1873. # is not the width of the character 'm'; it is the same in different
  1874. # font styles (e.g. roman or italic). Mathtext, however, uses 'm' in
  1875. # the italic style so that horizontal spaces don't depend on the
  1876. # current font style.
  1877. state = self.get_state()
  1878. key = (state.font, state.fontsize, state.dpi)
  1879. width = self._em_width_cache.get(key)
  1880. if width is None:
  1881. metrics = state.fontset.get_metrics(
  1882. 'it', mpl.rcParams['mathtext.default'], 'm',
  1883. state.fontsize, state.dpi)
  1884. width = metrics.advance
  1885. self._em_width_cache[key] = width
  1886. return Kern(width * percentage)
# Width of each spacing command, as a fraction of an em.  The keys are
# also the literals that the grammar's ``space`` element matches.
_space_widths = {
    r'\,':         0.16667,   # 3/18 em = 3 mu
    r'\thinspace': 0.16667,   # 3/18 em = 3 mu
    r'\/':         0.16667,   # 3/18 em = 3 mu
    r'\>':         0.22222,   # 4/18 em = 4 mu
    r'\:':         0.22222,   # 4/18 em = 4 mu
    r'\;':         0.27778,   # 5/18 em = 5 mu
    r'\ ':         0.33333,   # 6/18 em = 6 mu
    r'~':          0.33333,   # 6/18 em = 6 mu, nonbreakable
    r'\enspace':   0.5,       # 9/18 em = 9 mu
    r'\quad':      1,         # 1 em = 18 mu
    r'\qquad':     2,         # 2 em = 36 mu
    r'\!':         -0.16667,  # -3/18 em = -3 mu
}
  1901. def space(self, toks: ParseResults) -> T.Any:
  1902. num = self._space_widths[toks["space"]]
  1903. box = self._make_space(num)
  1904. return [box]
  1905. def customspace(self, toks: ParseResults) -> T.Any:
  1906. return [self._make_space(toks["space"])]
  1907. def symbol(self, s: str, loc: int,
  1908. toks: ParseResults | dict[str, str]) -> T.Any:
  1909. c = toks["sym"]
  1910. if c == "-":
  1911. # "U+2212 minus sign is the preferred representation of the unary
  1912. # and binary minus sign rather than the ASCII-derived U+002D
  1913. # hyphen-minus, because minus sign is unambiguous and because it
  1914. # is rendered with a more desirable length, usually longer than a
  1915. # hyphen." (https://www.unicode.org/reports/tr25/)
  1916. c = "\N{MINUS SIGN}"
  1917. try:
  1918. char = Char(c, self.get_state())
  1919. except ValueError as err:
  1920. raise ParseFatalException(s, loc,
  1921. "Unknown symbol: %s" % c) from err
  1922. if c in self._spaced_symbols:
  1923. # iterate until we find previous character, needed for cases
  1924. # such as ${ -2}$, $ -2$, or $ -2$.
  1925. prev_char = next((c for c in s[:loc][::-1] if c != ' '), '')
  1926. # Binary operators at start of string should not be spaced
  1927. # Also, operators in sub- or superscripts should not be spaced
  1928. if (self._in_subscript_or_superscript or (
  1929. c in self._binary_operators and (
  1930. len(s[:loc].split()) == 0 or prev_char == '{' or
  1931. prev_char in self._left_delims))):
  1932. return [char]
  1933. else:
  1934. return [Hlist([self._make_space(0.2),
  1935. char,
  1936. self._make_space(0.2)],
  1937. do_kern=True)]
  1938. elif c in self._punctuation_symbols:
  1939. prev_char = next((c for c in s[:loc][::-1] if c != ' '), '')
  1940. next_char = next((c for c in s[loc + 1:] if c != ' '), '')
  1941. # Do not space commas between brackets
  1942. if c == ',':
  1943. if prev_char == '{' and next_char == '}':
  1944. return [char]
  1945. # Do not space dots as decimal separators
  1946. if c == '.' and prev_char.isdigit() and next_char.isdigit():
  1947. return [char]
  1948. else:
  1949. return [Hlist([char, self._make_space(0.2)], do_kern=True)]
  1950. return [char]
def unknown_symbol(self, s: str, loc: int, toks: ParseResults) -> T.Any:
    """Parse action for ``unknown_symbol``: always raise a fatal error."""
    raise ParseFatalException(s, loc, f"Unknown symbol: {toks['name']}")
# Maps an accent command (without its leading backslash) to the symbol
# used to draw it.
_accent_map = {
    r'hat':            r'\circumflexaccent',
    r'breve':          r'\combiningbreve',
    r'bar':            r'\combiningoverline',
    r'grave':          r'\combininggraveaccent',
    r'acute':          r'\combiningacuteaccent',
    r'tilde':          r'\combiningtilde',
    r'dot':            r'\combiningdotabove',
    r'ddot':           r'\combiningdiaeresis',
    r'dddot':          r'\combiningthreedotsabove',
    r'ddddot':         r'\combiningfourdotsabove',
    r'vec':            r'\combiningrightarrowabove',
    r'"':              r'\combiningdiaeresis',
    r"`":              r'\combininggraveaccent',
    r"'":              r'\combiningacuteaccent',
    r'~':              r'\combiningtilde',
    r'.':              r'\combiningdotabove',
    r'^':              r'\circumflexaccent',
    r'overrightarrow': r'\rightarrow',
    r'overleftarrow':  r'\leftarrow',
    r'mathring':       r'\circ',
}

# Accents that ``accent`` builds with `AutoWidthChar`, sized to the width
# of the accented symbol.
_wide_accents = set(r"widehat widetilde widebar".split())
  1976. def accent(self, toks: ParseResults) -> T.Any:
  1977. state = self.get_state()
  1978. thickness = state.get_current_underline_thickness()
  1979. accent = toks["accent"]
  1980. sym = toks["sym"]
  1981. accent_box: Node
  1982. if accent in self._wide_accents:
  1983. accent_box = AutoWidthChar(
  1984. '\\' + accent, sym.width, state, char_class=Accent)
  1985. else:
  1986. accent_box = Accent(self._accent_map[accent], state)
  1987. if accent == 'mathring':
  1988. accent_box.shrink()
  1989. accent_box.shrink()
  1990. centered = HCentered([Hbox(sym.width / 4.0), accent_box])
  1991. centered.hpack(sym.width, 'exactly')
  1992. return Vlist([
  1993. centered,
  1994. Vbox(0., thickness * 2.0),
  1995. Hlist([sym])
  1996. ])
  1997. def function(self, s: str, loc: int, toks: ParseResults) -> T.Any:
  1998. hlist = self.operatorname(s, loc, toks)
  1999. hlist.function_name = toks["name"]
  2000. return hlist
  2001. def operatorname(self, s: str, loc: int, toks: ParseResults) -> T.Any:
  2002. self.push_state()
  2003. state = self.get_state()
  2004. state.font = 'rm'
  2005. hlist_list: list[Node] = []
  2006. # Change the font of Chars, but leave Kerns alone
  2007. name = toks["name"]
  2008. for c in name:
  2009. if isinstance(c, Char):
  2010. c.font = 'rm'
  2011. c._update_metrics()
  2012. hlist_list.append(c)
  2013. elif isinstance(c, str):
  2014. hlist_list.append(Char(c, state))
  2015. else:
  2016. hlist_list.append(c)
  2017. next_char_loc = loc + len(name) + 1
  2018. if isinstance(name, ParseResults):
  2019. next_char_loc += len('operatorname{}')
  2020. next_char = next((c for c in s[next_char_loc:] if c != ' '), '')
  2021. delimiters = self._delims | {'^', '_'}
  2022. if (next_char not in delimiters and
  2023. name not in self._overunder_functions):
  2024. # Add thin space except when followed by parenthesis, bracket, etc.
  2025. hlist_list += [self._make_space(self._space_widths[r'\,'])]
  2026. self.pop_state()
  2027. # if followed by a super/subscript, set flag to true
  2028. # This flag tells subsuper to add space after this operator
  2029. if next_char in {'^', '_'}:
  2030. self._in_subscript_or_superscript = True
  2031. else:
  2032. self._in_subscript_or_superscript = False
  2033. return Hlist(hlist_list)
  2034. def start_group(self, toks: ParseResults) -> T.Any:
  2035. self.push_state()
  2036. # Deal with LaTeX-style font tokens
  2037. if toks.get("font"):
  2038. self.get_state().font = toks.get("font")
  2039. return []
  2040. def group(self, toks: ParseResults) -> T.Any:
  2041. grp = Hlist(toks.get("group", []))
  2042. return [grp]
  2043. def required_group(self, toks: ParseResults) -> T.Any:
  2044. return Hlist(toks.get("group", []))
  2045. optional_group = required_group
def end_group(self) -> T.Any:
    """Parse action for ``end_group``: pop the state pushed for the group."""
    self.pop_state()
    # Contributes no tokens.
    return []
def unclosed_group(self, s: str, loc: int, toks: ParseResults) -> T.Any:
    """
    Parse action for ``unclosed_group``: always raise a fatal error.

    The error is reported at the end of the input string rather than at
    *loc*.
    """
    raise ParseFatalException(s, len(s), "Expected '}'")
  2051. def font(self, toks: ParseResults) -> T.Any:
  2052. self.get_state().font = toks["font"]
  2053. return []
  2054. def is_overunder(self, nucleus: Node) -> bool:
  2055. if isinstance(nucleus, Char):
  2056. return nucleus.c in self._overunder_symbols
  2057. elif isinstance(nucleus, Hlist) and hasattr(nucleus, 'function_name'):
  2058. return nucleus.function_name in self._overunder_functions
  2059. return False
  2060. def is_dropsub(self, nucleus: Node) -> bool:
  2061. if isinstance(nucleus, Char):
  2062. return nucleus.c in self._dropsub_symbols
  2063. return False
  2064. def is_slanted(self, nucleus: Node) -> bool:
  2065. if isinstance(nucleus, Char):
  2066. return nucleus.is_slanted()
  2067. return False
  2068. def subsuper(self, s: str, loc: int, toks: ParseResults) -> T.Any:
  2069. nucleus = toks.get("nucleus", Hbox(0))
  2070. subsuper = toks.get("subsuper", [])
  2071. napostrophes = len(toks.get("apostrophes", []))
  2072. if not subsuper and not napostrophes:
  2073. return nucleus
  2074. sub = super = None
  2075. while subsuper:
  2076. op, arg, *subsuper = subsuper
  2077. if op == '_':
  2078. if sub is not None:
  2079. raise ParseFatalException("Double subscript")
  2080. sub = arg
  2081. else:
  2082. if super is not None:
  2083. raise ParseFatalException("Double superscript")
  2084. super = arg
  2085. state = self.get_state()
  2086. rule_thickness = state.fontset.get_underline_thickness(
  2087. state.font, state.fontsize, state.dpi)
  2088. xHeight = state.fontset.get_xheight(
  2089. state.font, state.fontsize, state.dpi)
  2090. if napostrophes:
  2091. if super is None:
  2092. super = Hlist([])
  2093. for i in range(napostrophes):
  2094. super.children.extend(self.symbol(s, loc, {"sym": "\\prime"}))
  2095. # kern() and hpack() needed to get the metrics right after
  2096. # extending
  2097. super.kern()
  2098. super.hpack()
  2099. # Handle over/under symbols, such as sum or prod
  2100. if self.is_overunder(nucleus):
  2101. vlist = []
  2102. shift = 0.
  2103. width = nucleus.width
  2104. if super is not None:
  2105. super.shrink()
  2106. width = max(width, super.width)
  2107. if sub is not None:
  2108. sub.shrink()
  2109. width = max(width, sub.width)
  2110. vgap = rule_thickness * 3.0
  2111. if super is not None:
  2112. hlist = HCentered([super])
  2113. hlist.hpack(width, 'exactly')
  2114. vlist.extend([hlist, Vbox(0, vgap)])
  2115. hlist = HCentered([nucleus])
  2116. hlist.hpack(width, 'exactly')
  2117. vlist.append(hlist)
  2118. if sub is not None:
  2119. hlist = HCentered([sub])
  2120. hlist.hpack(width, 'exactly')
  2121. vlist.extend([Vbox(0, vgap), hlist])
  2122. shift = hlist.height + vgap + nucleus.depth
  2123. vlt = Vlist(vlist)
  2124. vlt.shift_amount = shift
  2125. result = Hlist([vlt])
  2126. return [result]
  2127. # We remove kerning on the last character for consistency (otherwise
  2128. # it will compute kerning based on non-shrunk characters and may put
  2129. # them too close together when superscripted)
  2130. # We change the width of the last character to match the advance to
  2131. # consider some fonts with weird metrics: e.g. stix's f has a width of
  2132. # 7.75 and a kerning of -4.0 for an advance of 3.72, and we want to put
  2133. # the superscript at the advance
  2134. last_char = nucleus
  2135. if isinstance(nucleus, Hlist):
  2136. new_children = nucleus.children
  2137. if len(new_children):
  2138. # remove last kern
  2139. if (isinstance(new_children[-1], Kern) and
  2140. hasattr(new_children[-2], '_metrics')):
  2141. new_children = new_children[:-1]
  2142. last_char = new_children[-1]
  2143. if hasattr(last_char, '_metrics'):
  2144. last_char.width = last_char._metrics.advance
  2145. # create new Hlist without kerning
  2146. nucleus = Hlist(new_children, do_kern=False)
  2147. else:
  2148. if isinstance(nucleus, Char):
  2149. last_char.width = last_char._metrics.advance
  2150. nucleus = Hlist([nucleus])
  2151. # Handle regular sub/superscripts
  2152. constants = _get_font_constant_set(state)
  2153. lc_height = last_char.height
  2154. lc_baseline = 0
  2155. if self.is_dropsub(last_char):
  2156. lc_baseline = last_char.depth
  2157. # Compute kerning for sub and super
  2158. superkern = constants.delta * xHeight
  2159. subkern = constants.delta * xHeight
  2160. if self.is_slanted(last_char):
  2161. superkern += constants.delta * xHeight
  2162. superkern += (constants.delta_slanted *
  2163. (lc_height - xHeight * 2. / 3.))
  2164. if self.is_dropsub(last_char):
  2165. subkern = (3 * constants.delta -
  2166. constants.delta_integral) * lc_height
  2167. superkern = (3 * constants.delta +
  2168. constants.delta_integral) * lc_height
  2169. else:
  2170. subkern = 0
  2171. x: List
  2172. if super is None:
  2173. # node757
  2174. # Note: One of super or sub must be a Node if we're in this function, but
  2175. # mypy can't know this, since it can't interpret pyparsing expressions,
  2176. # hence the cast.
  2177. x = Hlist([Kern(subkern), T.cast(Node, sub)])
  2178. x.shrink()
  2179. if self.is_dropsub(last_char):
  2180. shift_down = lc_baseline + constants.subdrop * xHeight
  2181. else:
  2182. shift_down = constants.sub1 * xHeight
  2183. x.shift_amount = shift_down
  2184. else:
  2185. x = Hlist([Kern(superkern), super])
  2186. x.shrink()
  2187. if self.is_dropsub(last_char):
  2188. shift_up = lc_height - constants.subdrop * xHeight
  2189. else:
  2190. shift_up = constants.sup1 * xHeight
  2191. if sub is None:
  2192. x.shift_amount = -shift_up
  2193. else: # Both sub and superscript
  2194. y = Hlist([Kern(subkern), sub])
  2195. y.shrink()
  2196. if self.is_dropsub(last_char):
  2197. shift_down = lc_baseline + constants.subdrop * xHeight
  2198. else:
  2199. shift_down = constants.sub2 * xHeight
  2200. # If sub and superscript collide, move super up
  2201. clr = (2.0 * rule_thickness -
  2202. ((shift_up - x.depth) - (y.height - shift_down)))
  2203. if clr > 0.:
  2204. shift_up += clr
  2205. x = Vlist([
  2206. x,
  2207. Kern((shift_up - x.depth) - (y.height - shift_down)),
  2208. y])
  2209. x.shift_amount = shift_down
  2210. if not self.is_dropsub(last_char):
  2211. x.width += constants.script_space * xHeight
  2212. # Do we need to add a space after the nucleus?
  2213. # To find out, check the flag set by operatorname
  2214. spaced_nucleus = [nucleus, x]
  2215. if self._in_subscript_or_superscript:
  2216. spaced_nucleus += [self._make_space(self._space_widths[r'\,'])]
  2217. self._in_subscript_or_superscript = False
  2218. result = Hlist(spaced_nucleus)
  2219. return [result]
  2220. def _genfrac(self, ldelim: str, rdelim: str, rule: float | None, style: _MathStyle,
  2221. num: Hlist, den: Hlist) -> T.Any:
  2222. state = self.get_state()
  2223. thickness = state.get_current_underline_thickness()
  2224. for _ in range(style.value):
  2225. num.shrink()
  2226. den.shrink()
  2227. cnum = HCentered([num])
  2228. cden = HCentered([den])
  2229. width = max(num.width, den.width)
  2230. cnum.hpack(width, 'exactly')
  2231. cden.hpack(width, 'exactly')
  2232. vlist = Vlist([cnum, # numerator
  2233. Vbox(0, thickness * 2.0), # space
  2234. Hrule(state, rule), # rule
  2235. Vbox(0, thickness * 2.0), # space
  2236. cden # denominator
  2237. ])
  2238. # Shift so the fraction line sits in the middle of the
  2239. # equals sign
  2240. metrics = state.fontset.get_metrics(
  2241. state.font, mpl.rcParams['mathtext.default'],
  2242. '=', state.fontsize, state.dpi)
  2243. shift = (cden.height -
  2244. ((metrics.ymax + metrics.ymin) / 2 -
  2245. thickness * 3.0))
  2246. vlist.shift_amount = shift
  2247. result = [Hlist([vlist, Hbox(thickness * 2.)])]
  2248. if ldelim or rdelim:
  2249. if ldelim == '':
  2250. ldelim = '.'
  2251. if rdelim == '':
  2252. rdelim = '.'
  2253. return self._auto_sized_delimiter(ldelim,
  2254. T.cast(list[T.Union[Box, Char, str]],
  2255. result),
  2256. rdelim)
  2257. return result
  2258. def style_literal(self, toks: ParseResults) -> T.Any:
  2259. return self._MathStyle(int(toks["style_literal"]))
  2260. def genfrac(self, toks: ParseResults) -> T.Any:
  2261. return self._genfrac(
  2262. toks.get("ldelim", ""), toks.get("rdelim", ""),
  2263. toks["rulesize"], toks.get("style", self._MathStyle.TEXTSTYLE),
  2264. toks["num"], toks["den"])
  2265. def frac(self, toks: ParseResults) -> T.Any:
  2266. return self._genfrac(
  2267. "", "", self.get_state().get_current_underline_thickness(),
  2268. self._MathStyle.TEXTSTYLE, toks["num"], toks["den"])
  2269. def dfrac(self, toks: ParseResults) -> T.Any:
  2270. return self._genfrac(
  2271. "", "", self.get_state().get_current_underline_thickness(),
  2272. self._MathStyle.DISPLAYSTYLE, toks["num"], toks["den"])
  2273. def binom(self, toks: ParseResults) -> T.Any:
  2274. return self._genfrac(
  2275. "(", ")", 0,
  2276. self._MathStyle.TEXTSTYLE, toks["num"], toks["den"])
  2277. def _genset(self, s: str, loc: int, toks: ParseResults) -> T.Any:
  2278. annotation = toks["annotation"]
  2279. body = toks["body"]
  2280. thickness = self.get_state().get_current_underline_thickness()
  2281. annotation.shrink()
  2282. cannotation = HCentered([annotation])
  2283. cbody = HCentered([body])
  2284. width = max(cannotation.width, cbody.width)
  2285. cannotation.hpack(width, 'exactly')
  2286. cbody.hpack(width, 'exactly')
  2287. vgap = thickness * 3
  2288. if s[loc + 1] == "u": # \underset
  2289. vlist = Vlist([cbody, # body
  2290. Vbox(0, vgap), # space
  2291. cannotation # annotation
  2292. ])
  2293. # Shift so the body sits in the same vertical position
  2294. vlist.shift_amount = cbody.depth + cannotation.height + vgap
  2295. else: # \overset
  2296. vlist = Vlist([cannotation, # annotation
  2297. Vbox(0, vgap), # space
  2298. cbody # body
  2299. ])
  2300. # To add horizontal gap between symbols: wrap the Vlist into
  2301. # an Hlist and extend it with an Hbox(0, horizontal_gap)
  2302. return vlist
  2303. overset = underset = _genset
  2304. def sqrt(self, toks: ParseResults) -> T.Any:
  2305. root = toks.get("root")
  2306. body = toks["value"]
  2307. state = self.get_state()
  2308. thickness = state.get_current_underline_thickness()
  2309. # Determine the height of the body, and add a little extra to
  2310. # the height so it doesn't seem cramped
  2311. height = body.height - body.shift_amount + thickness * 5.0
  2312. depth = body.depth + body.shift_amount
  2313. check = AutoHeightChar(r'\__sqrt__', height, depth, state, always=True)
  2314. height = check.height - check.shift_amount
  2315. depth = check.depth + check.shift_amount
  2316. # Put a little extra space to the left and right of the body
  2317. padded_body = Hlist([Hbox(2 * thickness), body, Hbox(2 * thickness)])
  2318. rightside = Vlist([Hrule(state), Glue('fill'), padded_body])
  2319. # Stretch the glue between the hrule and the body
  2320. rightside.vpack(height + (state.fontsize * state.dpi) / (100.0 * 12.0),
  2321. 'exactly', depth)
  2322. # Add the root and shift it upward so it is above the tick.
  2323. # The value of 0.6 is a hard-coded hack ;)
  2324. if not root:
  2325. root = Box(check.width * 0.5, 0., 0.)
  2326. else:
  2327. root = Hlist(root)
  2328. root.shrink()
  2329. root.shrink()
  2330. root_vlist = Vlist([Hlist([root])])
  2331. root_vlist.shift_amount = -height * 0.6
  2332. hlist = Hlist([root_vlist, # Root
  2333. # Negative kerning to put root over tick
  2334. Kern(-check.width * 0.5),
  2335. check, # Check
  2336. rightside]) # Body
  2337. return [hlist]
  2338. def overline(self, toks: ParseResults) -> T.Any:
  2339. body = toks["body"]
  2340. state = self.get_state()
  2341. thickness = state.get_current_underline_thickness()
  2342. height = body.height - body.shift_amount + thickness * 3.0
  2343. depth = body.depth + body.shift_amount
  2344. # Place overline above body
  2345. rightside = Vlist([Hrule(state), Glue('fill'), Hlist([body])])
  2346. # Stretch the glue between the hrule and the body
  2347. rightside.vpack(height + (state.fontsize * state.dpi) / (100.0 * 12.0),
  2348. 'exactly', depth)
  2349. hlist = Hlist([rightside])
  2350. return [hlist]
  2351. def _auto_sized_delimiter(self, front: str,
  2352. middle: list[Box | Char | str],
  2353. back: str) -> T.Any:
  2354. state = self.get_state()
  2355. if len(middle):
  2356. height = max([x.height for x in middle if not isinstance(x, str)])
  2357. depth = max([x.depth for x in middle if not isinstance(x, str)])
  2358. factor = None
  2359. for idx, el in enumerate(middle):
  2360. if isinstance(el, str) and el == '\\middle':
  2361. c = T.cast(str, middle[idx + 1]) # Should be one of p.delims.
  2362. if c != '.':
  2363. middle[idx + 1] = AutoHeightChar(
  2364. c, height, depth, state, factor=factor)
  2365. else:
  2366. middle.remove(c)
  2367. del middle[idx]
  2368. # There should only be \middle and its delimiter as str, which have
  2369. # just been removed.
  2370. middle_part = T.cast(list[T.Union[Box, Char]], middle)
  2371. else:
  2372. height = 0
  2373. depth = 0
  2374. factor = 1.0
  2375. middle_part = []
  2376. parts: list[Node] = []
  2377. # \left. and \right. aren't supposed to produce any symbols
  2378. if front != '.':
  2379. parts.append(
  2380. AutoHeightChar(front, height, depth, state, factor=factor))
  2381. parts.extend(middle_part)
  2382. if back != '.':
  2383. parts.append(
  2384. AutoHeightChar(back, height, depth, state, factor=factor))
  2385. hlist = Hlist(parts)
  2386. return hlist
  2387. def auto_delim(self, toks: ParseResults) -> T.Any:
  2388. return self._auto_sized_delimiter(
  2389. toks["left"],
  2390. # if "mid" in toks ... can be removed when requiring pyparsing 3.
  2391. toks["mid"].asList() if "mid" in toks else [],
  2392. toks["right"])
  2393. def boldsymbol(self, toks: ParseResults) -> T.Any:
  2394. self.push_state()
  2395. state = self.get_state()
  2396. hlist: list[Node] = []
  2397. name = toks["value"]
  2398. for c in name:
  2399. if isinstance(c, Hlist):
  2400. k = c.children[1]
  2401. if isinstance(k, Char):
  2402. k.font = "bf"
  2403. k._update_metrics()
  2404. hlist.append(c)
  2405. elif isinstance(c, Char):
  2406. c.font = "bf"
  2407. if (c.c in self._latin_alphabets or
  2408. c.c[1:] in self._small_greek):
  2409. c.font = "bfit"
  2410. c._update_metrics()
  2411. c._update_metrics()
  2412. hlist.append(c)
  2413. else:
  2414. hlist.append(c)
  2415. self.pop_state()
  2416. return Hlist(hlist)
  2417. def substack(self, toks: ParseResults) -> T.Any:
  2418. parts = toks["parts"]
  2419. state = self.get_state()
  2420. thickness = state.get_current_underline_thickness()
  2421. hlist = [Hlist(k) for k in parts[0]]
  2422. max_width = max(map(lambda c: c.width, hlist))
  2423. vlist = []
  2424. for sub in hlist:
  2425. cp = HCentered([sub])
  2426. cp.hpack(max_width, 'exactly')
  2427. vlist.append(cp)
  2428. stack = [val
  2429. for pair in zip(vlist, [Vbox(0, thickness * 2)] * len(vlist))
  2430. for val in pair]
  2431. del stack[-1]
  2432. vlt = Vlist(stack)
  2433. result = [Hlist([vlt])]
  2434. return result