tfmLib.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. """Module for reading TFM (TeX Font Metrics) files.
  2. The TFM format is described in the TFtoPL WEB source code, whose typeset form
  3. can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.
  4. >>> from fontTools.tfmLib import TFM
  5. >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
  6. >>>
  7. >>> # Accessing an attribute gets you metadata.
  8. >>> tfm.checksum
  9. 1274110073
  10. >>> tfm.designsize
  11. 10.0
  12. >>> tfm.codingscheme
  13. 'TeX text'
  14. >>> tfm.family
  15. 'CMR'
  16. >>> tfm.seven_bit_safe_flag
  17. False
  18. >>> tfm.face
  19. 234
  20. >>> tfm.extraheader
  21. {}
  22. >>> tfm.fontdimens
  23. {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
  24. >>> # Accessing a character gets you its metrics.
  25. >>> # “width” is always available, other metrics are available only when
  26. >>> # applicable. All values are relative to “designsize”.
  27. >>> tfm.chars[ord("g")]
  28. {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
  29. >>> # Kerning and ligature can be accessed as well.
  30. >>> tfm.kerning[ord("c")]
  31. {104: -0.02777862548828125, 107: -0.02777862548828125}
  32. >>> tfm.ligatures[ord("f")]
  33. {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
  34. """
  35. from types import SimpleNamespace
  36. from fontTools.misc.sstruct import calcsize, unpack, unpack2
# sstruct description of the twelve size fields that open every TFM file.
# ">" selects big-endian byte order and each "h" field is a signed 16-bit
# integer; the lengths are counted in 4-byte words.
SIZES_FORMAT = """
>
lf: h # length of the entire file, in words
lh: h # length of the header data, in words
bc: h # smallest character code in the font
ec: h # largest character code in the font
nw: h # number of words in the width table
nh: h # number of words in the height table
nd: h # number of words in the depth table
ni: h # number of words in the italic correction table
nl: h # number of words in the ligature/kern table
nk: h # number of words in the kern table
ne: h # number of words in the extensible character table
np: h # number of font parameter words
"""

# Size in bytes of the preamble described by SIZES_FORMAT.
SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct fixed-point code used for every dimension value that TFM._read
# decodes (presumably 12 integer bits and 20 fraction bits -- confirm against
# the sstruct documentation).
FIXED_FORMAT = "12.20F"

# The header is optional beyond its first two words, so four cumulative
# layouts are defined; each one embeds the previous layout and appends fields.
# TFM._read picks the largest layout that fits in the declared header length.
HEADER_FORMAT1 = f"""
>
checksum: L
designsize: {FIXED_FORMAT}
"""
HEADER_FORMAT2 = f"""
{HEADER_FORMAT1}
codingscheme: 40p
"""
HEADER_FORMAT3 = f"""
{HEADER_FORMAT2}
family: 20p
"""
HEADER_FORMAT4 = f"""
{HEADER_FORMAT3}
seven_bit_safe_flag: ?
ignored: x
ignored: x
face: B
"""

# Sizes in bytes of the four header layouts above.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# One four-byte instruction of the ligature/kern program.
LIG_KERN_COMMAND = """
>
skip_byte: B
next_char: B
op_byte: B
remainder: B
"""

# Names given to the first seven font parameters (indices 0..6) in
# TFM.fontdimens, whatever the coding scheme is.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names for parameter indices 7..21 when the coding scheme starts with
# "TEX MATH SY".
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names for parameter indices 7..12 when the coding scheme starts with
# "TEX MATH EX".
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Values stored in TFM.fonttype; they select which parameter-name table
# applies beyond BASE_PARAMS.
VANILLA = 0
MATHSY = 1
MATHEX = 2

# NOTE(review): these three are not referenced anywhere in the visible code;
# presumably they classify ligature/kern instructions as in TFtoPL -- confirm
# before relying on them.
UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2

# Values of the two-bit tag field of a character's info word; TFM._read uses
# them to decide how the character's remainder byte is interpreted.
NO_TAG = 0
LIG_TAG = 1
LIST_TAG = 2
EXT_TAG = 3

# Thresholds applied to a ligature/kern instruction: skip_byte is compared
# against STOP_FLAG, op_byte against KERN_FLAG.
STOP_FLAG = 128
KERN_FLAG = 128
  131. class TFMException(Exception):
  132. def __init__(self, message):
  133. super().__init__(message)
  134. class TFM:
  135. def __init__(self, file):
  136. self._read(file)
  137. def __repr__(self):
  138. return (
  139. f"<TFM"
  140. f" for {self.family}"
  141. f" in {self.codingscheme}"
  142. f" at {self.designsize:g}pt>"
  143. )
  144. def _read(self, file):
  145. if hasattr(file, "read"):
  146. data = file.read()
  147. else:
  148. with open(file, "rb") as fp:
  149. data = fp.read()
  150. self._data = data
  151. if len(data) < SIZES_SIZE:
  152. raise TFMException("Too short input file")
  153. sizes = SimpleNamespace()
  154. unpack2(SIZES_FORMAT, data, sizes)
  155. # Do some file structure sanity checks.
  156. # TeX and TFtoPL do additional functional checks and might even correct
  157. # “errors” in the input file, but we instead try to output the file as
  158. # it is as long as it is parsable, even if the data make no sense.
  159. if sizes.lf < 0:
  160. raise TFMException("The file claims to have negative or zero length!")
  161. if len(data) < sizes.lf * 4:
  162. raise TFMException("The file has fewer bytes than it claims!")
  163. for name, length in vars(sizes).items():
  164. if length < 0:
  165. raise TFMException("The subfile size: '{name}' is negative!")
  166. if sizes.lh < 2:
  167. raise TFMException(f"The header length is only {sizes.lh}!")
  168. if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
  169. raise TFMException(
  170. f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
  171. )
  172. if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
  173. raise TFMException("Incomplete subfiles for character dimensions!")
  174. if sizes.ne > 256:
  175. raise TFMException(f"There are {ne} extensible recipes!")
  176. if sizes.lf != (
  177. 6
  178. + sizes.lh
  179. + (sizes.ec - sizes.bc + 1)
  180. + sizes.nw
  181. + sizes.nh
  182. + sizes.nd
  183. + sizes.ni
  184. + sizes.nl
  185. + sizes.nk
  186. + sizes.ne
  187. + sizes.np
  188. ):
  189. raise TFMException("Subfile sizes don’t add up to the stated total")
  190. # Subfile offsets, used in the helper function below. These all are
  191. # 32-bit word offsets not 8-bit byte offsets.
  192. char_base = 6 + sizes.lh - sizes.bc
  193. width_base = char_base + sizes.ec + 1
  194. height_base = width_base + sizes.nw
  195. depth_base = height_base + sizes.nh
  196. italic_base = depth_base + sizes.nd
  197. lig_kern_base = italic_base + sizes.ni
  198. kern_base = lig_kern_base + sizes.nl
  199. exten_base = kern_base + sizes.nk
  200. param_base = exten_base + sizes.ne
  201. # Helper functions for accessing individual data. If this looks
  202. # nonidiomatic Python, I blame the effect of reading the literate WEB
  203. # documentation of TFtoPL.
  204. def char_info(c):
  205. return 4 * (char_base + c)
  206. def width_index(c):
  207. return data[char_info(c)]
  208. def noneexistent(c):
  209. return c < sizes.bc or c > sizes.ec or width_index(c) == 0
  210. def height_index(c):
  211. return data[char_info(c) + 1] // 16
  212. def depth_index(c):
  213. return data[char_info(c) + 1] % 16
  214. def italic_index(c):
  215. return data[char_info(c) + 2] // 4
  216. def tag(c):
  217. return data[char_info(c) + 2] % 4
  218. def remainder(c):
  219. return data[char_info(c) + 3]
  220. def width(c):
  221. r = 4 * (width_base + width_index(c))
  222. return read_fixed(r, "v")["v"]
  223. def height(c):
  224. r = 4 * (height_base + height_index(c))
  225. return read_fixed(r, "v")["v"]
  226. def depth(c):
  227. r = 4 * (depth_base + depth_index(c))
  228. return read_fixed(r, "v")["v"]
  229. def italic(c):
  230. r = 4 * (italic_base + italic_index(c))
  231. return read_fixed(r, "v")["v"]
  232. def exten(c):
  233. return 4 * (exten_base + remainder(c))
  234. def lig_step(i):
  235. return 4 * (lig_kern_base + i)
  236. def lig_kern_command(i):
  237. command = SimpleNamespace()
  238. unpack2(LIG_KERN_COMMAND, data[i:], command)
  239. return command
  240. def kern(i):
  241. r = 4 * (kern_base + i)
  242. return read_fixed(r, "v")["v"]
  243. def param(i):
  244. return 4 * (param_base + i)
  245. def read_fixed(index, key, obj=None):
  246. ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
  247. return ret[0]
  248. # Set all attributes to empty values regardless of the header size.
  249. unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)
  250. offset = 24
  251. length = sizes.lh * 4
  252. self.extraheader = {}
  253. if length >= HEADER_SIZE4:
  254. rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
  255. if self.face < 18:
  256. s = self.face % 2
  257. b = self.face // 2
  258. self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
  259. for i in range(sizes.lh - HEADER_SIZE4 // 4):
  260. rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
  261. elif length >= HEADER_SIZE3:
  262. unpack2(HEADER_FORMAT3, data[offset:], self)
  263. elif length >= HEADER_SIZE2:
  264. unpack2(HEADER_FORMAT2, data[offset:], self)
  265. elif length >= HEADER_SIZE1:
  266. unpack2(HEADER_FORMAT1, data[offset:], self)
  267. self.fonttype = VANILLA
  268. scheme = self.codingscheme.upper()
  269. if scheme.startswith("TEX MATH SY"):
  270. self.fonttype = MATHSY
  271. elif scheme.startswith("TEX MATH EX"):
  272. self.fonttype = MATHEX
  273. self.fontdimens = {}
  274. for i in range(sizes.np):
  275. name = f"PARAMETER{i+1}"
  276. if i <= 6:
  277. name = BASE_PARAMS[i]
  278. elif self.fonttype == MATHSY and i <= 21:
  279. name = MATHSY_PARAMS[i - 7]
  280. elif self.fonttype == MATHEX and i <= 12:
  281. name = MATHEX_PARAMS[i - 7]
  282. read_fixed(param(i), name, self.fontdimens)
  283. lig_kern_map = {}
  284. self.right_boundary_char = None
  285. self.left_boundary_char = None
  286. if sizes.nl > 0:
  287. cmd = lig_kern_command(lig_step(0))
  288. if cmd.skip_byte == 255:
  289. self.right_boundary_char = cmd.next_char
  290. cmd = lig_kern_command(lig_step((sizes.nl - 1)))
  291. if cmd.skip_byte == 255:
  292. self.left_boundary_char = 256
  293. r = 256 * cmd.op_byte + cmd.remainder
  294. lig_kern_map[self.left_boundary_char] = r
  295. self.chars = {}
  296. for c in range(sizes.bc, sizes.ec + 1):
  297. if width_index(c) > 0:
  298. self.chars[c] = info = {}
  299. info["width"] = width(c)
  300. if height_index(c) > 0:
  301. info["height"] = height(c)
  302. if depth_index(c) > 0:
  303. info["depth"] = depth(c)
  304. if italic_index(c) > 0:
  305. info["italic"] = italic(c)
  306. char_tag = tag(c)
  307. if char_tag == NO_TAG:
  308. pass
  309. elif char_tag == LIG_TAG:
  310. lig_kern_map[c] = remainder(c)
  311. elif char_tag == LIST_TAG:
  312. info["nextlarger"] = remainder(c)
  313. elif char_tag == EXT_TAG:
  314. info["varchar"] = varchar = {}
  315. for i in range(4):
  316. part = data[exten(c) + i]
  317. if i == 3 or part > 0:
  318. name = "rep"
  319. if i == 0:
  320. name = "top"
  321. elif i == 1:
  322. name = "mid"
  323. elif i == 2:
  324. name = "bot"
  325. if noneexistent(part):
  326. varchar[name] = c
  327. else:
  328. varchar[name] = part
  329. self.ligatures = {}
  330. self.kerning = {}
  331. for c, i in sorted(lig_kern_map.items()):
  332. cmd = lig_kern_command(lig_step(i))
  333. if cmd.skip_byte > STOP_FLAG:
  334. i = 256 * cmd.op_byte + cmd.remainder
  335. while i < sizes.nl:
  336. cmd = lig_kern_command(lig_step(i))
  337. if cmd.skip_byte > STOP_FLAG:
  338. pass
  339. else:
  340. if cmd.op_byte >= KERN_FLAG:
  341. r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
  342. self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
  343. else:
  344. r = cmd.op_byte
  345. if r == 4 or (r > 7 and r != 11):
  346. # Ligature step with nonstandard code, we output
  347. # the code verbatim.
  348. lig = r
  349. else:
  350. lig = ""
  351. if r % 4 > 1:
  352. lig += "/"
  353. lig += "LIG"
  354. if r % 2 != 0:
  355. lig += "/"
  356. while r > 3:
  357. lig += ">"
  358. r -= 4
  359. self.ligatures.setdefault(c, {})[cmd.next_char] = (
  360. lig,
  361. cmd.remainder,
  362. )
  363. if cmd.skip_byte >= STOP_FLAG:
  364. break
  365. i += cmd.skip_byte + 1
  366. if __name__ == "__main__":
  367. import sys
  368. tfm = TFM(sys.argv[1])
  369. print(
  370. "\n".join(
  371. x
  372. for x in [
  373. f"tfm.checksum={tfm.checksum}",
  374. f"tfm.designsize={tfm.designsize}",
  375. f"tfm.codingscheme={tfm.codingscheme}",
  376. f"tfm.fonttype={tfm.fonttype}",
  377. f"tfm.family={tfm.family}",
  378. f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}",
  379. f"tfm.face={tfm.face}",
  380. f"tfm.extraheader={tfm.extraheader}",
  381. f"tfm.fontdimens={tfm.fontdimens}",
  382. f"tfm.right_boundary_char={tfm.right_boundary_char}",
  383. f"tfm.left_boundary_char={tfm.left_boundary_char}",
  384. f"tfm.kerning={tfm.kerning}",
  385. f"tfm.ligatures={tfm.ligatures}",
  386. f"tfm.chars={tfm.chars}",
  387. ]
  388. )
  389. )
  390. print(tfm)