psLib.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
  2. from fontTools.misc import eexec
  3. from .psOperators import (
  4. PSOperators,
  5. ps_StandardEncoding,
  6. ps_array,
  7. ps_boolean,
  8. ps_dict,
  9. ps_integer,
  10. ps_literal,
  11. ps_mark,
  12. ps_name,
  13. ps_operator,
  14. ps_procedure,
  15. ps_procmark,
  16. ps_real,
  17. ps_string,
  18. )
  19. import re
  20. from collections.abc import Callable
  21. from string import whitespace
  22. import logging
  23. log = logging.getLogger(__name__)
# Bytes that delimit/start special tokens; dispatched individually in
# PSTokenizer.getnexttoken().
ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently

# A (possibly empty) run of whitespace bytes.
skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
# The remainder of a regular token: everything up to the next delimiter,
# whitespace byte, or '%' comment starter.
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
# A '%' comment, up to (but not including) the end of the line.
commentRE = re.compile(b"%[^\n\r]*")

# A PostScript (...) string, allowing escaped parens and one level of
# balanced embedded parens.
# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
stringPat = rb"""
\(
(
(
[^()]* \ [()]
)
|
(
[^()]* \( [^()]* \)
)
)*
[^()]*
\)
"""
# The pattern above is laid out with whitespace for readability only;
# strip it all before compiling (the whitespace is not part of the regex).
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

# A <...> hex string: hex digits with embedded whitespace allowed.
hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))
  47. class PSTokenError(Exception):
  48. pass
  49. class PSError(Exception):
  50. pass
class PSTokenizer(object):
    """Tokenizer for PostScript data held in an in-memory byte buffer.

    Tracks a current position (self.pos) in self.buf and hands out one
    token at a time via getnexttoken().  Also supports switching to and
    from the eexec-decrypted view of the remaining data (starteexec /
    stopeexec), as needed for Type 1 font programs.
    """

    def __init__(self, buf=b"", encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        # Encoding used by getnexttoken() to decode tokens to str.
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        # Drop the buffer (and pos) to free memory; any further read()
        # will raise ValueError via the self.closed check.
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        """Return the next (tokentype, token) pair, or (None, None) at EOF.

        'tokentype' is the name of the interpreter handler method for the
        token ("do_special", "do_comment", "do_string", "do_hexstring",
        "do_literal") or "" for a regular token; 'token' is the token text
        decoded to str with self.encoding.

        Raises PSTokenError on malformed input.
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        # bytechr(byteord(...)) yields a length-1 bytes object regardless
        # of how indexing into bytes behaves.
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                # Literal name: the token includes the leading '/'.
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                # Regular token (number, executable name, ...): no handler,
                # the interpreter routes it through do_token().
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        # Advance self.pos past any run of whitespace bytes.
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        """Switch to the eexec-decrypted view of the remaining buffer.

        The undecrypted tail is stashed in self.dirtybuf so stopeexec()
        can restore it.  self.pos is set to 4 to skip the four leading
        random bytes that eexec encryption prepends.
        """
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        # eexec.decrypt returns (plaintext, key); the trailing key R is unused.
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        # Restore the undecrypted buffer saved by starteexec(), if any.
        # NOTE(review): self.len and self.pos are not reset here — they are
        # left pointing into the decrypted view; verify callers rely on that.
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf
class PSInterpreter(PSOperators):
    """Minimal PostScript interpreter, capable enough to parse Type 1 fonts.

    Operators come from PSOperators: every callable ``ps_*`` method is
    installed into the system dictionary (without the ``ps_`` prefix) by
    fillsystemdict()/suckoperators().
    """

    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        # dictstack[0] is systemdict; name lookup scans from the end (top).
        self.dictstack = [systemdict, userdict]
        # Operand stack.
        self.stack = []
        # Nesting depth of { } procedures currently being collected; while
        # non-zero, handle_object() only pushes (deferred execution).
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        """Populate systemdict with built-in objects and all ps_* operators."""
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        """Install every callable ``ps_*`` attribute of klass (and,
        recursively, of its base classes) into systemdict under the
        operator name without the ``ps_`` prefix."""
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        """Tokenize and execute 'data' (bytes or str).

        On any exception, logs a window of the buffer around the error
        position at debug level, then re-raises.
        """
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        # localize for performance
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    # Special token: dispatch to the matching do_* handler.
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    # Regular token: number or (executable) name.
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            if self.tokenizer is not None:
                # Show 50 bytes of context on either side of the error.
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
                )
            raise

    def handle_object(self, object):
        """Execute or push one object, per PostScript semantics: inside a
        procedure definition, or for literals/procedures, just push;
        otherwise resolve names and execute operators/procedures."""
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        """Execute each object in the procedure body, in order."""
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        """Look 'name' up in the dictionary stack, innermost dict first.

        Raises PSError if the name is not found anywhere.
        """
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        # localize some builtins, for performance
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        """Convert a regular token to a PS object: integer, real,
        radix number ('base#digits'), or — failing all of those — a name."""
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    # PostScript radix notation, e.g. '16#FF'.
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        # Comments are discarded.
        pass

    def do_literal(self, token):
        # Strip the leading '/'.
        return ps_literal(token[1:])

    def do_string(self, token):
        # Strip the surrounding parentheses.
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        """Decode a <...> hex string token into a ps_string.

        Embedded whitespace is ignored; an odd digit count is padded
        with a trailing '0'.
        """
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        """Handle structural tokens: procedure braces and array brackets."""
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            # Collect everything down to the matching procmark.
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("huh?")

    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        """Pop and return the top object; if 'types' is given, raise
        PSError unless the object's type is one of them."""
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        """Implements ']': pop down to the mark and push a ps_array."""
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack
  320. def unpack_item(item):
  321. tp = type(item.value)
  322. if tp == dict:
  323. newitem = {}
  324. for key, value in item.value.items():
  325. newitem[key] = unpack_item(value)
  326. elif tp == list:
  327. newitem = [None] * len(item.value)
  328. for i in range(len(item.value)):
  329. newitem[i] = unpack_item(item.value[i])
  330. if item.type == "proceduretype":
  331. newitem = tuple(newitem)
  332. else:
  333. newitem = item.value
  334. return newitem
  335. def suckfont(data, encoding="ascii"):
  336. m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
  337. if m:
  338. fontName = m.group(1)
  339. fontName = fontName.decode()
  340. else:
  341. fontName = None
  342. interpreter = PSInterpreter(encoding=encoding)
  343. interpreter.interpret(
  344. b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
  345. )
  346. interpreter.interpret(data)
  347. fontdir = interpreter.dictstack[0]["FontDirectory"].value
  348. if fontName in fontdir:
  349. rawfont = fontdir[fontName]
  350. else:
  351. # fall back, in case fontName wasn't found
  352. fontNames = list(fontdir.keys())
  353. if len(fontNames) > 1:
  354. fontNames.remove("Helvetica")
  355. fontNames.sort()
  356. rawfont = fontdir[fontNames[0]]
  357. interpreter.close()
  358. return unpack_item(rawfont)