123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
- from fontTools.misc import eexec
- from .psOperators import (
- PSOperators,
- ps_StandardEncoding,
- ps_array,
- ps_boolean,
- ps_dict,
- ps_integer,
- ps_literal,
- ps_mark,
- ps_name,
- ps_operator,
- ps_procedure,
- ps_procmark,
- ps_real,
- ps_string,
- )
- import re
- from collections.abc import Callable
- from string import whitespace
- import logging
- log = logging.getLogger(__name__)
ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently

# A (possibly empty) run of PostScript whitespace characters.
skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
# The tail of a name/number token: everything up to the next delimiter,
# special character, or whitespace.
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
# A '%' comment extends to the end of the line.
commentRE = re.compile(b"%[^\n\r]*")

# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
# A PostScript string literal: '(' ... ')' whose body may contain
# backslash-escaped parens or one level of balanced parens.
# NOTE: the '\\' below must stay doubled; a single backslash would, after the
# whitespace-stripping join, escape the following '[' and break matching of
# backslash-escaped parens.
stringPat = rb"""
    \(
        (
            (
                [^()]*   \\   [()]
            )
            |
            (
                [^()]*  \(   [^()]*  \)
            )
        )*
        [^()]*
    \)
"""
# Collapse the verbose pattern above into a compact one by removing all
# whitespace (the pattern is written spread out purely for readability).
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

# A hex string: '<' followed by hex digits and whitespace, closed by '>'.
hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))
class PSTokenError(Exception):
    """Raised when the tokenizer encounters malformed PostScript input."""
class PSError(Exception):
    """Raised for errors occurring during PostScript interpretation."""
class PSTokenizer(object):
    """Lexer that splits a PostScript byte buffer into individual tokens.

    The buffer is held entirely in memory; `pos` tracks the current scan
    position.  Tokens are returned as decoded `str` objects (see
    `getnexttoken`), using the `encoding` given at construction time.
    """

    def __init__(self, buf=b"", encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        # Release the buffer; further read() calls will raise ValueError.
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        """Return the next (tokentype, token) pair, or (None, None) at EOF.

        `tokentype` is the name of the PSInterpreter handler method for the
        token ("do_special", "do_comment", "do_string", "do_hexstring",
        "do_literal") or "" for ordinary tokens (names/numbers), which are
        handled by `do_token`.  `token` is the raw token text decoded with
        `self.encoding`.

        Raises PSTokenError on malformed input (e.g. an unterminated
        string or hex string).
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        # Normalize the current byte to a length-1 bytes object so the
        # comparisons below work the same on all Python versions.
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                # Literal name: include the '/' in the token, but match the
                # name body starting after it.
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                # Ordinary executable name or number.
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        """Advance `pos` past any run of whitespace."""
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        """Switch to the eexec-encrypted portion of the buffer.

        Decrypts the remainder of the buffer with the standard eexec key
        (55665), keeping the original encrypted bytes in `dirtybuf` so
        `stopeexec` can restore them.  `pos` is set to 4, which skips the
        leading bytes of the decrypted data (presumably the eexec random
        lead bytes of a Type 1 font -- confirm against the Type 1 spec).
        """
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        """Restore the original (encrypted) buffer after an eexec section."""
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf
class PSInterpreter(PSOperators):
    """A minimal PostScript interpreter.

    Maintains an operand stack (`stack`) and a dictionary stack
    (`dictstack`, initialized with a system dict and a user dict).
    Operators are inherited from PSOperators as `ps_*` methods and
    installed into the system dict by `fillsystemdict`.
    """

    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        # Nesting depth of '{...}' procedure definitions; while > 0, tokens
        # are pushed rather than executed (see handle_object).
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        """Populate the system dict with built-in objects and operators."""
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        """Install every callable `ps_*` method of `klass` (and its bases)
        into `systemdict` as a ps_operator, keyed by the name without the
        'ps_' prefix."""
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        """Tokenize and execute `data` (bytes or str).

        On any error, logs 50 bytes of context on either side of the
        current position before re-raising.
        """
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        # localize for performance
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    # Special token: dispatch to the matching do_* method.
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    # Ordinary token: number or executable name.
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            # Log context around the failure position, then re-raise
            # whatever went wrong (intentionally catches everything since
            # it always re-raises).
            if self.tokenizer is not None:
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
                )
            raise

    def handle_object(self, object):
        """Execute or push a single PostScript object.

        Inside a procedure definition (proclevel > 0), or for literal
        objects and procedure bodies, the object is simply pushed.
        Otherwise executable names are resolved via the dict stack and the
        result is pushed (if literal), called (if a procedure), or invoked
        (if an operator)."""
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        """Execute each object in the procedure's body, in order."""
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        """Look `name` up in the dict stack, topmost dict first.

        Raises PSError if the name is not defined in any dict."""
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        # localize some stuff, for performance
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        """Convert an ordinary token into a ps_integer, ps_real or ps_name.

        Tries int first, then float, then PostScript radix notation
        ('base#digits', e.g. '16#FF'); anything else becomes a name."""
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        # Comments produce no object.
        pass

    def do_literal(self, token):
        # Strip the leading '/' from the literal name.
        return ps_literal(token[1:])

    def do_string(self, token):
        # Strip the surrounding parens.
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        """Decode a '<...>' hex string into a ps_string.

        Whitespace is removed; an odd number of digits is padded with a
        trailing '0' before decoding."""
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        """Handle the structural tokens '{', '}', '[' and ']'.

        '{' pushes a procmark and enters procedure mode; '}' collects
        everything back to the procmark into a ps_procedure; '[' pushes
        the array mark; ']' yields the name ']' (resolved later to the
        makearray operator)."""
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("huh?")

    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        """Pop and return the top of the operand stack.

        If `types` are given, the popped object's type must be one of
        them.  Raises PSError on underflow or type mismatch."""
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        """Collect objects down to the array mark into a ps_array."""
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack
def unpack_item(item):
    """Recursively convert a PostScript object tree into plain Python values.

    Dict values become dicts of unpacked values, list values become lists
    (or tuples, for procedure objects); any other value is returned as-is.
    """
    value = item.value
    kind = type(value)
    if kind is dict:
        return {key: unpack_item(entry) for key, entry in value.items()}
    if kind is list:
        unpacked = [unpack_item(entry) for entry in value]
        # Procedure bodies are represented as immutable tuples.
        if item.type == "proceduretype":
            return tuple(unpacked)
        return unpacked
    return value
def suckfont(data, encoding="ascii"):
    """Interpret Type 1 font `data` (bytes) and return the raw font dict,
    unpacked into plain Python values."""
    # Try to pick the font name out of the source directly, so we know
    # which FontDirectory entry to fetch afterwards.
    m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    fontName = m.group(1).decode() if m else None

    interpreter = PSInterpreter(encoding=encoding)
    # Pre-define a dummy Helvetica so StandardEncoding/definefont work.
    interpreter.interpret(
        b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
    )
    interpreter.interpret(data)
    fontdir = interpreter.dictstack[0]["FontDirectory"].value

    if fontName in fontdir:
        rawfont = fontdir[fontName]
    else:
        # fall back, in case fontName wasn't found
        candidates = sorted(fontdir.keys())
        if len(candidates) > 1:
            candidates.remove("Helvetica")
        rawfont = fontdir[candidates[0]]

    interpreter.close()
    return unpack_item(rawfont)
|