123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- """Extend the Python codecs module with a few encodings that are used in OpenType (name table)
- but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details."""
- import codecs
- import encodings
class ExtendCodec(codecs.Codec):
    """Codec that extends a base encoding with a few extra mappings.

    Conversion is delegated to ``base_encoding``.  Whenever the base codec
    reports an error, the error handler registered under this codec's own
    name (``self.error``) consults ``mapping`` (bytes to str) when decoding,
    or its inverse when encoding, and supplies the substitute.  Only inputs
    that neither the base codec nor the extra mapping can convert are passed
    on to the error handler the caller requested.
    """

    def __init__(self, name, base_encoding, mapping):
        """Build the codec and register its error handler.

        Args:
            name: Name of the extended encoding.  Also used as the name under
                which ``self.error`` is registered with ``codecs``.
            base_encoding: Name of the existing codec that does the bulk of
                the conversion.
            mapping: Dict from ``bytes`` sequences to the ``str`` each one
                decodes to, for sequences the base codec does not handle.
        """
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        self.reverse = {v: k for k, v in mapping.items()}
        # Longest mapped string; bounds the look-ahead in ``error`` when
        # encoding.  ``default=0`` keeps an empty mapping from raising.
        self.max_len = max((len(v) for v in mapping.values()), default=0)
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Convert ``input`` with ``mapper``, shared by encode and decode.

        Args:
            mapper: ``codecs.encode`` or ``codecs.decode``.
            output_type: ``bytes`` when encoding, ``str`` when decoding.
            exc_type: Exception class the base codec raises on failure.
            input: The data to convert.
            errors: Name of the error handler requested by the caller.

        Returns:
            A ``(output, length_consumed)`` tuple, where the length is the
            length of the original ``input``.

        Raises:
            LookupError: If ``errors`` is not a registered error handler.
            UnicodeError: If the requested error handler raises.
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        out = output_type()
        while input:
            # First try to use self.error as the error handler.
            try:
                part = mapper(input, self.base_encoding, errors=self.name)
            except exc_type as e:
                # Convert the part before the failure, then let the handler
                # requested by the caller deal with the offending input.
                out += mapper(input[: e.start], self.base_encoding, self.name)
                replacement, pos = base_error_handler(e)
                if output_type is bytes and isinstance(replacement, str):
                    # Encode error handlers such as "replace" or "ignore"
                    # return str; it must be encoded before it can be
                    # appended to the bytes output.
                    replacement = mapper(
                        replacement, self.base_encoding, self.name
                    )
                out += replacement
                input = input[pos:]
            else:
                out += part
                break  # All converted
        return out, length

    def encode(self, input, errors="strict"):
        """Encode ``input`` (str); return ``(bytes, length_consumed)``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode ``input`` (bytes); return ``(str, length_consumed)``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler that applies the extra mappings.

        Looks for the shortest prefix of the failing input, starting at
        ``e.start``, that has an entry in the extra mapping.

        Args:
            e: The ``UnicodeDecodeError`` or ``UnicodeEncodeError`` raised by
                the base codec.

        Returns:
            A ``(replacement, resume_position)`` tuple when a mapping exists.

        Raises:
            The exception ``e`` itself, with ``encoding`` set to this codec's
            name, when no mapping applies.
        """
        if isinstance(e, UnicodeDecodeError):
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start : end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start : end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e
- _extended_encodings = {
- "x_mac_japanese_ttx": (
- "shift_jis",
- {
- b"\xFC": chr(0x007C),
- b"\x7E": chr(0x007E),
- b"\x80": chr(0x005C),
- b"\xA0": chr(0x00A0),
- b"\xFD": chr(0x00A9),
- b"\xFE": chr(0x2122),
- b"\xFF": chr(0x2026),
- },
- ),
- "x_mac_trad_chinese_ttx": (
- "big5",
- {
- b"\x80": chr(0x005C),
- b"\xA0": chr(0x00A0),
- b"\xFD": chr(0x00A9),
- b"\xFE": chr(0x2122),
- b"\xFF": chr(0x2026),
- },
- ),
- "x_mac_korean_ttx": (
- "euc_kr",
- {
- b"\x80": chr(0x00A0),
- b"\x81": chr(0x20A9),
- b"\x82": chr(0x2014),
- b"\x83": chr(0x00A9),
- b"\xFE": chr(0x2122),
- b"\xFF": chr(0x2026),
- },
- ),
- "x_mac_simp_chinese_ttx": (
- "gb2312",
- {
- b"\x80": chr(0x00FC),
- b"\xA0": chr(0x00A0),
- b"\xFD": chr(0x00A9),
- b"\xFE": chr(0x2122),
- b"\xFF": chr(0x2026),
- },
- ),
- }
# ExtendCodec instances built so far, keyed by normalized encoding name.
_cache = {}


def search_function(name):
    """Codec search function for the extended ``*_ttx`` encodings.

    Args:
        name: Encoding name as passed in by the ``codecs`` machinery.

    Returns:
        The ``codecs.CodecInfo`` of the matching extended codec, or ``None``
        when ``name`` is not one of the extended encodings or when no usable
        base codec is available for it.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None
    if name not in _cache:
        base_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones,
        # mapping them "temporarily" to the same base encoding as us, with a
        # comment suggesting that full implementation will appear some time
        # later.  As such, try the Python version of the x_mac_... first; if
        # that is found, use *that* as our base encoding.  This would make our
        # encoding upgrade to the full encoding when and if Python finally
        # implements that.
        # http://bugs.python.org/issue24041
        for candidate in (name[:-4], base_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # Neither candidate base codec exists in this interpreter.
            # Report "not found" instead of failing with a KeyError on the
            # cache lookup below.
            return None
    return _cache[name].info


codecs.register(search_function)
|