123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532 |
- """
- A python interface to Adobe Font Metrics Files.
- Although a number of other Python implementations exist, and may be more
- complete than this, it was decided not to go with them because they were
- either:
- 1) copyrighted or used a non-BSD compatible license
- 2) had too many dependencies and a free standing lib was needed
- 3) did more than needed and it was easier to write afresh rather than
- figure out how to get just what was needed.
- It is pretty easy to use, and has no external dependencies:
- >>> import matplotlib as mpl
- >>> from pathlib import Path
- >>> afm_path = Path(mpl.get_data_path(), 'fonts', 'afm', 'ptmr8a.afm')
- >>>
- >>> from matplotlib.afm import AFM
- >>> with afm_path.open('rb') as fh:
- ... afm = AFM(fh)
- >>> afm.string_width_height('What the heck?')
- (6220.0, 694)
- >>> afm.get_fontname()
- 'Times-Roman'
- >>> afm.get_kern_dist('A', 'f')
- 0
- >>> afm.get_kern_dist('A', 'y')
- -92.0
- >>> afm.get_bbox_char('!')
- [130, -9, 238, 676]
- As in the Adobe Font Metrics File Format Specification, all dimensions
- are given in units of 1/1000 of the scale factor (point size) of the font
- being used.
- """
- from collections import namedtuple
- import logging
- import re
- from ._mathtext_data import uni2type1
- _log = logging.getLogger(__name__)
- def _to_int(x):
- # Some AFM files have floats where we are expecting ints -- there is
- # probably a better way to handle this (support floats, round rather than
- # truncate). But I don't know what the best approach is now and this
- # change to _to_int should at least prevent Matplotlib from crashing on
- # these. JDH (2009-11-06)
- return int(float(x))
- def _to_float(x):
- # Some AFM files use "," instead of "." as decimal separator -- this
- # shouldn't be ambiguous (unless someone is wicked enough to use "," as
- # thousands separator...).
- if isinstance(x, bytes):
- # Encoding doesn't really matter -- if we have codepoints >127 the call
- # to float() will error anyways.
- x = x.decode('latin-1')
- return float(x.replace(',', '.'))
- def _to_str(x):
- return x.decode('utf8')
def _to_list_of_ints(s):
    """
    Split the bytes *s* on whitespace and/or commas and return the tokens
    converted to ints (via `_to_int`, i.e. truncating floats).
    """
    tokens = s.replace(b',', b' ').split()
    return list(map(_to_int, tokens))
def _to_list_of_floats(s):
    """
    Split *s* on whitespace and return the tokens converted to floats
    (via `_to_float`).
    """
    return list(map(_to_float, s.split()))
- def _to_bool(s):
- if s.lower().strip() in (b'false', b'0', b'no'):
- return False
- else:
- return True
def _parse_header(fh):
    """
    Read the font metrics header (everything up to and including the
    ``StartCharMetrics`` line) from *fh* and return it as a dict.

    The dict maps each recognized header keyword (as bytes, e.g.
    ``b'FontName'``) to its value, converted to a suitable Python type;
    e.g.:

    * 'False' -> False
    * '0' -> 0
    * '-168 -218 1000 898' -> [-168, -218, 1000, 898]

    Recognized keywords are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      Capheight, XHeight, Ascender, Descender, StdHW, StdVW,
      StartCharMetrics, CharacterSet, Characters

    Lines starting with ``Comment`` are skipped.  Unknown keywords and
    values that fail to convert are logged as errors and otherwise ignored.

    Raises
    ------
    RuntimeError
        If the first non-comment line does not start with
        ``StartFontMetrics``, or if the input ends before a
        ``StartCharMetrics`` entry has been parsed.
    """
    header_converters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_float,
        b'UnderlineThickness': _to_float,
        b'Version': _to_str,
        # Some AFM files have non-ASCII characters (which are not allowed by
        # the spec).  Given that there is actually no public API to even access
        # this field, just return it as straight bytes.
        b'Notice': lambda x: x,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
    }
    header = {}
    seen_first_entry = False
    for raw in fh:
        raw = raw.rstrip()
        if raw.startswith(b'Comment'):
            continue
        # Keyword is everything before the first space; the value (possibly
        # empty) is everything after it.
        key, _, val = raw.partition(b' ')
        if not seen_first_entry:
            # AFM spec, Section 4: The StartFontMetrics keyword
            # [followed by a version number] must be the first line in
            # the file, and the EndFontMetrics keyword must be the
            # last non-empty line in the file.  We just check the
            # first header entry.
            if key != b'StartFontMetrics':
                raise RuntimeError('Not an AFM file')
            seen_first_entry = True
        converter = header_converters.get(key)
        if converter is None:
            _log.error("Found an unknown keyword in AFM header (was %r)", key)
            continue
        try:
            header[key] = converter(val)
        except ValueError:
            _log.error('Value error parsing header in AFM: %s, %s', key, val)
            continue
        if key == b'StartCharMetrics':
            break
    else:
        # Ran out of input without ever reaching the char metrics section.
        raise RuntimeError('Bad parse')
    return header
# Per-character metrics record; the field docs are attached separately below
# because namedtuple() itself offers no way to pass them.
CharMetrics = namedtuple('CharMetrics', ['width', 'name', 'bbox'])
CharMetrics.__doc__ = """
    Represents the character metrics of a single character.

    Notes
    -----
    The fields do currently only describe a subset of character metrics
    information defined in the AFM standard.
    """
CharMetrics.width.__doc__ = """The character width (WX)."""
CharMetrics.name.__doc__ = """The character name (N)."""
CharMetrics.bbox.__doc__ = """
    The bbox of the character (B) as a tuple (*llx*, *lly*, *urx*, *ury*)."""
def _parse_char_metrics(fh):
    """
    Read the character metrics section from *fh* and return it as two dicts.

    The file cursor is expected to be on the line following
    'StartCharMetrics'; lines are consumed up to and including the
    'EndCharMetrics' line.

    Returns
    -------
    ascii_d : dict
        A mapping "character code (the C entry)" to `.CharMetrics`.
        Characters whose code is -1 are left out; 'Euro' and 'minus' are
        stored under codes 128 and 0x2212 respectively.
    name_d : dict
        A mapping "character name" to `.CharMetrics`.

    Raises
    ------
    RuntimeError
        If a line lacks one of the C, WX, N or B entries, or if the input
        ends before 'EndCharMetrics' is seen.

    Notes
    -----
    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """
    required_keys = {'C', 'WX', 'N', 'B'}
    by_code = {}
    by_name = {}
    for raw in fh:
        # We are defensively letting values be utf8. The spec requires
        # ascii, but there are non-compliant fonts in circulation.
        line = _to_str(raw.rstrip())
        if line.startswith('EndCharMetrics'):
            return by_code, by_name
        # Each ';'-separated entry is "<identifier> <value...>"; collect
        # them into a dict keyed by the identifier.
        entries = (entry.strip().split(' ', 1)
                   for entry in line.split(';') if entry)
        vals = dict(entries)
        # Other metrics may be present, but only these four are needed.
        if required_keys.difference(vals):
            raise RuntimeError('Bad char metrics line: %s' % line)
        num = _to_int(vals['C'])
        wx = _to_float(vals['WX'])
        name = vals['N']
        bbox = [int(coord) for coord in _to_list_of_floats(vals['B'])]
        metrics = CharMetrics(wx, name, bbox)
        # Workaround: If the character name is 'Euro', give it the
        # corresponding character code, according to WinAnsiEncoding (see PDF
        # Reference).
        if name == 'Euro':
            num = 128
        elif name == 'minus':
            num = ord("\N{MINUS SIGN}")  # 0x2212
        if num != -1:
            by_code[num] = metrics
        by_name[name] = metrics
    raise RuntimeError('Bad parse')
def _parse_kern_pairs(fh):
    """
    Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
    values are the kern pair value.  For example, a kern pairs line like
    ``KPX A y -50``

    will be represented as::

      d[ ('A', 'y') ] = -50

    The file cursor is expected to be on the line following
    'StartKernData'; the 'StartKernPairs' line, all ``KPX`` lines, the
    'EndKernPairs' line and the line after it (normally 'EndKernData') are
    consumed.  Blank lines between pairs are ignored.

    Raises
    ------
    RuntimeError
        If the section does not start with 'StartKernPairs' (including when
        the input is already exhausted), if a non-blank line is not of the
        form ``KPX name1 name2 value``, or if the input ends before
        'EndKernPairs' is seen.
    """
    # Supply a default so that a file truncated right after StartKernData is
    # reported through the RuntimeError below instead of leaking a bare
    # StopIteration to the caller.
    line = next(fh, b'')
    if not line.startswith(b'StartKernPairs'):
        raise RuntimeError('Bad start of kern pairs data: %s' % line)
    d = {}
    for line in fh:
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(b'EndKernPairs'):
            # Skip the closing EndKernData line; tolerate it being absent
            # (end of input), as all pairs have been read at this point.
            next(fh, None)
            return d
        vals = line.split()
        if len(vals) != 4 or vals[0] != b'KPX':
            raise RuntimeError('Bad kern pairs line: %s' % line)
        c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
        d[(c1, c2)] = val
    raise RuntimeError('Bad kern pairs parse')
# One component of a composite character; the field docs are attached
# separately below because namedtuple() itself offers no way to pass them.
CompositePart = namedtuple('CompositePart', ['name', 'dx', 'dy'])
CompositePart.__doc__ = """
    Represents the information on a composite element of a composite char."""
CompositePart.name.__doc__ = """Name of the part, e.g. 'acute'."""
CompositePart.dx.__doc__ = """x-displacement of the part from the origin."""
CompositePart.dy.__doc__ = """y-displacement of the part from the origin."""
def _parse_composites(fh):
    """
    Parse the given filehandle for composites information return them as a
    dict.

    It is assumed that the file cursor is on the line behind 'StartComposites'.
    Lines are consumed up to and including the 'EndComposites' line; blank
    lines are ignored.

    Returns
    -------
    dict
        A dict mapping composite character names (str) to a parts list.  The
        parts list is a list of `.CompositePart` entries describing the parts
        of the composite.

    Raises
    ------
    RuntimeError
        If the input ends before 'EndComposites' is seen.

    Examples
    --------
    A composite definition line::

      CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;

    will be represented as::

      composites['Aacute'] = [CompositePart(name='A', dx=0, dy=0),
                              CompositePart(name='acute', dx=160, dy=170)]

    """
    composites = {}
    for line in fh:
        line = line.rstrip()
        if not line:
            continue
        if line.startswith(b'EndComposites'):
            return composites
        vals = line.split(b';')
        cc = vals[0].split()
        # Decode names to str so that they match the documented result above
        # and the str names used by the char metrics and kern pair tables.
        name, _num_parts = _to_str(cc[1]), _to_int(cc[2])
        pccParts = []
        # vals[0] is the "CC ..." entry and vals[-1] is whatever follows the
        # terminating ';' (normally empty); the PCC entries lie in between.
        for s in vals[1:-1]:
            pcc = s.split()
            part = CompositePart(
                _to_str(pcc[1]), _to_float(pcc[2]), _to_float(pcc[3]))
            pccParts.append(part)
        composites[name] = pccParts
    raise RuntimeError('Bad composites parse')
def _parse_optional(fh):
    """
    Scan the remainder of *fh* for the optional kern data and composites
    sections and parse whichever of them are present.

    Returns
    -------
    kern_data : dict
        A dict containing kerning information. May be empty.
        See `._parse_kern_pairs`.
    composites : dict
        A dict containing composite information. May be empty.
        See `._parse_composites`.
    """
    section_parsers = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites': _parse_composites,
    }
    # Sections that never show up stay as empty dicts.
    parsed = {section: {} for section in section_parsers}
    for raw in fh:
        tokens = raw.split()
        if not tokens:  # blank line
            continue
        keyword = tokens[0]
        parser = section_parsers.get(keyword)
        if parser is not None:
            # The section parser consumes the section's lines from *fh*.
            parsed[keyword] = parser(fh)
    return parsed[b'StartKernData'], parsed[b'StartComposites']
class AFM:
    """
    The parsed contents of an Adobe Font Metrics (AFM) file.

    As in the AFM specification, all dimensions are given in units of 1/1000
    of the scale factor (point size) of the font being used.  Character
    bounding boxes are stored as (*llx*, *lly*, *urx*, *ury*).
    """

    def __init__(self, fh):
        """
        Parse the AFM file in file object *fh*.

        *fh* must yield bytes lines (i.e. be opened in binary mode), as the
        parsers compare against bytes keywords.
        """
        self._header = _parse_header(fh)
        self._metrics, self._metrics_by_name = _parse_char_metrics(fh)
        self._kern, self._composite = _parse_optional(fh)

    def get_bbox_char(self, c, isord=False):
        """
        Return the bounding box (*llx*, *lly*, *urx*, *ury*) of character *c*.

        *c* is a single character, or its character code if *isord* is true.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.

        Newlines are ignored.  The height spans from the lowest *lly* of any
        character up to the highest *ury*, but never ends below the baseline.
        An empty string, or one made of newlines only, yields ``(0, 0)``.
        """
        if not s:
            return 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        for c in s:
            if c == '\n':
                continue
            wx, name, bbox = self._metrics[ord(c)]

            total_width += wx + self._kern.get((namelast, name), 0)
            # The bbox is (llx, lly, urx, ury): its last entry already is the
            # top edge, not a height to be added to the bottom edge.
            _, lly, _, ury = bbox
            miny = min(miny, lly)
            maxy = max(maxy, ury)

            namelast = name

        if namelast is None:
            # No character was measured (newlines only); do not let the
            # 1e9 sentinel leak into the result.
            return 0, 0
        return total_width, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box and the maximal descent.

        The result is the tuple (*left*, *bottom*, *width*, *height*,
        *descent*), where *width* includes kerning and *descent* is
        ``-bottom``.  *s* may be a str or UTF-8 encoded bytes; newlines are
        ignored and characters missing from the font are measured as
        'question'.  An empty string, or one made of newlines only, yields
        all zeros.
        """
        if not s:
            return 0, 0, 0, 0, 0
        total_width = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        if not isinstance(s, str):
            s = _to_str(s)
        for c in s:
            if c == '\n':
                continue
            name = uni2type1.get(ord(c), f"uni{ord(c):04X}")
            try:
                wx, _, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, _, bbox = self._metrics_by_name[name]
            total_width += wx + self._kern.get((namelast, name), 0)
            # The bbox is (llx, lly, urx, ury): its last entry already is the
            # top edge, not a height to be added to the bottom edge.
            llx, lly, _, ury = bbox
            left = min(left, llx)
            miny = min(miny, lly)
            maxy = max(maxy, ury)

            namelast = name

        if namelast is None:
            # No character was measured (newlines only); do not let the
            # 1e9 sentinel leak into the result.
            return 0, 0, 0, 0, 0
        return left, miny, total_width, maxy - miny, -miny

    def get_str_bbox(self, s):
        """Return the string bounding box."""
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """Get the name of the character, i.e., ';' is 'semicolon'."""
        if not isord:
            c = ord(c)
        return self._metrics[c].name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX field.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].width

    def get_width_from_char_name(self, name):
        """Get the width of the character from a type1 character name."""
        return self._metrics_by_name[name].width

    def get_height_char(self, c, isord=False):
        """
        Get the top of the bounding box of character *c*, i.e. the last
        bbox entry (*ury*); this is 0 for a character with an all-zero bbox.
        """
        if not isord:
            c = ord(c)
        return self._metrics[c].bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1* and *c2*.
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*.
        """
        return self._kern.get((name1, name2), 0)

    def get_fontname(self):
        """Return the font name, e.g., 'Times-Roman'."""
        return self._header[b'FontName']

    @property
    def postscript_name(self):  # For consistency with FT2Font.
        """The font name, e.g., 'Times-Roman'; same as `get_fontname`."""
        return self.get_fontname()

    def get_fullname(self):
        """Return the font full name, e.g., 'Times-Roman'."""
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        """Return the font family name, e.g., 'Times'."""
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess: take the full name
        # and strip any trailing style/weight words.
        name = self.get_fullname()
        extras = (r'(?i)([ -](regular|plain|italic|oblique|bold|semibold|'
                  r'light|ultralight|extra|condensed))+$')
        return re.sub(extras, '', name)

    @property
    def family_name(self):
        """The font family name, e.g., 'Times'."""
        return self.get_familyname()

    def get_weight(self):
        """Return the font weight, e.g., 'Bold' or 'Roman'."""
        return self._header[b'Weight']

    def get_angle(self):
        """Return the fontangle as float."""
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        """Return the cap height as float."""
        return self._header[b'CapHeight']

    def get_xheight(self):
        """Return the xheight as float."""
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        """Return the underline thickness as float."""
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)
|