123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- """xmlWriter.py -- Simple XML authoring class"""
- from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
- import sys
- import os
- import string
- INDENT = " "
- class XMLWriter(object):
- def __init__(
- self,
- fileOrPath,
- indentwhite=INDENT,
- idlefunc=None,
- encoding="utf_8",
- newlinestr="\n",
- ):
- if encoding.lower().replace("-", "").replace("_", "") != "utf8":
- raise Exception("Only UTF-8 encoding is supported.")
- if fileOrPath == "-":
- fileOrPath = sys.stdout
- if not hasattr(fileOrPath, "write"):
- self.filename = fileOrPath
- self.file = open(fileOrPath, "wb")
- self._closeStream = True
- else:
- self.filename = None
- # assume writable file object
- self.file = fileOrPath
- self._closeStream = False
- # Figure out if writer expects bytes or unicodes
- try:
- # The bytes check should be first. See:
- # https://github.com/fonttools/fonttools/pull/233
- self.file.write(b"")
- self.totype = tobytes
- except TypeError:
- # This better not fail.
- self.file.write("")
- self.totype = tostr
- self.indentwhite = self.totype(indentwhite)
- if newlinestr is None:
- self.newlinestr = self.totype(os.linesep)
- else:
- self.newlinestr = self.totype(newlinestr)
- self.indentlevel = 0
- self.stack = []
- self.needindent = 1
- self.idlefunc = idlefunc
- self.idlecounter = 0
- self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
- self.newline()
- def __enter__(self):
- return self
- def __exit__(self, exception_type, exception_value, traceback):
- self.close()
- def close(self):
- if self._closeStream:
- self.file.close()
- def write(self, string, indent=True):
- """Writes text."""
- self._writeraw(escape(string), indent=indent)
- def writecdata(self, string):
- """Writes text in a CDATA section."""
- self._writeraw("<![CDATA[" + string + "]]>")
- def write8bit(self, data, strip=False):
- """Writes a bytes() sequence into the XML, escaping
- non-ASCII bytes. When this is read in xmlReader,
- the original bytes can be recovered by encoding to
- 'latin-1'."""
- self._writeraw(escape8bit(data), strip=strip)
- def write_noindent(self, string):
- """Writes text without indentation."""
- self._writeraw(escape(string), indent=False)
- def _writeraw(self, data, indent=True, strip=False):
- """Writes bytes, possibly indented."""
- if indent and self.needindent:
- self.file.write(self.indentlevel * self.indentwhite)
- self.needindent = 0
- s = self.totype(data, encoding="utf_8")
- if strip:
- s = s.strip()
- self.file.write(s)
- def newline(self):
- self.file.write(self.newlinestr)
- self.needindent = 1
- idlecounter = self.idlecounter
- if not idlecounter % 100 and self.idlefunc is not None:
- self.idlefunc()
- self.idlecounter = idlecounter + 1
- def comment(self, data):
- data = escape(data)
- lines = data.split("\n")
- self._writeraw("<!-- " + lines[0])
- for line in lines[1:]:
- self.newline()
- self._writeraw(" " + line)
- self._writeraw(" -->")
- def simpletag(self, _TAG_, *args, **kwargs):
- attrdata = self.stringifyattrs(*args, **kwargs)
- data = "<%s%s/>" % (_TAG_, attrdata)
- self._writeraw(data)
- def begintag(self, _TAG_, *args, **kwargs):
- attrdata = self.stringifyattrs(*args, **kwargs)
- data = "<%s%s>" % (_TAG_, attrdata)
- self._writeraw(data)
- self.stack.append(_TAG_)
- self.indent()
- def endtag(self, _TAG_):
- assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
- del self.stack[-1]
- self.dedent()
- data = "</%s>" % _TAG_
- self._writeraw(data)
- def dumphex(self, data):
- linelength = 16
- hexlinelength = linelength * 2
- chunksize = 8
- for i in range(0, len(data), linelength):
- hexline = hexStr(data[i : i + linelength])
- line = ""
- white = ""
- for j in range(0, hexlinelength, chunksize):
- line = line + white + hexline[j : j + chunksize]
- white = " "
- self._writeraw(line)
- self.newline()
- def indent(self):
- self.indentlevel = self.indentlevel + 1
- def dedent(self):
- assert self.indentlevel > 0
- self.indentlevel = self.indentlevel - 1
- def stringifyattrs(self, *args, **kwargs):
- if kwargs:
- assert not args
- attributes = sorted(kwargs.items())
- elif args:
- assert len(args) == 1
- attributes = args[0]
- else:
- return ""
- data = ""
- for attr, value in attributes:
- if not isinstance(value, (bytes, str)):
- value = str(value)
- data = data + ' %s="%s"' % (attr, escapeattr(value))
- return data
- def escape(data):
- data = tostr(data, "utf_8")
- data = data.replace("&", "&")
- data = data.replace("<", "<")
- data = data.replace(">", ">")
- data = data.replace("\r", " ")
- return data
- def escapeattr(data):
- data = escape(data)
- data = data.replace('"', """)
- return data
- def escape8bit(data):
- """Input is Unicode string."""
- def escapechar(c):
- n = ord(c)
- if 32 <= n <= 127 and c not in "<&>":
- return c
- else:
- return "&#" + repr(n) + ";"
- return strjoin(map(escapechar, data.decode("latin-1")))
- def hexStr(s):
- h = string.hexdigits
- r = ""
- for c in s:
- i = byteord(c)
- r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
- return r
|