xmlWriter.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. """xmlWriter.py -- Simple XML authoring class"""
  2. from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
  3. import sys
  4. import os
  5. import string
  6. INDENT = " "
  7. class XMLWriter(object):
  8. def __init__(
  9. self,
  10. fileOrPath,
  11. indentwhite=INDENT,
  12. idlefunc=None,
  13. encoding="utf_8",
  14. newlinestr="\n",
  15. ):
  16. if encoding.lower().replace("-", "").replace("_", "") != "utf8":
  17. raise Exception("Only UTF-8 encoding is supported.")
  18. if fileOrPath == "-":
  19. fileOrPath = sys.stdout
  20. if not hasattr(fileOrPath, "write"):
  21. self.filename = fileOrPath
  22. self.file = open(fileOrPath, "wb")
  23. self._closeStream = True
  24. else:
  25. self.filename = None
  26. # assume writable file object
  27. self.file = fileOrPath
  28. self._closeStream = False
  29. # Figure out if writer expects bytes or unicodes
  30. try:
  31. # The bytes check should be first. See:
  32. # https://github.com/fonttools/fonttools/pull/233
  33. self.file.write(b"")
  34. self.totype = tobytes
  35. except TypeError:
  36. # This better not fail.
  37. self.file.write("")
  38. self.totype = tostr
  39. self.indentwhite = self.totype(indentwhite)
  40. if newlinestr is None:
  41. self.newlinestr = self.totype(os.linesep)
  42. else:
  43. self.newlinestr = self.totype(newlinestr)
  44. self.indentlevel = 0
  45. self.stack = []
  46. self.needindent = 1
  47. self.idlefunc = idlefunc
  48. self.idlecounter = 0
  49. self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
  50. self.newline()
  51. def __enter__(self):
  52. return self
  53. def __exit__(self, exception_type, exception_value, traceback):
  54. self.close()
  55. def close(self):
  56. if self._closeStream:
  57. self.file.close()
  58. def write(self, string, indent=True):
  59. """Writes text."""
  60. self._writeraw(escape(string), indent=indent)
  61. def writecdata(self, string):
  62. """Writes text in a CDATA section."""
  63. self._writeraw("<![CDATA[" + string + "]]>")
  64. def write8bit(self, data, strip=False):
  65. """Writes a bytes() sequence into the XML, escaping
  66. non-ASCII bytes. When this is read in xmlReader,
  67. the original bytes can be recovered by encoding to
  68. 'latin-1'."""
  69. self._writeraw(escape8bit(data), strip=strip)
  70. def write_noindent(self, string):
  71. """Writes text without indentation."""
  72. self._writeraw(escape(string), indent=False)
  73. def _writeraw(self, data, indent=True, strip=False):
  74. """Writes bytes, possibly indented."""
  75. if indent and self.needindent:
  76. self.file.write(self.indentlevel * self.indentwhite)
  77. self.needindent = 0
  78. s = self.totype(data, encoding="utf_8")
  79. if strip:
  80. s = s.strip()
  81. self.file.write(s)
  82. def newline(self):
  83. self.file.write(self.newlinestr)
  84. self.needindent = 1
  85. idlecounter = self.idlecounter
  86. if not idlecounter % 100 and self.idlefunc is not None:
  87. self.idlefunc()
  88. self.idlecounter = idlecounter + 1
  89. def comment(self, data):
  90. data = escape(data)
  91. lines = data.split("\n")
  92. self._writeraw("<!-- " + lines[0])
  93. for line in lines[1:]:
  94. self.newline()
  95. self._writeraw(" " + line)
  96. self._writeraw(" -->")
  97. def simpletag(self, _TAG_, *args, **kwargs):
  98. attrdata = self.stringifyattrs(*args, **kwargs)
  99. data = "<%s%s/>" % (_TAG_, attrdata)
  100. self._writeraw(data)
  101. def begintag(self, _TAG_, *args, **kwargs):
  102. attrdata = self.stringifyattrs(*args, **kwargs)
  103. data = "<%s%s>" % (_TAG_, attrdata)
  104. self._writeraw(data)
  105. self.stack.append(_TAG_)
  106. self.indent()
  107. def endtag(self, _TAG_):
  108. assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
  109. del self.stack[-1]
  110. self.dedent()
  111. data = "</%s>" % _TAG_
  112. self._writeraw(data)
  113. def dumphex(self, data):
  114. linelength = 16
  115. hexlinelength = linelength * 2
  116. chunksize = 8
  117. for i in range(0, len(data), linelength):
  118. hexline = hexStr(data[i : i + linelength])
  119. line = ""
  120. white = ""
  121. for j in range(0, hexlinelength, chunksize):
  122. line = line + white + hexline[j : j + chunksize]
  123. white = " "
  124. self._writeraw(line)
  125. self.newline()
  126. def indent(self):
  127. self.indentlevel = self.indentlevel + 1
  128. def dedent(self):
  129. assert self.indentlevel > 0
  130. self.indentlevel = self.indentlevel - 1
  131. def stringifyattrs(self, *args, **kwargs):
  132. if kwargs:
  133. assert not args
  134. attributes = sorted(kwargs.items())
  135. elif args:
  136. assert len(args) == 1
  137. attributes = args[0]
  138. else:
  139. return ""
  140. data = ""
  141. for attr, value in attributes:
  142. if not isinstance(value, (bytes, str)):
  143. value = str(value)
  144. data = data + ' %s="%s"' % (attr, escapeattr(value))
  145. return data
  146. def escape(data):
  147. data = tostr(data, "utf_8")
  148. data = data.replace("&", "&amp;")
  149. data = data.replace("<", "&lt;")
  150. data = data.replace(">", "&gt;")
  151. data = data.replace("\r", "&#13;")
  152. return data
  153. def escapeattr(data):
  154. data = escape(data)
  155. data = data.replace('"', "&quot;")
  156. return data
  157. def escape8bit(data):
  158. """Input is Unicode string."""
  159. def escapechar(c):
  160. n = ord(c)
  161. if 32 <= n <= 127 and c not in "<&>":
  162. return c
  163. else:
  164. return "&#" + repr(n) + ";"
  165. return strjoin(map(escapechar, data.decode("latin-1")))
  166. def hexStr(s):
  167. h = string.hexdigits
  168. r = ""
  169. for c in s:
  170. i = byteord(c)
  171. r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
  172. return r