xmlbuilder.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. """Implementation of the DOM Level 3 'LS-Load' feature."""
  2. import copy
  3. import xml.dom
  4. from xml.dom.NodeFilter import NodeFilter
  5. __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
  6. class Options:
  7. """Features object that has variables set for each DOMBuilder feature.
  8. The DOMBuilder class uses an instance of this class to pass settings to
  9. the ExpatBuilder class.
  10. """
  11. # Note that the DOMBuilder class in LoadSave constrains which of these
  12. # values can be set using the DOM Level 3 LoadSave feature.
  13. namespaces = 1
  14. namespace_declarations = True
  15. validation = False
  16. external_parameter_entities = True
  17. external_general_entities = True
  18. external_dtd_subset = True
  19. validate_if_schema = False
  20. validate = False
  21. datatype_normalization = False
  22. create_entity_ref_nodes = True
  23. entities = True
  24. whitespace_in_element_content = True
  25. cdata_sections = True
  26. comments = True
  27. charset_overrides_xml_encoding = True
  28. infoset = False
  29. supported_mediatypes_only = False
  30. errorHandler = None
  31. filter = None
  32. class DOMBuilder:
  33. entityResolver = None
  34. errorHandler = None
  35. filter = None
  36. ACTION_REPLACE = 1
  37. ACTION_APPEND_AS_CHILDREN = 2
  38. ACTION_INSERT_AFTER = 3
  39. ACTION_INSERT_BEFORE = 4
  40. _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
  41. ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
  42. def __init__(self):
  43. self._options = Options()
  44. def _get_entityResolver(self):
  45. return self.entityResolver
  46. def _set_entityResolver(self, entityResolver):
  47. self.entityResolver = entityResolver
  48. def _get_errorHandler(self):
  49. return self.errorHandler
  50. def _set_errorHandler(self, errorHandler):
  51. self.errorHandler = errorHandler
  52. def _get_filter(self):
  53. return self.filter
  54. def _set_filter(self, filter):
  55. self.filter = filter
  56. def setFeature(self, name, state):
  57. if self.supportsFeature(name):
  58. state = state and 1 or 0
  59. try:
  60. settings = self._settings[(_name_xform(name), state)]
  61. except KeyError:
  62. raise xml.dom.NotSupportedErr(
  63. "unsupported feature: %r" % (name,)) from None
  64. else:
  65. for name, value in settings:
  66. setattr(self._options, name, value)
  67. else:
  68. raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
  69. def supportsFeature(self, name):
  70. return hasattr(self._options, _name_xform(name))
  71. def canSetFeature(self, name, state):
  72. key = (_name_xform(name), state and 1 or 0)
  73. return key in self._settings
  74. # This dictionary maps from (feature,value) to a list of
  75. # (option,value) pairs that should be set on the Options object.
  76. # If a (feature,value) setting is not in this dictionary, it is
  77. # not supported by the DOMBuilder.
  78. #
  79. _settings = {
  80. ("namespace_declarations", 0): [
  81. ("namespace_declarations", 0)],
  82. ("namespace_declarations", 1): [
  83. ("namespace_declarations", 1)],
  84. ("validation", 0): [
  85. ("validation", 0)],
  86. ("external_general_entities", 0): [
  87. ("external_general_entities", 0)],
  88. ("external_general_entities", 1): [
  89. ("external_general_entities", 1)],
  90. ("external_parameter_entities", 0): [
  91. ("external_parameter_entities", 0)],
  92. ("external_parameter_entities", 1): [
  93. ("external_parameter_entities", 1)],
  94. ("validate_if_schema", 0): [
  95. ("validate_if_schema", 0)],
  96. ("create_entity_ref_nodes", 0): [
  97. ("create_entity_ref_nodes", 0)],
  98. ("create_entity_ref_nodes", 1): [
  99. ("create_entity_ref_nodes", 1)],
  100. ("entities", 0): [
  101. ("create_entity_ref_nodes", 0),
  102. ("entities", 0)],
  103. ("entities", 1): [
  104. ("entities", 1)],
  105. ("whitespace_in_element_content", 0): [
  106. ("whitespace_in_element_content", 0)],
  107. ("whitespace_in_element_content", 1): [
  108. ("whitespace_in_element_content", 1)],
  109. ("cdata_sections", 0): [
  110. ("cdata_sections", 0)],
  111. ("cdata_sections", 1): [
  112. ("cdata_sections", 1)],
  113. ("comments", 0): [
  114. ("comments", 0)],
  115. ("comments", 1): [
  116. ("comments", 1)],
  117. ("charset_overrides_xml_encoding", 0): [
  118. ("charset_overrides_xml_encoding", 0)],
  119. ("charset_overrides_xml_encoding", 1): [
  120. ("charset_overrides_xml_encoding", 1)],
  121. ("infoset", 0): [],
  122. ("infoset", 1): [
  123. ("namespace_declarations", 0),
  124. ("validate_if_schema", 0),
  125. ("create_entity_ref_nodes", 0),
  126. ("entities", 0),
  127. ("cdata_sections", 0),
  128. ("datatype_normalization", 1),
  129. ("whitespace_in_element_content", 1),
  130. ("comments", 1),
  131. ("charset_overrides_xml_encoding", 1)],
  132. ("supported_mediatypes_only", 0): [
  133. ("supported_mediatypes_only", 0)],
  134. ("namespaces", 0): [
  135. ("namespaces", 0)],
  136. ("namespaces", 1): [
  137. ("namespaces", 1)],
  138. }
  139. def getFeature(self, name):
  140. xname = _name_xform(name)
  141. try:
  142. return getattr(self._options, xname)
  143. except AttributeError:
  144. if name == "infoset":
  145. options = self._options
  146. return (options.datatype_normalization
  147. and options.whitespace_in_element_content
  148. and options.comments
  149. and options.charset_overrides_xml_encoding
  150. and not (options.namespace_declarations
  151. or options.validate_if_schema
  152. or options.create_entity_ref_nodes
  153. or options.entities
  154. or options.cdata_sections))
  155. raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
  156. def parseURI(self, uri):
  157. if self.entityResolver:
  158. input = self.entityResolver.resolveEntity(None, uri)
  159. else:
  160. input = DOMEntityResolver().resolveEntity(None, uri)
  161. return self.parse(input)
  162. def parse(self, input):
  163. options = copy.copy(self._options)
  164. options.filter = self.filter
  165. options.errorHandler = self.errorHandler
  166. fp = input.byteStream
  167. if fp is None and options.systemId:
  168. import urllib.request
  169. fp = urllib.request.urlopen(input.systemId)
  170. return self._parse_bytestream(fp, options)
  171. def parseWithContext(self, input, cnode, action):
  172. if action not in self._legal_actions:
  173. raise ValueError("not a legal action")
  174. raise NotImplementedError("Haven't written this yet...")
  175. def _parse_bytestream(self, stream, options):
  176. import xml.dom.expatbuilder
  177. builder = xml.dom.expatbuilder.makeBuilder(options)
  178. return builder.parseFile(stream)
  179. def _name_xform(name):
  180. return name.lower().replace('-', '_')
  181. class DOMEntityResolver(object):
  182. __slots__ = '_opener',
  183. def resolveEntity(self, publicId, systemId):
  184. assert systemId is not None
  185. source = DOMInputSource()
  186. source.publicId = publicId
  187. source.systemId = systemId
  188. source.byteStream = self._get_opener().open(systemId)
  189. # determine the encoding if the transport provided it
  190. source.encoding = self._guess_media_encoding(source)
  191. # determine the base URI is we can
  192. import posixpath, urllib.parse
  193. parts = urllib.parse.urlparse(systemId)
  194. scheme, netloc, path, params, query, fragment = parts
  195. # XXX should we check the scheme here as well?
  196. if path and not path.endswith("/"):
  197. path = posixpath.dirname(path) + "/"
  198. parts = scheme, netloc, path, params, query, fragment
  199. source.baseURI = urllib.parse.urlunparse(parts)
  200. return source
  201. def _get_opener(self):
  202. try:
  203. return self._opener
  204. except AttributeError:
  205. self._opener = self._create_opener()
  206. return self._opener
  207. def _create_opener(self):
  208. import urllib.request
  209. return urllib.request.build_opener()
  210. def _guess_media_encoding(self, source):
  211. info = source.byteStream.info()
  212. if "Content-Type" in info:
  213. for param in info.getplist():
  214. if param.startswith("charset="):
  215. return param.split("=", 1)[1].lower()
  216. class DOMInputSource(object):
  217. __slots__ = ('byteStream', 'characterStream', 'stringData',
  218. 'encoding', 'publicId', 'systemId', 'baseURI')
  219. def __init__(self):
  220. self.byteStream = None
  221. self.characterStream = None
  222. self.stringData = None
  223. self.encoding = None
  224. self.publicId = None
  225. self.systemId = None
  226. self.baseURI = None
  227. def _get_byteStream(self):
  228. return self.byteStream
  229. def _set_byteStream(self, byteStream):
  230. self.byteStream = byteStream
  231. def _get_characterStream(self):
  232. return self.characterStream
  233. def _set_characterStream(self, characterStream):
  234. self.characterStream = characterStream
  235. def _get_stringData(self):
  236. return self.stringData
  237. def _set_stringData(self, data):
  238. self.stringData = data
  239. def _get_encoding(self):
  240. return self.encoding
  241. def _set_encoding(self, encoding):
  242. self.encoding = encoding
  243. def _get_publicId(self):
  244. return self.publicId
  245. def _set_publicId(self, publicId):
  246. self.publicId = publicId
  247. def _get_systemId(self):
  248. return self.systemId
  249. def _set_systemId(self, systemId):
  250. self.systemId = systemId
  251. def _get_baseURI(self):
  252. return self.baseURI
  253. def _set_baseURI(self, uri):
  254. self.baseURI = uri
  255. class DOMBuilderFilter:
  256. """Element filter which can be used to tailor construction of
  257. a DOM instance.
  258. """
  259. # There's really no need for this class; concrete implementations
  260. # should just implement the endElement() and startElement()
  261. # methods as appropriate. Using this makes it easy to only
  262. # implement one of them.
  263. FILTER_ACCEPT = 1
  264. FILTER_REJECT = 2
  265. FILTER_SKIP = 3
  266. FILTER_INTERRUPT = 4
  267. whatToShow = NodeFilter.SHOW_ALL
  268. def _get_whatToShow(self):
  269. return self.whatToShow
  270. def acceptNode(self, element):
  271. return self.FILTER_ACCEPT
  272. def startContainer(self, element):
  273. return self.FILTER_ACCEPT
  274. del NodeFilter
  275. class DocumentLS:
  276. """Mixin to create documents that conform to the load/save spec."""
  277. async_ = False
  278. def _get_async(self):
  279. return False
  280. def _set_async(self, flag):
  281. if flag:
  282. raise xml.dom.NotSupportedErr(
  283. "asynchronous document loading is not supported")
  284. def abort(self):
  285. # What does it mean to "clear" a document? Does the
  286. # documentElement disappear?
  287. raise NotImplementedError(
  288. "haven't figured out what this means yet")
  289. def load(self, uri):
  290. raise NotImplementedError("haven't written this yet")
  291. def loadXML(self, source):
  292. raise NotImplementedError("haven't written this yet")
  293. def saveXML(self, snode):
  294. if snode is None:
  295. snode = self
  296. elif snode.ownerDocument is not self:
  297. raise xml.dom.WrongDocumentErr()
  298. return snode.toxml()
  299. class DOMImplementationLS:
  300. MODE_SYNCHRONOUS = 1
  301. MODE_ASYNCHRONOUS = 2
  302. def createDOMBuilder(self, mode, schemaType):
  303. if schemaType is not None:
  304. raise xml.dom.NotSupportedErr(
  305. "schemaType not yet supported")
  306. if mode == self.MODE_SYNCHRONOUS:
  307. return DOMBuilder()
  308. if mode == self.MODE_ASYNCHRONOUS:
  309. raise xml.dom.NotSupportedErr(
  310. "asynchronous builders are not supported")
  311. raise ValueError("unknown value for mode")
  312. def createDOMWriter(self):
  313. raise NotImplementedError(
  314. "the writer interface hasn't been written yet!")
  315. def createDOMInputSource(self):
  316. return DOMInputSource()