123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- """Implementation of the DOM Level 3 'LS-Load' feature."""
- import copy
- import xml.dom
- from xml.dom.NodeFilter import NodeFilter
- __all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
- class Options:
- """Features object that has variables set for each DOMBuilder feature.
- The DOMBuilder class uses an instance of this class to pass settings to
- the ExpatBuilder class.
- """
- # Note that the DOMBuilder class in LoadSave constrains which of these
- # values can be set using the DOM Level 3 LoadSave feature.
- namespaces = 1
- namespace_declarations = True
- validation = False
- external_parameter_entities = True
- external_general_entities = True
- external_dtd_subset = True
- validate_if_schema = False
- validate = False
- datatype_normalization = False
- create_entity_ref_nodes = True
- entities = True
- whitespace_in_element_content = True
- cdata_sections = True
- comments = True
- charset_overrides_xml_encoding = True
- infoset = False
- supported_mediatypes_only = False
- errorHandler = None
- filter = None
- class DOMBuilder:
- entityResolver = None
- errorHandler = None
- filter = None
- ACTION_REPLACE = 1
- ACTION_APPEND_AS_CHILDREN = 2
- ACTION_INSERT_AFTER = 3
- ACTION_INSERT_BEFORE = 4
- _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
- ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
- def __init__(self):
- self._options = Options()
- def _get_entityResolver(self):
- return self.entityResolver
- def _set_entityResolver(self, entityResolver):
- self.entityResolver = entityResolver
- def _get_errorHandler(self):
- return self.errorHandler
- def _set_errorHandler(self, errorHandler):
- self.errorHandler = errorHandler
- def _get_filter(self):
- return self.filter
- def _set_filter(self, filter):
- self.filter = filter
- def setFeature(self, name, state):
- if self.supportsFeature(name):
- state = state and 1 or 0
- try:
- settings = self._settings[(_name_xform(name), state)]
- except KeyError:
- raise xml.dom.NotSupportedErr(
- "unsupported feature: %r" % (name,)) from None
- else:
- for name, value in settings:
- setattr(self._options, name, value)
- else:
- raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
- def supportsFeature(self, name):
- return hasattr(self._options, _name_xform(name))
- def canSetFeature(self, name, state):
- key = (_name_xform(name), state and 1 or 0)
- return key in self._settings
- # This dictionary maps from (feature,value) to a list of
- # (option,value) pairs that should be set on the Options object.
- # If a (feature,value) setting is not in this dictionary, it is
- # not supported by the DOMBuilder.
- #
- _settings = {
- ("namespace_declarations", 0): [
- ("namespace_declarations", 0)],
- ("namespace_declarations", 1): [
- ("namespace_declarations", 1)],
- ("validation", 0): [
- ("validation", 0)],
- ("external_general_entities", 0): [
- ("external_general_entities", 0)],
- ("external_general_entities", 1): [
- ("external_general_entities", 1)],
- ("external_parameter_entities", 0): [
- ("external_parameter_entities", 0)],
- ("external_parameter_entities", 1): [
- ("external_parameter_entities", 1)],
- ("validate_if_schema", 0): [
- ("validate_if_schema", 0)],
- ("create_entity_ref_nodes", 0): [
- ("create_entity_ref_nodes", 0)],
- ("create_entity_ref_nodes", 1): [
- ("create_entity_ref_nodes", 1)],
- ("entities", 0): [
- ("create_entity_ref_nodes", 0),
- ("entities", 0)],
- ("entities", 1): [
- ("entities", 1)],
- ("whitespace_in_element_content", 0): [
- ("whitespace_in_element_content", 0)],
- ("whitespace_in_element_content", 1): [
- ("whitespace_in_element_content", 1)],
- ("cdata_sections", 0): [
- ("cdata_sections", 0)],
- ("cdata_sections", 1): [
- ("cdata_sections", 1)],
- ("comments", 0): [
- ("comments", 0)],
- ("comments", 1): [
- ("comments", 1)],
- ("charset_overrides_xml_encoding", 0): [
- ("charset_overrides_xml_encoding", 0)],
- ("charset_overrides_xml_encoding", 1): [
- ("charset_overrides_xml_encoding", 1)],
- ("infoset", 0): [],
- ("infoset", 1): [
- ("namespace_declarations", 0),
- ("validate_if_schema", 0),
- ("create_entity_ref_nodes", 0),
- ("entities", 0),
- ("cdata_sections", 0),
- ("datatype_normalization", 1),
- ("whitespace_in_element_content", 1),
- ("comments", 1),
- ("charset_overrides_xml_encoding", 1)],
- ("supported_mediatypes_only", 0): [
- ("supported_mediatypes_only", 0)],
- ("namespaces", 0): [
- ("namespaces", 0)],
- ("namespaces", 1): [
- ("namespaces", 1)],
- }
- def getFeature(self, name):
- xname = _name_xform(name)
- try:
- return getattr(self._options, xname)
- except AttributeError:
- if name == "infoset":
- options = self._options
- return (options.datatype_normalization
- and options.whitespace_in_element_content
- and options.comments
- and options.charset_overrides_xml_encoding
- and not (options.namespace_declarations
- or options.validate_if_schema
- or options.create_entity_ref_nodes
- or options.entities
- or options.cdata_sections))
- raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
- def parseURI(self, uri):
- if self.entityResolver:
- input = self.entityResolver.resolveEntity(None, uri)
- else:
- input = DOMEntityResolver().resolveEntity(None, uri)
- return self.parse(input)
- def parse(self, input):
- options = copy.copy(self._options)
- options.filter = self.filter
- options.errorHandler = self.errorHandler
- fp = input.byteStream
- if fp is None and options.systemId:
- import urllib.request
- fp = urllib.request.urlopen(input.systemId)
- return self._parse_bytestream(fp, options)
- def parseWithContext(self, input, cnode, action):
- if action not in self._legal_actions:
- raise ValueError("not a legal action")
- raise NotImplementedError("Haven't written this yet...")
- def _parse_bytestream(self, stream, options):
- import xml.dom.expatbuilder
- builder = xml.dom.expatbuilder.makeBuilder(options)
- return builder.parseFile(stream)
- def _name_xform(name):
- return name.lower().replace('-', '_')
- class DOMEntityResolver(object):
- __slots__ = '_opener',
- def resolveEntity(self, publicId, systemId):
- assert systemId is not None
- source = DOMInputSource()
- source.publicId = publicId
- source.systemId = systemId
- source.byteStream = self._get_opener().open(systemId)
- # determine the encoding if the transport provided it
- source.encoding = self._guess_media_encoding(source)
- # determine the base URI is we can
- import posixpath, urllib.parse
- parts = urllib.parse.urlparse(systemId)
- scheme, netloc, path, params, query, fragment = parts
- # XXX should we check the scheme here as well?
- if path and not path.endswith("/"):
- path = posixpath.dirname(path) + "/"
- parts = scheme, netloc, path, params, query, fragment
- source.baseURI = urllib.parse.urlunparse(parts)
- return source
- def _get_opener(self):
- try:
- return self._opener
- except AttributeError:
- self._opener = self._create_opener()
- return self._opener
- def _create_opener(self):
- import urllib.request
- return urllib.request.build_opener()
- def _guess_media_encoding(self, source):
- info = source.byteStream.info()
- if "Content-Type" in info:
- for param in info.getplist():
- if param.startswith("charset="):
- return param.split("=", 1)[1].lower()
- class DOMInputSource(object):
- __slots__ = ('byteStream', 'characterStream', 'stringData',
- 'encoding', 'publicId', 'systemId', 'baseURI')
- def __init__(self):
- self.byteStream = None
- self.characterStream = None
- self.stringData = None
- self.encoding = None
- self.publicId = None
- self.systemId = None
- self.baseURI = None
- def _get_byteStream(self):
- return self.byteStream
- def _set_byteStream(self, byteStream):
- self.byteStream = byteStream
- def _get_characterStream(self):
- return self.characterStream
- def _set_characterStream(self, characterStream):
- self.characterStream = characterStream
- def _get_stringData(self):
- return self.stringData
- def _set_stringData(self, data):
- self.stringData = data
- def _get_encoding(self):
- return self.encoding
- def _set_encoding(self, encoding):
- self.encoding = encoding
- def _get_publicId(self):
- return self.publicId
- def _set_publicId(self, publicId):
- self.publicId = publicId
- def _get_systemId(self):
- return self.systemId
- def _set_systemId(self, systemId):
- self.systemId = systemId
- def _get_baseURI(self):
- return self.baseURI
- def _set_baseURI(self, uri):
- self.baseURI = uri
- class DOMBuilderFilter:
- """Element filter which can be used to tailor construction of
- a DOM instance.
- """
- # There's really no need for this class; concrete implementations
- # should just implement the endElement() and startElement()
- # methods as appropriate. Using this makes it easy to only
- # implement one of them.
- FILTER_ACCEPT = 1
- FILTER_REJECT = 2
- FILTER_SKIP = 3
- FILTER_INTERRUPT = 4
- whatToShow = NodeFilter.SHOW_ALL
- def _get_whatToShow(self):
- return self.whatToShow
- def acceptNode(self, element):
- return self.FILTER_ACCEPT
- def startContainer(self, element):
- return self.FILTER_ACCEPT
- del NodeFilter
- class DocumentLS:
- """Mixin to create documents that conform to the load/save spec."""
- async_ = False
- def _get_async(self):
- return False
- def _set_async(self, flag):
- if flag:
- raise xml.dom.NotSupportedErr(
- "asynchronous document loading is not supported")
- def abort(self):
- # What does it mean to "clear" a document? Does the
- # documentElement disappear?
- raise NotImplementedError(
- "haven't figured out what this means yet")
- def load(self, uri):
- raise NotImplementedError("haven't written this yet")
- def loadXML(self, source):
- raise NotImplementedError("haven't written this yet")
- def saveXML(self, snode):
- if snode is None:
- snode = self
- elif snode.ownerDocument is not self:
- raise xml.dom.WrongDocumentErr()
- return snode.toxml()
- class DOMImplementationLS:
- MODE_SYNCHRONOUS = 1
- MODE_ASYNCHRONOUS = 2
- def createDOMBuilder(self, mode, schemaType):
- if schemaType is not None:
- raise xml.dom.NotSupportedErr(
- "schemaType not yet supported")
- if mode == self.MODE_SYNCHRONOUS:
- return DOMBuilder()
- if mode == self.MODE_ASYNCHRONOUS:
- raise xml.dom.NotSupportedErr(
- "asynchronous builders are not supported")
- raise ValueError("unknown value for mode")
- def createDOMWriter(self):
- raise NotImplementedError(
- "the writer interface hasn't been written yet!")
- def createDOMInputSource(self):
- return DOMInputSource()
|