"""
Core classes of version 2.0 of SAX for Python.

This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.

Many of these classes are empty and are included only as documentation
of the interfaces.

$Id$
"""

# Version string of the SAX for Python interface set defined in this module.
version = '2.0beta'
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== ERRORHANDLER =====

class ErrorHandler:
    """Basic interface for SAX error handlers.

    If you create an object that implements this interface, then
    register the object with your XMLReader, the parser will call the
    methods in your object to report all warnings and errors. There
    are three levels of errors available: warnings, (possibly)
    recoverable errors, and unrecoverable errors. All methods take a
    SAXParseException as the only parameter.

    The defaults implemented here re-raise errors and fatal errors and
    print warnings; subclasses override the methods to change that.
    """

    def error(self, exception):
        """Handle a recoverable error.

        The default implementation raises the exception it is given.
        """
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error.

        The default implementation raises the exception it is given.
        """
        raise exception

    def warning(self, exception):
        """Handle a warning.

        The default implementation prints the exception and returns
        normally, so processing can continue.
        """
        print(exception)
# ===== CONTENTHANDLER =====

class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX, and the one most
    important to applications. The order of events in this interface
    mirrors the order of the information in the document.

    Every event method here is a no-op apart from setDocumentLocator,
    which stores the locator; subclasses override the events they
    care about.
    """

    def __init__(self):
        # No locator is known until the parser supplies one through
        # setDocumentLocator().
        self._locator = None

    def setDocumentLocator(self, locator):
        """Called by the parser to give the application a locator for
        locating the origin of document events.

        SAX parsers are strongly encouraged (though not absolutely
        required) to supply a locator: if it does so, it must supply
        the locator to the application by invoking this method before
        invoking any of the other methods in this interface.

        The locator allows the application to determine the end
        position of any document-related event, even if the parser is
        not reporting an error. Typically, the application will use
        this information for reporting its own errors (such as
        character content that does not match an application's
        business rules). The information returned by the locator is
        probably not sufficient for use with a search engine.

        Note that the locator will return correct information only
        during the invocation of the events in this interface. The
        application should not attempt to use it at any other time.

        The default implementation stores the locator on the instance
        as ``self._locator``.
        """
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        The SAX parser will invoke this method only once, before any
        other methods in this interface or in DTDHandler (except for
        setDocumentLocator).
        """

    def endDocument(self):
        """Receive notification of the end of a document.

        The SAX parser will invoke this method only once, and it will
        be the last method invoked during the parse. The parser shall
        not invoke this method until it has either abandoned parsing
        (because of an unrecoverable error) or reached the end of
        input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        The information from this event is not necessary for normal
        Namespace processing: the SAX XML reader will automatically
        replace prefixes for element and attribute names when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        There are cases, however, when applications need to use
        prefixes in character data or in attribute values, where they
        cannot safely be expanded automatically; the
        start/endPrefixMapping event supplies the information to the
        application to expand prefixes in those contexts itself, if
        necessary.

        Note that start/endPrefixMapping events are not guaranteed to
        be properly nested relative to each-other: all
        startPrefixMapping events will occur before the corresponding
        startElement event, and all endPrefixMapping events will occur
        after the corresponding endElement event, but their order is
        not guaranteed.
        """

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details. This event will always
        occur after the corresponding endElement event, but the order
        of endPrefixMapping events is not otherwise guaranteed.
        """

    def startElement(self, name, attrs):
        """Signals the start of an element in non-namespace mode.

        The name parameter contains the raw XML 1.0 name of the
        element type as a string and the attrs parameter holds an
        instance of the Attributes class containing the attributes of
        the element.
        """

    def endElement(self, name):
        """Signals the end of an element in non-namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElement event.
        """

    def startElementNS(self, name, qname, attrs):
        """Signals the start of an element in namespace mode.

        The name parameter contains the name of the element type as a
        (uri, localname) tuple, the qname parameter the raw XML 1.0
        name used in the source document, and the attrs parameter
        holds an instance of the Attributes class containing the
        attributes of the element.

        The uri part of the name tuple is None for elements which have
        no namespace.
        """

    def endElementNS(self, name, qname):
        """Signals the end of an element in namespace mode.

        The name parameter contains the name of the element type, just
        as with the startElementNS event.
        """

    def characters(self, content):
        """Receive notification of character data.

        The Parser will call this method to report each chunk of
        character data. SAX parsers may return all contiguous
        character data in a single chunk, or they may split it into
        several chunks; however, all of the characters in any single
        event must come from the same external entity so that the
        Locator provides useful information.
        """

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating Parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10): non-validating parsers may also use this method
        if they are capable of parsing and using content models.

        SAX parsers may return all contiguous whitespace in a single
        chunk, or they may split it into several chunks; however, all
        of the characters in any single event must come from the same
        external entity, so that the Locator provides useful
        information.
        """

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The Parser will invoke this method once for each processing
        instruction found: note that processing instructions may occur
        before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        using this method.
        """

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        The Parser will invoke this method once for each entity
        skipped. Non-validating processors may skip entities if they
        have not seen the declarations (because, for example, the
        entity was declared in an external DTD subset). All processors
        may skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
# ===== DTDHandler =====

class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes). Both methods are
    no-ops by default and exist to be overridden.
    """

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event.

        Receives the notation's name together with its public and
        system identifiers.
        """

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event.

        Receives the entity's name, its public and system identifiers,
        and the ndata value that accompanies the declaration.
        """
# ===== ENTITYRESOLVER =====

class EntityResolver:
    """Basic interface for resolving entities.

    If you create an object implementing this interface, then register
    the object with your Parser, the parser will call the method in
    your object to resolve all external entities. Note that
    DefaultHandler implements this interface with the default
    behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        The default implementation ignores publicId and returns
        systemId unchanged.
        """
        return systemId
#============================================================================
#
# CORE FEATURES
#
#============================================================================

feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every core feature identifier defined above, in definition order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
#============================================================================
#
# CORE PROPERTIES
#
#============================================================================

property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
# access: read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access: read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type: String
# description: The literal string of characters that was the source for
#              the current event.
# access: read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type: String
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8
#

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None
#

# Every core property identifier defined above. The order is the original
# list's order (dom-node precedes declaration-handler), not definition order;
# it is kept as-is in case callers depend on it.
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    This handler is used to obtain lexical information about an XML
    document, that is, information about how the document was encoded
    (as opposed to what it contains, which is reported to the
    ContentHandler), such as comments and CDATA marked section
    boundaries.

    To set the LexicalHandler of an XMLReader, use the setProperty
    method with the property identifier
    'http://xml.org/sax/properties/lexical-handler'.

    All methods are no-ops by default and exist to be overridden.
    """

    def comment(self, content):
        """Reports a comment anywhere in the document (including the
        DTD and outside the document element).

        content is a string that holds the contents of the comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event will be reported before declaration events
        from the external DTD subset are reported, and this can be
        used to infer from which subset DTD declarations derive.

        name is the name of the document element type, public_id the
        public identifier of the DTD (or None if none were supplied)
        and system_id the system identifier of the external subset (or
        None if none were supplied).
        """

    def endDTD(self):
        """Signals the end of DTD declarations."""

    def startCDATA(self):
        """Reports the beginning of a CDATA marked section.

        The contents of the CDATA marked section will be reported
        through the characters event.
        """

    def endCDATA(self):
        """Reports the end of a CDATA marked section."""