# handler.py (15 KB)
  1. """
  2. This module contains the core classes of version 2.0 of SAX for Python.
  3. This file provides only default classes with absolutely minimum
  4. functionality, from which drivers and applications can be subclassed.
  5. Many of these classes are empty and are included only as documentation
  6. of the interfaces.
  7. $Id$
  8. """
  9. version = '2.0beta'
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================

# ===== ERRORHANDLER =====
  16. class ErrorHandler:
  17. """Basic interface for SAX error handlers.
  18. If you create an object that implements this interface, then
  19. register the object with your XMLReader, the parser will call the
  20. methods in your object to report all warnings and errors. There
  21. are three levels of errors available: warnings, (possibly)
  22. recoverable errors, and unrecoverable errors. All methods take a
  23. SAXParseException as the only parameter."""
  24. def error(self, exception):
  25. "Handle a recoverable error."
  26. raise exception
  27. def fatalError(self, exception):
  28. "Handle a non-recoverable error."
  29. raise exception
  30. def warning(self, exception):
  31. "Handle a warning."
  32. print(exception)
# ===== CONTENTHANDLER =====
  34. class ContentHandler:
  35. """Interface for receiving logical document content events.
  36. This is the main callback interface in SAX, and the one most
  37. important to applications. The order of events in this interface
  38. mirrors the order of the information in the document."""
  39. def __init__(self):
  40. self._locator = None
  41. def setDocumentLocator(self, locator):
  42. """Called by the parser to give the application a locator for
  43. locating the origin of document events.
  44. SAX parsers are strongly encouraged (though not absolutely
  45. required) to supply a locator: if it does so, it must supply
  46. the locator to the application by invoking this method before
  47. invoking any of the other methods in the DocumentHandler
  48. interface.
  49. The locator allows the application to determine the end
  50. position of any document-related event, even if the parser is
  51. not reporting an error. Typically, the application will use
  52. this information for reporting its own errors (such as
  53. character content that does not match an application's
  54. business rules). The information returned by the locator is
  55. probably not sufficient for use with a search engine.
  56. Note that the locator will return correct information only
  57. during the invocation of the events in this interface. The
  58. application should not attempt to use it at any other time."""
  59. self._locator = locator
  60. def startDocument(self):
  61. """Receive notification of the beginning of a document.
  62. The SAX parser will invoke this method only once, before any
  63. other methods in this interface or in DTDHandler (except for
  64. setDocumentLocator)."""
  65. def endDocument(self):
  66. """Receive notification of the end of a document.
  67. The SAX parser will invoke this method only once, and it will
  68. be the last method invoked during the parse. The parser shall
  69. not invoke this method until it has either abandoned parsing
  70. (because of an unrecoverable error) or reached the end of
  71. input."""
  72. def startPrefixMapping(self, prefix, uri):
  73. """Begin the scope of a prefix-URI Namespace mapping.
  74. The information from this event is not necessary for normal
  75. Namespace processing: the SAX XML reader will automatically
  76. replace prefixes for element and attribute names when the
  77. http://xml.org/sax/features/namespaces feature is true (the
  78. default).
  79. There are cases, however, when applications need to use
  80. prefixes in character data or in attribute values, where they
  81. cannot safely be expanded automatically; the
  82. start/endPrefixMapping event supplies the information to the
  83. application to expand prefixes in those contexts itself, if
  84. necessary.
  85. Note that start/endPrefixMapping events are not guaranteed to
  86. be properly nested relative to each-other: all
  87. startPrefixMapping events will occur before the corresponding
  88. startElement event, and all endPrefixMapping events will occur
  89. after the corresponding endElement event, but their order is
  90. not guaranteed."""
  91. def endPrefixMapping(self, prefix):
  92. """End the scope of a prefix-URI mapping.
  93. See startPrefixMapping for details. This event will always
  94. occur after the corresponding endElement event, but the order
  95. of endPrefixMapping events is not otherwise guaranteed."""
  96. def startElement(self, name, attrs):
  97. """Signals the start of an element in non-namespace mode.
  98. The name parameter contains the raw XML 1.0 name of the
  99. element type as a string and the attrs parameter holds an
  100. instance of the Attributes class containing the attributes of
  101. the element."""
  102. def endElement(self, name):
  103. """Signals the end of an element in non-namespace mode.
  104. The name parameter contains the name of the element type, just
  105. as with the startElement event."""
  106. def startElementNS(self, name, qname, attrs):
  107. """Signals the start of an element in namespace mode.
  108. The name parameter contains the name of the element type as a
  109. (uri, localname) tuple, the qname parameter the raw XML 1.0
  110. name used in the source document, and the attrs parameter
  111. holds an instance of the Attributes class containing the
  112. attributes of the element.
  113. The uri part of the name tuple is None for elements which have
  114. no namespace."""
  115. def endElementNS(self, name, qname):
  116. """Signals the end of an element in namespace mode.
  117. The name parameter contains the name of the element type, just
  118. as with the startElementNS event."""
  119. def characters(self, content):
  120. """Receive notification of character data.
  121. The Parser will call this method to report each chunk of
  122. character data. SAX parsers may return all contiguous
  123. character data in a single chunk, or they may split it into
  124. several chunks; however, all of the characters in any single
  125. event must come from the same external entity so that the
  126. Locator provides useful information."""
  127. def ignorableWhitespace(self, whitespace):
  128. """Receive notification of ignorable whitespace in element content.
  129. Validating Parsers must use this method to report each chunk
  130. of ignorable whitespace (see the W3C XML 1.0 recommendation,
  131. section 2.10): non-validating parsers may also use this method
  132. if they are capable of parsing and using content models.
  133. SAX parsers may return all contiguous whitespace in a single
  134. chunk, or they may split it into several chunks; however, all
  135. of the characters in any single event must come from the same
  136. external entity, so that the Locator provides useful
  137. information."""
  138. def processingInstruction(self, target, data):
  139. """Receive notification of a processing instruction.
  140. The Parser will invoke this method once for each processing
  141. instruction found: note that processing instructions may occur
  142. before or after the main document element.
  143. A SAX parser should never report an XML declaration (XML 1.0,
  144. section 2.8) or a text declaration (XML 1.0, section 4.3.1)
  145. using this method."""
  146. def skippedEntity(self, name):
  147. """Receive notification of a skipped entity.
  148. The Parser will invoke this method once for each entity
  149. skipped. Non-validating processors may skip entities if they
  150. have not seen the declarations (because, for example, the
  151. entity was declared in an external DTD subset). All processors
  152. may skip external entities, depending on the values of the
  153. http://xml.org/sax/features/external-general-entities and the
  154. http://xml.org/sax/features/external-parameter-entities
  155. properties."""
# ===== DTDHandler =====
  157. class DTDHandler:
  158. """Handle DTD events.
  159. This interface specifies only those DTD events required for basic
  160. parsing (unparsed entities and attributes)."""
  161. def notationDecl(self, name, publicId, systemId):
  162. "Handle a notation declaration event."
  163. def unparsedEntityDecl(self, name, publicId, systemId, ndata):
  164. "Handle an unparsed entity declaration event."
# ===== ENTITYRESOLVER =====
  166. class EntityResolver:
  167. """Basic interface for resolving entities. If you create an object
  168. implementing this interface, then register the object with your
  169. Parser, the parser will call the method in your object to
  170. resolve all external entities. Note that DefaultHandler implements
  171. this interface with the default behaviour."""
  172. def resolveEntity(self, publicId, systemId):
  173. """Resolve the system identifier of an entity and return either
  174. the system identifier to read from as a string, or an InputSource
  175. to read from."""
  176. return systemId
#============================================================================
#
# CORE FEATURES
#
#============================================================================
  182. feature_namespaces = "http://xml.org/sax/features/namespaces"
  183. # true: Perform Namespace processing (default).
  184. # false: Optionally do not perform Namespace processing
  185. # (implies namespace-prefixes).
  186. # access: (parsing) read-only; (not parsing) read/write
  187. feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
  188. # true: Report the original prefixed names and attributes used for Namespace
  189. # declarations.
  190. # false: Do not report attributes used for Namespace declarations, and
  191. # optionally do not report original prefixed names (default).
  192. # access: (parsing) read-only; (not parsing) read/write
  193. feature_string_interning = "http://xml.org/sax/features/string-interning"
  194. # true: All element names, prefixes, attribute names, Namespace URIs, and
  195. # local names are interned using the built-in intern function.
  196. # false: Names are not necessarily interned, although they may be (default).
  197. # access: (parsing) read-only; (not parsing) read/write
  198. feature_validation = "http://xml.org/sax/features/validation"
  199. # true: Report all validation errors (implies external-general-entities and
  200. # external-parameter-entities).
  201. # false: Do not report validation errors.
  202. # access: (parsing) read-only; (not parsing) read/write
  203. feature_external_ges = "http://xml.org/sax/features/external-general-entities"
  204. # true: Include all external general (text) entities.
  205. # false: Do not include external general entities.
  206. # access: (parsing) read-only; (not parsing) read/write
  207. feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
  208. # true: Include all external parameter entities, including the external
  209. # DTD subset.
  210. # false: Do not include any external parameter entities, even the external
  211. # DTD subset.
  212. # access: (parsing) read-only; (not parsing) read/write
  213. all_features = [feature_namespaces,
  214. feature_namespace_prefixes,
  215. feature_string_interning,
  216. feature_validation,
  217. feature_external_ges,
  218. feature_external_pes]
#============================================================================
#
# CORE PROPERTIES
#
#============================================================================
  224. property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
  225. # data type: xml.sax.sax2lib.LexicalHandler
  226. # description: An optional extension handler for lexical events like comments.
  227. # access: read/write
  228. property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
  229. # data type: xml.sax.sax2lib.DeclHandler
  230. # description: An optional extension handler for DTD-related events other
  231. # than notations and unparsed entities.
  232. # access: read/write
  233. property_dom_node = "http://xml.org/sax/properties/dom-node"
  234. # data type: org.w3c.dom.Node
  235. # description: When parsing, the current DOM node being visited if this is
  236. # a DOM iterator; when not parsing, the root DOM node for
  237. # iteration.
  238. # access: (parsing) read-only; (not parsing) read/write
  239. property_xml_string = "http://xml.org/sax/properties/xml-string"
  240. # data type: String
  241. # description: The literal string of characters that was the source for
  242. # the current event.
  243. # access: read-only
  244. property_encoding = "http://www.python.org/sax/properties/encoding"
  245. # data type: String
  246. # description: The name of the encoding to assume for input data.
  247. # access: write: set the encoding, e.g. established by a higher-level
  248. # protocol. May change during parsing (e.g. after
  249. # processing a META tag)
  250. # read: return the current encoding (possibly established through
  251. # auto-detection.
  252. # initial value: UTF-8
  253. #
  254. property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
  255. # data type: Dictionary
  256. # description: The dictionary used to intern common strings in the document
  257. # access: write: Request that the parser uses a specific dictionary, to
  258. # allow interning across different documents
  259. # read: return the current interning dictionary, or None
  260. #
  261. all_properties = [property_lexical_handler,
  262. property_dom_node,
  263. property_declaration_handler,
  264. property_xml_string,
  265. property_encoding,
  266. property_interning_dict]
  267. class LexicalHandler:
  268. """Optional SAX2 handler for lexical events.
  269. This handler is used to obtain lexical information about an XML
  270. document, that is, information about how the document was encoded
  271. (as opposed to what it contains, which is reported to the
  272. ContentHandler), such as comments and CDATA marked section
  273. boundaries.
  274. To set the LexicalHandler of an XMLReader, use the setProperty
  275. method with the property identifier
  276. 'http://xml.org/sax/properties/lexical-handler'."""
  277. def comment(self, content):
  278. """Reports a comment anywhere in the document (including the
  279. DTD and outside the document element).
  280. content is a string that holds the contents of the comment."""
  281. def startDTD(self, name, public_id, system_id):
  282. """Report the start of the DTD declarations, if the document
  283. has an associated DTD.
  284. A startEntity event will be reported before declaration events
  285. from the external DTD subset are reported, and this can be
  286. used to infer from which subset DTD declarations derive.
  287. name is the name of the document element type, public_id the
  288. public identifier of the DTD (or None if none were supplied)
  289. and system_id the system identfier of the external subset (or
  290. None if none were supplied)."""
  291. def endDTD(self):
  292. """Signals the end of DTD declarations."""
  293. def startCDATA(self):
  294. """Reports the beginning of a CDATA marked section.
  295. The contents of the CDATA marked section will be reported
  296. through the characters event."""
  297. def endCDATA(self):
  298. """Reports the end of a CDATA marked section."""