123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- """Policy framework for the email package.
- Allows fine grained feature control of how the package parses and emits data.
- """
- import abc
- from email import header
- from email import charset as _charset
- from email.utils import _has_surrogates
# Names exported by a star-import of this module: the abstract Policy base
# class, the backward-compatible Compat32 policy class, and the ready-made
# compat32 instance of that class.
__all__ = ['Policy', 'Compat32', 'compat32']
class _PolicyBase:

    """Read-only bag of policy settings; the machinery behind Policy.

    On its own this class manages nothing.  A subclass declares the
    settings it supports as class attributes, and the values given there
    act as the defaults.  Keyword arguments passed to the constructor
    override those defaults for a single instance and are kept in the
    instance __dict__.

    Once built, an instance refuses ordinary attribute assignment.  To get
    a variation of an existing instance use clone(), which takes the same
    keyword arguments as the constructor and returns a new instance that is
    identical to the original except for the values passed.  Instances may
    also be added; the result is the left hand operand updated with every
    non-default value of the right hand operand.  That is,

        A + B == A.clone(<non-default values of B>)

    The repr of an instance can be used to reconstruct the object if and
    only if the repr of each non-default value can be used to reconstruct
    that value.

    """

    def __init__(self, **kw):
        """Build a policy, replacing the defaults named by the keywords.

        Every keyword has to name an attribute the object already has
        (normally a class-level default); any other keyword raises
        TypeError.

        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        name, self.__class__.__name__))
            # __setattr__ on this class always refuses, so the value is
            # stored through the next __setattr__ in the MRO instead.
            super().__setattr__(name, value)

    def __repr__(self):
        """Show the class name with the non-default values as keywords."""
        overrides = ', '.join(
            "{}={!r}".format(key, val)
            for key, val in self.__dict__.items())
        return "{}({})".format(self.__class__.__name__, overrides)

    def clone(self, **kw):
        """Return a copy of this policy with the given attributes replaced.

        The copy carries every non-default value of the current object;
        the keyword arguments are then applied on top.  A keyword that does
        not name an existing attribute raises TypeError.

        """
        cls = self.__class__
        # Allocate without running __init__, then fill the copy in directly;
        # the regular assignment path is blocked by __setattr__.
        duplicate = cls.__new__(cls)
        store = object.__setattr__
        for key, val in self.__dict__.items():
            store(duplicate, key, val)
        for key, val in kw.items():
            if not hasattr(self, key):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        key, cls.__name__))
            store(duplicate, key, val)
        return duplicate

    def __setattr__(self, name, value):
        """Reject every assignment; the message tells known from unknown."""
        template = ("{!r} object attribute {!r} is read-only"
                    if hasattr(self, name)
                    else "{!r} object has no attribute {!r}")
        raise AttributeError(template.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Return a new policy: self updated with other's non-default values.

        The result is an instance of the left hand operand's class.

        """
        overrides = other.__dict__
        return self.clone(**overrides)
def _append_doc(doc, added_doc):
    """Return 'doc' extended with the text of 'added_doc'.

    doc is the docstring being extended; its last line (normally just the
    indentation in front of the closing quotes) is dropped.  added_doc is
    the extension; its first line (the '+' marker) is dropped.  The two
    remainders are joined with a newline.

    An added_doc that consists of a marker line only, with no newline in
    it, contributes nothing instead of raising IndexError.
    """
    doc = doc.rsplit('\n', 1)[0]
    # partition() yields '' for the tail when there is no newline, where
    # split('\n', 1)[1] would fail.
    added_doc = added_doc.partition('\n')[2]
    return doc + '\n' + added_doc
def _extend_docstrings(cls):
    """Class decorator that expands docstrings starting with '+'.

    A class docstring that starts with '+' is replaced by the docstring of
    the first base class with the rest of the '+' docstring appended.

    For every attribute defined directly on cls whose docstring starts with
    '+', the MROs of the bases are searched in order for an attribute of
    the same name that has a docstring; the first one found is used as the
    text to extend.  Ancestors that do not have the attribute at all are
    skipped rather than aborting the decoration with AttributeError.  If no
    ancestor supplies a docstring, the attribute's docstring is left as is.

    Returns cls, so the function can be used as a decorator.
    """
    if cls.__doc__ and cls.__doc__.startswith('+'):
        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
    for name, attr in cls.__dict__.items():
        if not (attr.__doc__ and attr.__doc__.startswith('+')):
            continue
        ancestors = (c for base in cls.__bases__ for c in base.mro())
        for ancestor in ancestors:
            inherited = getattr(ancestor, name, None)
            if inherited is None:
                # Not defined on this ancestor (every MRO ends in object,
                # which has none of the policy methods); keep looking.
                continue
            doc = inherited.__doc__
            if doc:
                attr.__doc__ = _append_doc(doc, attr.__doc__)
                break
    return cls
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone() method of a Policy instance with keyword arguments.
    Policy objects can also be added, producing a new Policy object in which
    the non-default attributes set in the right hand operand overwrite those
    specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    mangle_from_        -- a flag that, when True escapes From_ lines in the
                           body of the message by putting a `>' in front of
                           them. This is used when the message is being
                           serialized by a generator.  Default: False (the
                           Compat32 policy overrides this with True).

    message_factory     -- the class to use to create new message objects.
                           If the value is None, the default is Message.

    """

    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78
    mangle_from_ = False
    message_factory = None

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
@_extend_docstrings
class Compat32(Policy):

    """+
    This is the backward compatibility policy: it reproduces the behavior
    of version 5.1 of the email package.
    """

    mangle_from_ = True

    def _sanitize_header(self, name, value):
        # Only str values can carry surrogateescaped bytes; anything else is
        # taken to be a header object already and is handed back untouched.
        # A str with surrogates is wrapped in a Header that uses the
        # unknown-8bit charset, so the bytes come out as encoded words.
        if isinstance(value, str) and _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        return value

    def header_source_parse(self, sourcelines):
        """+
        The name is everything before the first ':' and is returned as is.
        The value is the rest of the first line with leading spaces and tabs
        removed, followed by all the remaining lines, with trailing carriage
        return and linefeed characters stripped from the end.

        """
        name, first_value = sourcelines[0].split(':', 1)
        pieces = [first_value.lstrip(' \t')]
        pieces.extend(sourcelines[1:])
        return (name, ''.join(pieces).rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        Both the name and the value are returned exactly as given.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        A value containing binary data is converted into a Header object
        that uses the unknown-8bit charset.  Any other value is returned
        unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Folding is done by the Header folding algorithm, which keeps the
        line breaks already present in the value and wraps each resulting
        line to max_line_length.  Non-ASCII binary data are CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Folding is done by the Header folding algorithm, which keeps the
        line breaks already present in the value and wraps each resulting
        line to max_line_length.  If cte_type is 7bit, non-ascii binary
        data is CTE encoded using the unknown-8bit charset.  Otherwise the
        original source header is used, with its existing line breaks
        and/or binary data.

        """
        must_sanitize = self.cte_type == '7bit'
        text = self._fold(name, value, sanitize=must_sanitize)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Taken to be a Header-like object.
            folder = value
        elif not _has_surrogates(value):
            folder = header.Header(value, header_name=name)
        elif sanitize:
            folder = header.Header(value,
                                   charset=_charset.UNKNOWN8BIT,
                                   header_name=name)
        else:
            # Raw 8bit data of unknown encoding.  Splitting it is not safe:
            # an ascii-superset encoding would survive a plain ascii split,
            # but a multibyte one could be cut in the middle of a character,
            # and there is no way to tell which it is.  The least harmful
            # choice is to emit the value unsplit, even if the line ends up
            # too long.
            pieces.append(value)
            folder = None
        if folder is not None:
            # Header.encode treats maxlinelen=None as "use the default of
            # 78" (the RFC 2822 recommendation), so a policy value of None
            # is passed on as 0 instead.
            limit = self.max_line_length
            if limit is None:
                limit = 0
            pieces.append(folder.encode(linesep=self.linesep,
                                        maxlinelen=limit))
        pieces.append(self.linesep)
        return ''.join(pieces)


# Ready-made instance of the backward compatibility policy.
compat32 = Compat32()
|