123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280 |
- """A collection of string constants.
- Public module variables:
- whitespace -- a string containing all ASCII whitespace
- ascii_lowercase -- a string containing all ASCII lowercase letters
- ascii_uppercase -- a string containing all ASCII uppercase letters
- ascii_letters -- a string containing all ASCII letters
- digits -- a string containing all ASCII decimal digits
- hexdigits -- a string containing all ASCII hexadecimal digits
- octdigits -- a string containing all ASCII octal digits
- punctuation -- a string containing all ASCII punctuation characters
- printable -- a string containing all ASCII characters considered printable
- """
# Names exported by ``from string import *``.
__all__ = [
    # character-class constants and helpers
    "ascii_letters",
    "ascii_lowercase",
    "ascii_uppercase",
    "capwords",
    "digits",
    "hexdigits",
    "octdigits",
    "printable",
    "punctuation",
    "whitespace",
    # formatting classes
    "Formatter",
    "Template",
]
- import _string
# Character-class constants in the style of C's <ctype.h> classification.
# Derived constants are built from the primitive ones so that the
# relationships between them stay visible.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = ascii_lowercase.upper()
ascii_letters = ''.join((ascii_lowercase, ascii_uppercase))
digits = '0123456789'
# Decimal digits, then the lowercase and uppercase hexadecimal letters.
hexdigits = ''.join((digits, 'abcdef', 'ABCDEF'))
# The octal digits are the first eight decimal digits.
octdigits = digits[:8]
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = ''.join((digits, ascii_letters, punctuation, whitespace))
- # Functions which aren't available as string methods.
- # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break *s* into words with ``s.split(sep)``, capitalize every word
    with ``capitalize()`` and glue the words back together.

    When *sep* is omitted or None, words are separated by runs of
    whitespace, leading and trailing whitespace is dropped, and the
    result is joined with single spaces.  Otherwise *sep* is used both
    to split the input and to join the capitalized words.
    """
    # Resolve the joiner first: a falsy sep (None or '') joins with ' '.
    joiner = sep or ' '
    join = joiner.join
    words = s.split(sep)
    return join([word.capitalize() for word in words])
- ####################################################################
- import re as _re
- from collections import ChainMap as _ChainMap
- _sentinel_dict = {}
- class Template:
- """A string class for supporting $-substitutions."""
- delimiter = '$'
- # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but
- # without the ASCII flag. We can't add re.ASCII to flags because of
- # backward compatibility. So we use the ?a local flag and [a-z] pattern.
- # See https://bugs.python.org/issue31672
- idpattern = r'(?a:[_a-z][_a-z0-9]*)'
- braceidpattern = None
- flags = _re.IGNORECASE
- def __init_subclass__(cls):
- super().__init_subclass__()
- if 'pattern' in cls.__dict__:
- pattern = cls.pattern
- else:
- delim = _re.escape(cls.delimiter)
- id = cls.idpattern
- bid = cls.braceidpattern or cls.idpattern
- pattern = fr"""
- {delim}(?:
- (?P<escaped>{delim}) | # Escape sequence of two delimiters
- (?P<named>{id}) | # delimiter and a Python identifier
- {{(?P<braced>{bid})}} | # delimiter and a braced identifier
- (?P<invalid>) # Other ill-formed delimiter exprs
- )
- """
- cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
- def __init__(self, template):
- self.template = template
- # Search for $$, $identifier, ${identifier}, and any bare $'s
- def _invalid(self, mo):
- i = mo.start('invalid')
- lines = self.template[:i].splitlines(keepends=True)
- if not lines:
- colno = 1
- lineno = 1
- else:
- colno = i - len(''.join(lines[:-1]))
- lineno = len(lines)
- raise ValueError('Invalid placeholder in string: line %d, col %d' %
- (lineno, colno))
- def substitute(self, mapping=_sentinel_dict, /, **kws):
- if mapping is _sentinel_dict:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, mapping)
- # Helper function for .sub()
- def convert(mo):
- # Check the most common path first.
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- return str(mapping[named])
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- self._invalid(mo)
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
- def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
- if mapping is _sentinel_dict:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, mapping)
- # Helper function for .sub()
- def convert(mo):
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- try:
- return str(mapping[named])
- except KeyError:
- return mo.group()
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- return mo.group()
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
- # Initialize Template.pattern. __init_subclass__() is automatically called
- # only for subclasses, not for the Template class itself.
- Template.__init_subclass__()
- ########################################################################
- # the Formatter class
- # see PEP 3101 for details and purpose of this class
- # The hard parts are reused from the C implementation. They're exposed as "_"
- # prefixed methods of str.
- # The overall parser is implemented in _string.formatter_parser.
- # The field name parser is implemented in _string.formatter_field_name_split
class Formatter:
    """Customisable, pure-Python driver for ``str.format()``-style strings.

    Parsing of the format string and of field names is delegated to the
    ``_string`` helper module; everything else is done by small methods
    (``parse``, ``get_field``, ``get_value``, ``convert_field``,
    ``format_field``, ``check_unused_args``) that subclasses may override.
    """

    def format(self, format_string, /, *args, **kwargs):
        """Format *format_string* using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format *format_string* with *args* (sequence) and *kwargs* (mapping).

        After formatting, ``check_unused_args()`` is called with the set
        of argument keys that were actually referenced.
        """
        used_args = set()
        # 2 is the recursion budget for format specs that contain fields.
        result, _ = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth,
                 auto_arg_index=0):
        """Do the actual formatting work for vformat().

        Returns ``(formatted_string, auto_arg_index)``.  *auto_arg_index*
        is the next index to use for automatically numbered fields, or
        False once an explicitly numbered field has been seen; it is
        threaded through the recursive calls made for nested format specs.

        Raises ValueError when *recursion_depth* drops below zero or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                # the formatting

                # Look only at the first component of the field name so
                # that "{.attr}" and "{[key]}" are auto-numbered just like
                # "{}", and "{0.attr}" counts as manual numbering.
                field_first, _ = _string.formatter_field_name_split(
                    field_name)
                if field_first == '':
                    if auto_arg_index is False:
                        raise ValueError('cannot switch from manual field '
                                         'specification to automatic field '
                                         'numbering')
                    # Prefix the index; any ".attr"/"[key]" suffix is kept.
                    field_name = str(auto_arg_index) + field_name
                    auto_arg_index += 1
                elif isinstance(field_first, int):
                    if auto_arg_index:
                        raise ValueError('cannot switch from automatic field '
                                         'numbering to manual field '
                                         'specification')
                    # disable auto arg incrementing, if it gets
                    # used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                # and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs,
                    used_args, recursion_depth-1,
                    auto_arg_index=auto_arg_index)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result), auto_arg_index

    def get_value(self, key, args, kwargs):
        """Return ``args[key]`` for an int *key*, otherwise ``kwargs[key]``."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format *value* with the built-in ``format()`` and *format_spec*."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply the ``!s``, ``!r`` or ``!a`` conversion to *value*.

        A *conversion* of None returns *value* unchanged; any other
        specifier raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split *format_string* into tuples via ``_string.formatter_parser``."""
        return _string.formatter_parser(format_string)

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name* and return ``(object, first_key)``.

        ``first_key`` is the leading component of the field name (the
        key handed to get_value()); the remaining components are applied
        to the object in order as attribute or item lookups.
        """
        first, rest = _string.formatter_field_name_split(field_name)

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first
|