1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144 |
- import functools
- import sys
- import warnings
- from collections.abc import Mapping, Sequence
- from ipaddress import ip_address
- from urllib.parse import SplitResult, parse_qsl, urljoin, urlsplit, urlunsplit, quote
- from multidict import MultiDict, MultiDictProxy
- import idna
- import math
- from ._quoting import _Quoter, _Unquoter
- DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443}
- sentinel = object()
- def rewrite_module(obj: object) -> object:
- obj.__module__ = "yarl"
- return obj
- class cached_property:
- """Use as a class method decorator. It operates almost exactly like
- the Python `@property` decorator, but it puts the result of the
- method it decorates into the instance dict after the first call,
- effectively replacing the function it decorates with an instance
- variable. It is, in Python parlance, a data descriptor.
- """
- def __init__(self, wrapped):
- self.wrapped = wrapped
- try:
- self.__doc__ = wrapped.__doc__
- except AttributeError: # pragma: no cover
- self.__doc__ = ""
- self.name = wrapped.__name__
- def __get__(self, inst, owner, _sentinel=sentinel):
- if inst is None:
- return self
- val = inst._cache.get(self.name, _sentinel)
- if val is not _sentinel:
- return val
- val = self.wrapped(inst)
- inst._cache[self.name] = val
- return val
- def __set__(self, inst, value):
- raise AttributeError("cached property is read-only")
- @rewrite_module
- class URL:
- # Don't derive from str
- # follow pathlib.Path design
- # probably URL will not suffer from pathlib problems:
- # it's intended for libraries like aiohttp,
- # not to be passed into standard library functions like os.open etc.
- # URL grammar (RFC 3986)
- # pct-encoded = "%" HEXDIG HEXDIG
- # reserved = gen-delims / sub-delims
- # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- # / "*" / "+" / "," / ";" / "="
- # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- # hier-part = "//" authority path-abempty
- # / path-absolute
- # / path-rootless
- # / path-empty
- # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- # authority = [ userinfo "@" ] host [ ":" port ]
- # userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
- # host = IP-literal / IPv4address / reg-name
- # IP-literal = "[" ( IPv6address / IPvFuture ) "]"
- # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
- # IPv6address = 6( h16 ":" ) ls32
- # / "::" 5( h16 ":" ) ls32
- # / [ h16 ] "::" 4( h16 ":" ) ls32
- # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
- # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
- # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
- # / [ *4( h16 ":" ) h16 ] "::" ls32
- # / [ *5( h16 ":" ) h16 ] "::" h16
- # / [ *6( h16 ":" ) h16 ] "::"
- # ls32 = ( h16 ":" h16 ) / IPv4address
- # ; least-significant 32 bits of address
- # h16 = 1*4HEXDIG
- # ; 16 bits of address represented in hexadecimal
- # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
- # dec-octet = DIGIT ; 0-9
- # / %x31-39 DIGIT ; 10-99
- # / "1" 2DIGIT ; 100-199
- # / "2" %x30-34 DIGIT ; 200-249
- # / "25" %x30-35 ; 250-255
- # reg-name = *( unreserved / pct-encoded / sub-delims )
- # port = *DIGIT
- # path = path-abempty ; begins with "/" or is empty
- # / path-absolute ; begins with "/" but not "//"
- # / path-noscheme ; begins with a non-colon segment
- # / path-rootless ; begins with a segment
- # / path-empty ; zero characters
- # path-abempty = *( "/" segment )
- # path-absolute = "/" [ segment-nz *( "/" segment ) ]
- # path-noscheme = segment-nz-nc *( "/" segment )
- # path-rootless = segment-nz *( "/" segment )
- # path-empty = 0<pchar>
- # segment = *pchar
- # segment-nz = 1*pchar
- # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- # ; non-zero-length segment without any colon ":"
- # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
- # query = *( pchar / "/" / "?" )
- # fragment = *( pchar / "/" / "?" )
- # URI-reference = URI / relative-ref
- # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
- # relative-part = "//" authority path-abempty
- # / path-absolute
- # / path-noscheme
- # / path-empty
- # absolute-URI = scheme ":" hier-part [ "?" query ]
- __slots__ = ("_cache", "_val")
- _QUOTER = _Quoter(requote=False)
- _REQUOTER = _Quoter()
- _PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False)
- _PATH_REQUOTER = _Quoter(safe="@:", protected="/+")
- _QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False)
- _QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True)
- _QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False)
- _FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False)
- _FRAGMENT_REQUOTER = _Quoter(safe="?/:@")
- _UNQUOTER = _Unquoter()
- _PATH_UNQUOTER = _Unquoter(unsafe="+")
- _QS_UNQUOTER = _Unquoter(qs=True)
- def __new__(cls, val="", *, encoded=False, strict=None):
- if strict is not None: # pragma: no cover
- warnings.warn("strict parameter is ignored")
- if type(val) is cls:
- return val
- if type(val) is str:
- val = urlsplit(val)
- elif type(val) is SplitResult:
- if not encoded:
- raise ValueError("Cannot apply decoding to SplitResult")
- elif isinstance(val, str):
- val = urlsplit(str(val))
- else:
- raise TypeError("Constructor parameter should be str")
- if not encoded:
- if not val[1]: # netloc
- netloc = ""
- host = ""
- else:
- host = val.hostname
- if host is None:
- raise ValueError("Invalid URL: host is required for absolute urls")
- try:
- port = val.port
- except ValueError as e:
- raise ValueError(
- "Invalid URL: port can't be converted to integer"
- ) from e
- netloc = cls._make_netloc(
- val.username, val.password, host, port, encode=True, requote=True
- )
- path = cls._PATH_REQUOTER(val[2])
- if netloc:
- path = cls._normalize_path(path)
- cls._validate_authority_uri_abs_path(host=host, path=path)
- query = cls._QUERY_REQUOTER(val[3])
- fragment = cls._FRAGMENT_REQUOTER(val[4])
- val = SplitResult(val[0], netloc, path, query, fragment)
- self = object.__new__(cls)
- self._val = val
- self._cache = {}
- return self
- @classmethod
- def build(
- cls,
- *,
- scheme="",
- authority="",
- user=None,
- password=None,
- host="",
- port=None,
- path="",
- query=None,
- query_string="",
- fragment="",
- encoded=False
- ):
- """Creates and returns a new URL"""
- if authority and (user or password or host or port):
- raise ValueError(
- 'Can\'t mix "authority" with "user", "password", "host" or "port".'
- )
- if port and not host:
- raise ValueError('Can\'t build URL with "port" but without "host".')
- if query and query_string:
- raise ValueError('Only one of "query" or "query_string" should be passed')
- if (
- scheme is None
- or authority is None
- or path is None
- or query_string is None
- or fragment is None
- ):
- raise TypeError(
- 'NoneType is illegal for "scheme", "authority", "path", '
- '"query_string", and "fragment" args, use empty string instead.'
- )
- if authority:
- if encoded:
- netloc = authority
- else:
- tmp = SplitResult("", authority, "", "", "")
- netloc = cls._make_netloc(
- tmp.username, tmp.password, tmp.hostname, tmp.port, encode=True
- )
- elif not user and not password and not host and not port:
- netloc = ""
- else:
- netloc = cls._make_netloc(
- user, password, host, port, encode=not encoded, encode_host=not encoded
- )
- if not encoded:
- path = cls._PATH_QUOTER(path)
- if netloc:
- path = cls._normalize_path(path)
- cls._validate_authority_uri_abs_path(host=host, path=path)
- query_string = cls._QUERY_QUOTER(query_string)
- fragment = cls._FRAGMENT_QUOTER(fragment)
- url = cls(
- SplitResult(scheme, netloc, path, query_string, fragment), encoded=True
- )
- if query:
- return url.with_query(query)
- else:
- return url
- def __init_subclass__(cls):
- raise TypeError("Inheritance a class {!r} from URL is forbidden".format(cls))
- def __str__(self):
- val = self._val
- if not val.path and self.is_absolute() and (val.query or val.fragment):
- val = val._replace(path="/")
- return urlunsplit(val)
- def __repr__(self):
- return "{}('{}')".format(self.__class__.__name__, str(self))
- def __eq__(self, other):
- if not type(other) is URL:
- return NotImplemented
- val1 = self._val
- if not val1.path and self.is_absolute():
- val1 = val1._replace(path="/")
- val2 = other._val
- if not val2.path and other.is_absolute():
- val2 = val2._replace(path="/")
- return val1 == val2
- def __hash__(self):
- ret = self._cache.get("hash")
- if ret is None:
- val = self._val
- if not val.path and self.is_absolute():
- val = val._replace(path="/")
- ret = self._cache["hash"] = hash(val)
- return ret
- def __le__(self, other):
- if not type(other) is URL:
- return NotImplemented
- return self._val <= other._val
- def __lt__(self, other):
- if not type(other) is URL:
- return NotImplemented
- return self._val < other._val
- def __ge__(self, other):
- if not type(other) is URL:
- return NotImplemented
- return self._val >= other._val
- def __gt__(self, other):
- if not type(other) is URL:
- return NotImplemented
- return self._val > other._val
- def __truediv__(self, name):
- name = self._PATH_QUOTER(name)
- if name.startswith("/"):
- raise ValueError(
- "Appending path {!r} starting from slash is forbidden".format(name)
- )
- path = self._val.path
- if path == "/":
- new_path = "/" + name
- elif not path and not self.is_absolute():
- new_path = name
- else:
- parts = path.rstrip("/").split("/")
- parts.append(name)
- new_path = "/".join(parts)
- if self.is_absolute():
- new_path = self._normalize_path(new_path)
- return URL(
- self._val._replace(path=new_path, query="", fragment=""), encoded=True
- )
- def __mod__(self, query):
- return self.update_query(query)
- def __bool__(self) -> bool:
- return bool(
- self._val.netloc or self._val.path or self._val.query or self._val.fragment
- )
- def __getstate__(self):
- return (self._val,)
- def __setstate__(self, state):
- if state[0] is None and isinstance(state[1], dict):
- # default style pickle
- self._val = state[1]["_val"]
- else:
- self._val, *unused = state
- self._cache = {}
- def is_absolute(self):
- """A check for absolute URLs.
- Return True for absolute ones (having scheme or starting
- with //), False otherwise.
- """
- return self.raw_host is not None
- def is_default_port(self):
- """A check for default port.
- Return True if port is default for specified scheme,
- e.g. 'http://python.org' or 'http://python.org:80', False
- otherwise.
- """
- if self.port is None:
- return False
- default = DEFAULT_PORTS.get(self.scheme)
- if default is None:
- return False
- return self.port == default
- def origin(self):
- """Return an URL with scheme, host and port parts only.
- user, password, path, query and fragment are removed.
- """
- # TODO: add a keyword-only option for keeping user/pass maybe?
- if not self.is_absolute():
- raise ValueError("URL should be absolute")
- if not self._val.scheme:
- raise ValueError("URL should have scheme")
- v = self._val
- netloc = self._make_netloc(None, None, v.hostname, v.port)
- val = v._replace(netloc=netloc, path="", query="", fragment="")
- return URL(val, encoded=True)
- def relative(self):
- """Return a relative part of the URL.
- scheme, user, password, host and port are removed.
- """
- if not self.is_absolute():
- raise ValueError("URL should be absolute")
- val = self._val._replace(scheme="", netloc="")
- return URL(val, encoded=True)
- @property
- def scheme(self):
- """Scheme for absolute URLs.
- Empty string for relative URLs or URLs starting with //
- """
- return self._val.scheme
- @property
- def raw_authority(self):
- """Encoded authority part of URL.
- Empty string for relative URLs.
- """
- return self._val.netloc
- @cached_property
- def authority(self):
- """Decoded authority part of URL.
- Empty string for relative URLs.
- """
- return self._make_netloc(
- self.user, self.password, self.host, self.port, encode_host=False
- )
- @property
- def raw_user(self):
- """Encoded user part of URL.
- None if user is missing.
- """
- # not .username
- ret = self._val.username
- if not ret:
- return None
- return ret
- @cached_property
- def user(self):
- """Decoded user part of URL.
- None if user is missing.
- """
- return self._UNQUOTER(self.raw_user)
- @property
- def raw_password(self):
- """Encoded password part of URL.
- None if password is missing.
- """
- return self._val.password
- @cached_property
- def password(self):
- """Decoded password part of URL.
- None if password is missing.
- """
- return self._UNQUOTER(self.raw_password)
- @property
- def raw_host(self):
- """Encoded host part of URL.
- None for relative URLs.
- """
- # Use host instead of hostname for sake of shortness
- # May add .hostname prop later
- return self._val.hostname
- @cached_property
- def host(self):
- """Decoded host part of URL.
- None for relative URLs.
- """
- raw = self.raw_host
- if raw is None:
- return None
- if "%" in raw:
- # Hack for scoped IPv6 addresses like
- # fe80::2%Проверка
- # presence of '%' sign means only IPv6 address, so idna is useless.
- return raw
- return _idna_decode(raw)
- @property
- def port(self):
- """Port part of URL, with scheme-based fallback.
- None for relative URLs or URLs without explicit port and
- scheme without default port substitution.
- """
- return self._val.port or DEFAULT_PORTS.get(self._val.scheme)
- @property
- def explicit_port(self):
- """Port part of URL, without scheme-based fallback.
- None for relative URLs or URLs without explicit port.
- """
- return self._val.port
- @property
- def raw_path(self):
- """Encoded path of URL.
- / for absolute URLs without path part.
- """
- ret = self._val.path
- if not ret and self.is_absolute():
- ret = "/"
- return ret
- @cached_property
- def path(self):
- """Decoded path of URL.
- / for absolute URLs without path part.
- """
- return self._PATH_UNQUOTER(self.raw_path)
- @cached_property
- def query(self):
- """A MultiDictProxy representing parsed query parameters in decoded
- representation.
- Empty value if URL has no query part.
- """
- ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
- return MultiDictProxy(ret)
- @property
- def raw_query_string(self):
- """Encoded query part of URL.
- Empty string if query is missing.
- """
- return self._val.query
- @cached_property
- def query_string(self):
- """Decoded query part of URL.
- Empty string if query is missing.
- """
- return self._QS_UNQUOTER(self.raw_query_string)
- @cached_property
- def path_qs(self):
- """Decoded path of URL with query."""
- if not self.query_string:
- return self.path
- return "{}?{}".format(self.path, self.query_string)
- @cached_property
- def raw_path_qs(self):
- """Encoded path of URL with query."""
- if not self.raw_query_string:
- return self.raw_path
- return "{}?{}".format(self.raw_path, self.raw_query_string)
- @property
- def raw_fragment(self):
- """Encoded fragment part of URL.
- Empty string if fragment is missing.
- """
- return self._val.fragment
- @cached_property
- def fragment(self):
- """Decoded fragment part of URL.
- Empty string if fragment is missing.
- """
- return self._UNQUOTER(self.raw_fragment)
- @cached_property
- def raw_parts(self):
- """A tuple containing encoded *path* parts.
- ('/',) for absolute URLs if *path* is missing.
- """
- path = self._val.path
- if self.is_absolute():
- if not path:
- parts = ["/"]
- else:
- parts = ["/"] + path[1:].split("/")
- else:
- if path.startswith("/"):
- parts = ["/"] + path[1:].split("/")
- else:
- parts = path.split("/")
- return tuple(parts)
- @cached_property
- def parts(self):
- """A tuple containing decoded *path* parts.
- ('/',) for absolute URLs if *path* is missing.
- """
- return tuple(self._UNQUOTER(part) for part in self.raw_parts)
- @cached_property
- def parent(self):
- """A new URL with last part of path removed and cleaned up query and
- fragment.
- """
- path = self.raw_path
- if not path or path == "/":
- if self.raw_fragment or self.raw_query_string:
- return URL(self._val._replace(query="", fragment=""), encoded=True)
- return self
- parts = path.split("/")
- val = self._val._replace(path="/".join(parts[:-1]), query="", fragment="")
- return URL(val, encoded=True)
- @cached_property
- def raw_name(self):
- """The last part of raw_parts."""
- parts = self.raw_parts
- if self.is_absolute():
- parts = parts[1:]
- if not parts:
- return ""
- else:
- return parts[-1]
- else:
- return parts[-1]
- @cached_property
- def name(self):
- """The last part of parts."""
- return self._UNQUOTER(self.raw_name)
- @staticmethod
- def _validate_authority_uri_abs_path(host, path):
- """Ensure that path in URL with authority starts with a leading slash.
- Raise ValueError if not.
- """
- if len(host) > 0 and len(path) > 0 and not path.startswith("/"):
- raise ValueError(
- "Path in a URL with authority should start with a slash ('/') if set"
- )
- @classmethod
- def _normalize_path(cls, path):
- # Drop '.' and '..' from path
- segments = path.split("/")
- resolved_path = []
- for seg in segments:
- if seg == "..":
- try:
- resolved_path.pop()
- except IndexError:
- # ignore any .. segments that would otherwise cause an
- # IndexError when popped from resolved_path if
- # resolving for rfc3986
- pass
- elif seg == ".":
- continue
- else:
- resolved_path.append(seg)
- if segments[-1] in (".", ".."):
- # do some post-processing here.
- # if the last segment was a relative dir,
- # then we need to append the trailing '/'
- resolved_path.append("")
- return "/".join(resolved_path)
- if sys.version_info >= (3, 7):
- @classmethod
- def _encode_host(cls, host, human=False):
- try:
- ip, sep, zone = host.partition("%")
- ip = ip_address(ip)
- except ValueError:
- host = host.lower()
- # IDNA encoding is slow,
- # skip it for ASCII-only strings
- # Don't move the check into _idna_encode() helper
- # to reduce the cache size
- if human or host.isascii():
- return host
- host = _idna_encode(host)
- else:
- host = ip.compressed
- if sep:
- host += "%" + zone
- if ip.version == 6:
- host = "[" + host + "]"
- return host
- else:
- # work around for missing str.isascii() in Python <= 3.6
- @classmethod
- def _encode_host(cls, host, human=False):
- try:
- ip, sep, zone = host.partition("%")
- ip = ip_address(ip)
- except ValueError:
- host = host.lower()
- if human:
- return host
- for char in host:
- if char > "\x7f":
- break
- else:
- return host
- host = _idna_encode(host)
- else:
- host = ip.compressed
- if sep:
- host += "%" + zone
- if ip.version == 6:
- host = "[" + host + "]"
- return host
- @classmethod
- def _make_netloc(
- cls, user, password, host, port, encode=False, encode_host=True, requote=False
- ):
- quoter = cls._REQUOTER if requote else cls._QUOTER
- if encode_host:
- ret = cls._encode_host(host)
- else:
- ret = host
- if port:
- ret = ret + ":" + str(port)
- if password is not None:
- if not user:
- user = ""
- else:
- if encode:
- user = quoter(user)
- if encode:
- password = quoter(password)
- user = user + ":" + password
- elif user and encode:
- user = quoter(user)
- if user:
- ret = user + "@" + ret
- return ret
- def with_scheme(self, scheme):
- """Return a new URL with scheme replaced."""
- # N.B. doesn't cleanup query/fragment
- if not isinstance(scheme, str):
- raise TypeError("Invalid scheme type")
- if not self.is_absolute():
- raise ValueError("scheme replacement is not allowed for relative URLs")
- return URL(self._val._replace(scheme=scheme.lower()), encoded=True)
- def with_user(self, user):
- """Return a new URL with user replaced.
- Autoencode user if needed.
- Clear user/password if user is None.
- """
- # N.B. doesn't cleanup query/fragment
- val = self._val
- if user is None:
- password = None
- elif isinstance(user, str):
- user = self._QUOTER(user)
- password = val.password
- else:
- raise TypeError("Invalid user type")
- if not self.is_absolute():
- raise ValueError("user replacement is not allowed for relative URLs")
- return URL(
- self._val._replace(
- netloc=self._make_netloc(user, password, val.hostname, val.port)
- ),
- encoded=True,
- )
- def with_password(self, password):
- """Return a new URL with password replaced.
- Autoencode password if needed.
- Clear password if argument is None.
- """
- # N.B. doesn't cleanup query/fragment
- if password is None:
- pass
- elif isinstance(password, str):
- password = self._QUOTER(password)
- else:
- raise TypeError("Invalid password type")
- if not self.is_absolute():
- raise ValueError("password replacement is not allowed for relative URLs")
- val = self._val
- return URL(
- self._val._replace(
- netloc=self._make_netloc(val.username, password, val.hostname, val.port)
- ),
- encoded=True,
- )
- def with_host(self, host):
- """Return a new URL with host replaced.
- Autoencode host if needed.
- Changing host for relative URLs is not allowed, use .join()
- instead.
- """
- # N.B. doesn't cleanup query/fragment
- if not isinstance(host, str):
- raise TypeError("Invalid host type")
- if not self.is_absolute():
- raise ValueError("host replacement is not allowed for relative URLs")
- if not host:
- raise ValueError("host removing is not allowed")
- val = self._val
- return URL(
- self._val._replace(
- netloc=self._make_netloc(val.username, val.password, host, val.port)
- ),
- encoded=True,
- )
- def with_port(self, port):
- """Return a new URL with port replaced.
- Clear port to default if None is passed.
- """
- # N.B. doesn't cleanup query/fragment
- if port is not None and not isinstance(port, int):
- raise TypeError("port should be int or None, got {}".format(type(port)))
- if not self.is_absolute():
- raise ValueError("port replacement is not allowed for relative URLs")
- val = self._val
- return URL(
- self._val._replace(
- netloc=self._make_netloc(
- val.username, val.password, val.hostname, port, encode=True
- )
- ),
- encoded=True,
- )
- def with_path(self, path, *, encoded=False):
- """Return a new URL with path replaced."""
- if not encoded:
- path = self._PATH_QUOTER(path)
- if self.is_absolute():
- path = self._normalize_path(path)
- if len(path) > 0 and path[0] != "/":
- path = "/" + path
- return URL(self._val._replace(path=path, query="", fragment=""), encoded=True)
- @classmethod
- def _query_seq_pairs(cls, quoter, pairs):
- for key, val in pairs:
- if isinstance(val, (list, tuple)):
- for v in val:
- yield quoter(key) + "=" + quoter(cls._query_var(v))
- else:
- yield quoter(key) + "=" + quoter(cls._query_var(val))
- @staticmethod
- def _query_var(v):
- cls = type(v)
- if issubclass(cls, str):
- return v
- if issubclass(cls, float):
- if math.isinf(v):
- raise ValueError("float('inf') is not supported")
- if math.isnan(v):
- raise ValueError("float('nan') is not supported")
- return str(float(v))
- if issubclass(cls, int) and cls is not bool:
- return str(int(v))
- raise TypeError(
- "Invalid variable type: value "
- "should be str, int or float, got {!r} "
- "of type {}".format(v, cls)
- )
- def _get_str_query(self, *args, **kwargs):
- if kwargs:
- if len(args) > 0:
- raise ValueError(
- "Either kwargs or single query parameter must be present"
- )
- query = kwargs
- elif len(args) == 1:
- query = args[0]
- else:
- raise ValueError("Either kwargs or single query parameter must be present")
- if query is None:
- query = ""
- elif isinstance(query, Mapping):
- quoter = self._QUERY_PART_QUOTER
- query = "&".join(self._query_seq_pairs(quoter, query.items()))
- elif isinstance(query, str):
- query = self._QUERY_QUOTER(query)
- elif isinstance(query, (bytes, bytearray, memoryview)):
- raise TypeError(
- "Invalid query type: bytes, bytearray and memoryview are forbidden"
- )
- elif isinstance(query, Sequence):
- quoter = self._QUERY_PART_QUOTER
- # We don't expect sequence values if we're given a list of pairs
- # already; only mappings like builtin `dict` which can't have the
- # same key pointing to multiple values are allowed to use
- # `_query_seq_pairs`.
- query = "&".join(
- quoter(k) + "=" + quoter(self._query_var(v)) for k, v in query
- )
- else:
- raise TypeError(
- "Invalid query type: only str, mapping or "
- "sequence of (key, value) pairs is allowed"
- )
- return query
- def with_query(self, *args, **kwargs):
- """Return a new URL with query part replaced.
- Accepts any Mapping (e.g. dict, multidict.MultiDict instances)
- or str, autoencode the argument if needed.
- A sequence of (key, value) pairs is supported as well.
- It also can take an arbitrary number of keyword arguments.
- Clear query if None is passed.
- """
- # N.B. doesn't cleanup query/fragment
- new_query = self._get_str_query(*args, **kwargs)
- return URL(
- self._val._replace(path=self._val.path, query=new_query), encoded=True
- )
- def update_query(self, *args, **kwargs):
- """Return a new URL with query part updated."""
- s = self._get_str_query(*args, **kwargs)
- new_query = MultiDict(parse_qsl(s, keep_blank_values=True))
- query = MultiDict(self.query)
- query.update(new_query)
- return URL(self._val._replace(query=self._get_str_query(query)), encoded=True)
- def with_fragment(self, fragment):
- """Return a new URL with fragment replaced.
- Autoencode fragment if needed.
- Clear fragment to default if None is passed.
- """
- # N.B. doesn't cleanup query/fragment
- if fragment is None:
- raw_fragment = ""
- elif not isinstance(fragment, str):
- raise TypeError("Invalid fragment type")
- else:
- raw_fragment = self._FRAGMENT_QUOTER(fragment)
- if self.raw_fragment == raw_fragment:
- return self
- return URL(self._val._replace(fragment=raw_fragment), encoded=True)
- def with_name(self, name):
- """Return a new URL with name (last part of path) replaced.
- Query and fragment parts are cleaned up.
- Name is encoded if needed.
- """
- # N.B. DOES cleanup query/fragment
- if not isinstance(name, str):
- raise TypeError("Invalid name type")
- if "/" in name:
- raise ValueError("Slash in name is not allowed")
- name = self._PATH_QUOTER(name)
- if name in (".", ".."):
- raise ValueError(". and .. values are forbidden")
- parts = list(self.raw_parts)
- if self.is_absolute():
- if len(parts) == 1:
- parts.append(name)
- else:
- parts[-1] = name
- parts[0] = "" # replace leading '/'
- else:
- parts[-1] = name
- if parts[0] == "/":
- parts[0] = "" # replace leading '/'
- return URL(
- self._val._replace(path="/".join(parts), query="", fragment=""),
- encoded=True,
- )
- def join(self, url):
- """Join URLs
- Construct a full (“absolute”) URL by combining a “base URL”
- (self) with another URL (url).
- Informally, this uses components of the base URL, in
- particular the addressing scheme, the network location and
- (part of) the path, to provide missing components in the
- relative URL.
- """
- # See docs for urllib.parse.urljoin
- if not isinstance(url, URL):
- raise TypeError("url should be URL")
- return URL(urljoin(str(self), str(url)), encoded=True)
- def human_repr(self):
- """Return decoded human readable string for URL representation."""
- user = _human_quote(self.user, "#/:?@")
- password = _human_quote(self.password, "#/:?@")
- host = self.host
- if host:
- host = self._encode_host(self.host, human=True)
- path = _human_quote(self.path, "#?")
- query_string = "&".join(
- "{}={}".format(_human_quote(k, "#&+;="), _human_quote(v, "#&+;="))
- for k, v in self.query.items()
- )
- fragment = _human_quote(self.fragment, "")
- return urlunsplit(
- SplitResult(
- self.scheme,
- self._make_netloc(
- user,
- password,
- host,
- self._val.port,
- encode_host=False,
- ),
- path,
- query_string,
- fragment,
- )
- )
- def _human_quote(s, unsafe):
- if not s:
- return s
- for c in "%" + unsafe:
- if c in s:
- s = s.replace(c, "%{:02X}".format(ord(c)))
- if s.isprintable():
- return s
- return "".join(c if c.isprintable() else quote(c) for c in s)
- _MAXCACHE = 256
- @functools.lru_cache(_MAXCACHE)
- def _idna_decode(raw):
- try:
- return idna.decode(raw.encode("ascii"))
- except UnicodeError: # e.g. '::1'
- return raw.encode("ascii").decode("idna")
- @functools.lru_cache(_MAXCACHE)
- def _idna_encode(host):
- try:
- return idna.encode(host, uts46=True).decode("ascii")
- except UnicodeError:
- return host.encode("idna").decode("ascii")
- @rewrite_module
- def cache_clear():
- _idna_decode.cache_clear()
- _idna_encode.cache_clear()
- @rewrite_module
- def cache_info():
- return {
- "idna_encode": _idna_encode.cache_info(),
- "idna_decode": _idna_decode.cache_info(),
- }
- @rewrite_module
- def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
- global _idna_decode, _idna_encode
- _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
- _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
|