123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- """Utilities to support packages."""
- from collections import namedtuple
- from functools import singledispatch as simplegeneric
- import importlib
- import importlib.util
- import importlib.machinery
- import os
- import os.path
- import sys
- from types import ModuleType
- import warnings
# Public names of this module, one per line.
__all__ = [
    'get_importer',
    'iter_importers',
    'get_loader',
    'find_loader',
    'walk_packages',
    'iter_modules',
    'get_data',
    'read_code',
    'extend_path',
    'ModuleInfo',
]

# Record type built by iter_modules(): the finder that reported the module,
# the module name, and whether it is a package.
ModuleInfo = namedtuple('ModuleInfo', ('module_finder', 'name', 'ispkg'))
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
def read_code(stream):
    """Unmarshal an object from the compiled-file *stream*.

    The first four bytes read from *stream* must equal
    importlib.util.MAGIC_NUMBER; if they do not, None is returned.
    Otherwise the following twelve header bytes are discarded and the
    result of marshal.load() on the rest of the stream is returned.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    if stream.read(4) == importlib.util.MAGIC_NUMBER:
        stream.read(12)  # Discard the remainder of the header
        return marshal.load(stream)
    return None
def walk_packages(path=None, prefix='', onerror=None):
    """Yields ModuleInfo for all modules recursively
    on path, or, if path is None, all accessible modules.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Note that this function must import all *packages* (NOT all
    modules!) on the given path, in order to access the __path__
    attribute to find submodules.

    'onerror' is a function which gets called with one argument (the
    name of the package which was being imported) if any exception
    occurs while trying to import a package.  If no onerror function is
    supplied, ImportErrors are caught and ignored, while all other
    exceptions are propagated, terminating the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # __path__ entries already descended into by this call.  Every recursive
    # call keeps its own set, so the bookkeeping is never shared between
    # separate walks.
    visited = set()

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except ImportError:
            # Import failures are reported if possible, otherwise ignored.
            if onerror is not None:
                onerror(info.name)
        except Exception:
            # Anything else is only swallowed when the caller asked to be
            # told about it.
            if onerror is None:
                raise
            onerror(info.name)
        else:
            pkg_path = getattr(sys.modules[info.name], '__path__', None) or []

            # don't traverse path items we've seen before
            unvisited = []
            for item in pkg_path:
                if item not in visited:
                    visited.add(item)
                    unvisited.append(item)

            yield from walk_packages(unvisited, info.name + '.', onerror)
def iter_modules(path=None, prefix=''):
    """Yields ModuleInfo for all submodules on path,
    or, if path is None, all top-level modules on sys.path.

    'path' should be either None or a list of paths to look for
    modules in; passing a single string raises ValueError.

    'prefix' is a string to output on the front of every module name
    on output.

    A name reported by more than one finder is yielded only once, for
    the first finder that reports it.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                         "modules in")

    finders = iter_importers() if path is None else map(get_importer, path)

    emitted = set()
    for finder in finders:
        for name, ispkg in iter_importer_modules(finder, prefix):
            if name in emitted:
                continue
            emitted.add(name)
            yield ModuleInfo(finder, name, ispkg)
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Default module lister used when no type-specific one is registered.

    Delegates to ``importer.iter_modules(prefix)`` when the importer has
    such an attribute; otherwise returns an empty list.
    """
    try:
        lister = importer.iter_modules
    except AttributeError:
        return []
    return lister(prefix)
# Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield ``(prefix + name, ispkg)`` for entries of ``importer.path``.

    Nothing is yielded when ``importer.path`` is None or is not a
    directory.  An entry is reported as a module when
    inspect.getmodulename() recognises its file name, and as a package
    when it is a directory (without a dot in its name) that contains an
    ``__init__`` module.  Each name is yielded at most once.
    """
    if importer.path is None or not os.path.isdir(importer.path):
        return

    import inspect

    try:
        # sorted: handle packages before same-named modules
        entries = sorted(os.listdir(importer.path))
    except OSError:
        # ignore unreadable directories like import does
        entries = []

    emitted = set()
    for entry in entries:
        modname = inspect.getmodulename(entry)
        if modname == '__init__' or modname in emitted:
            continue

        ispkg = False
        full_path = os.path.join(importer.path, entry)
        if not modname and os.path.isdir(full_path) and '.' not in entry:
            modname = entry
            try:
                children = os.listdir(full_path)
            except OSError:
                # ignore unreadable directories like import does
                children = []
            ispkg = any(inspect.getmodulename(child) == '__init__'
                        for child in children)
            if not ispkg:
                continue  # not a package

        if modname and '.' not in modname:
            emitted.add(modname)
            yield prefix + modname, ispkg
- iter_importer_modules.register(
- importlib.machinery.FileFinder, _iter_file_finder_modules)
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield ``(prefix + name, ispkg)`` for modules inside a zip archive.

        The entry names are taken from zipimport's directory cache for
        ``importer.archive``; only entries located under
        ``importer.prefix`` are considered.  A directory holding an
        ``__init__.py*`` file is reported as a package, a recognised
        module file directly under the prefix as a plain module.  Each
        name is yielded at most once.
        """
        listing = sorted(zipimport._zip_directory_cache[importer.archive])
        base = importer.prefix
        base_len = len(base)
        emitted = set()
        import inspect

        for entry in listing:
            if not entry.startswith(base):
                continue
            pieces = entry[base_len:].split(os.sep)

            if len(pieces) == 2 and pieces[1].startswith('__init__.py'):
                pkgname = pieces[0]
                if pkgname not in emitted:
                    emitted.add(pkgname)
                    yield prefix + pkgname, True

            # Anything that is not directly under the prefix is skipped.
            if len(pieces) != 1:
                continue

            modname = inspect.getmodulename(pieces[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in emitted:
                emitted.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
def get_importer(path_item):
    """Retrieve a finder for the given path item

    The returned finder is cached in sys.path_importer_cache
    if it was newly created by a path hook.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.

    None is returned (and nothing is cached) when every hook in
    sys.path_hooks rejects the item with ImportError.
    """
    path_item = os.fsdecode(path_item)

    try:
        return sys.path_importer_cache[path_item]
    except KeyError:
        pass

    for hook in sys.path_hooks:
        try:
            finder = hook(path_item)
        except ImportError:
            continue
        # setdefault keeps an entry that appeared in the meantime.
        sys.path_importer_cache.setdefault(path_item, finder)
        return finder

    return None
def iter_importers(fullname=""):
    """Yield finders for the given module name

    If fullname contains a '.', the finders will be for the package
    containing fullname, otherwise they will be all registered top level
    finders (i.e. those on both sys.meta_path and sys.path_hooks).

    If the named module is in a package, that package is imported as a side
    effect of invoking this function.  Nothing is yielded when that
    package has no __path__ attribute.

    If no module name is specified, all top level finders are produced.

    Raises ImportError for a relative name (one starting with '.').
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))

    parent_name, dot, _ = fullname.rpartition('.')
    if dot:
        # Get the containing package's __path__
        package = importlib.import_module(parent_name)
        search_path = getattr(package, '__path__', None)
        if search_path is None:
            return
    else:
        yield from sys.meta_path
        search_path = sys.path

    for entry in search_path:
        yield get_importer(entry)
def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.

    Deprecated: the call goes through warnings._deprecated(), pointing
    users at importlib.util.find_spec().
    """
    message = (f"{warnings._DEPRECATED_MSG}; "
               "use importlib.util.find_spec() instead")
    warnings._deprecated("pkgutil.get_loader", message, remove=(3, 14))

    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            return None

    if not isinstance(module_or_name, ModuleType):
        # A plain name: let find_loader() look it up.
        return find_loader(module_or_name)

    module = module_or_name
    loader = getattr(module, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(module, '__spec__', None) is None:
        return None
    return find_loader(module.__name__)
def find_loader(fullname):
    """Find a "loader" object for fullname

    This is a backwards compatibility wrapper around
    importlib.util.find_spec that converts most failures to ImportError
    and only returns the loader rather than the full spec

    Returns None when no spec is found.  Relative names (starting with
    '.') raise ImportError.  Deprecated via warnings._deprecated().
    """
    message = (f"{warnings._DEPRECATED_MSG}; "
               "use importlib.util.find_spec() instead")
    warnings._deprecated("pkgutil.find_loader", message, remove=(3, 14))

    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))

    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        template = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(template.format(fullname, type(ex), ex)) from ex

    if spec is None:
        return None
    return spec.loader
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    For each directory on sys.path that has a subdirectory that
    matches the package name, add the subdirectory to the package's
    __path__.  This is useful if one wants to distribute different
    parts of a single logical package as multiple directories.

    For a dotted name, the parent package's __path__ (taken from
    sys.modules) is searched instead of sys.path; if the parent is not
    in sys.modules or has no __path__, a copy of the input is returned.

    It also looks for *.pkg files beginning where * matches the name
    argument.  This feature is similar to *.pth files (see site.py),
    except that it doesn't special-case lines starting with 'import'.
    A *.pkg file is trusted at face value: apart from skipping blank
    lines and lines starting with '#', all entries found in a *.pkg
    file are added to the path, regardless of whether they exist on the
    filesystem.  (This is a feature.)  Note that, unlike the portions
    reported by the finders, *.pkg entries are NOT checked for
    duplicates.

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not strings are ignored.  Unicode items of sys.path that cause
    errors when used as filenames may cause this function to raise an
    exception (in line with os.path.isdir() behavior).
    """
    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    sname_pkg = name + ".pkg"

    path = path[:]  # Start with a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if parent_package:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # We can't do anything: find_loader() returns None when
            # passed a dotted name.
            return path
    else:
        search_path = sys.path

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in path:
                    path.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except OSError as exc:
                # Best effort: report the problem and carry on.
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, exc))
            else:
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line)  # Don't check for existence!

    return path
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/'); note
    that this function does not itself check for either.

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """
    spec = importlib.util.find_spec(package)
    if spec is None:
        return None

    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # XXX needs test
    mod = sys.modules.get(package)
    if not mod:
        mod = importlib._bootstrap._load(spec)
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    segments = resource.split('/')
    package_dir = os.path.dirname(mod.__file__)
    return loader.get_data(os.path.join(package_dir, *segments))
# Compiled lazily by resolve_name() on first use.
_NAME_PATTERN = None


def resolve_name(name):
    """
    Resolve a name to an object.

    It is expected that `name` will be a string in one of the following
    formats, where W is shorthand for a valid Python identifier and dot stands
    for a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form is intended for backward compatibility only. It assumes that
    some part of the dotted name is a package, and the rest is an object
    somewhere within that package, possibly nested inside other objects.
    Because the place where the package stops and the object hierarchy starts
    can't be inferred by inspection, repeated attempts to import must be done
    with this form.

    In the second form, the caller makes the division point clear through the
    provision of a single colon: the dotted name to the left of the colon is a
    package to be imported, and the dotted name to the right is the object
    hierarchy within that package. Only one import is needed in this form. If
    it ends with the colon, then a module object is returned.

    The function will return an object (which might be a module), or raise one
    of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    match = _NAME_PATTERN.match(name)
    if not match:
        raise ValueError(f'invalid format: {name!r}')

    if match.group('cln'):
        # there is a colon - a one-step import is all that's needed
        mod = importlib.import_module(match.group('pkg'))
        obj_path = match.group('obj')
        attrs = obj_path.split('.') if obj_path else []
    else:
        # no colon - have to iterate to find the package boundary
        segments = name.split('.')
        modname = segments[0]
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        consumed = 1
        for segment in segments[1:]:
            candidate = f'{modname}.{segment}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                # The remaining segments are attributes, not modules.
                break
            modname = candidate
            consumed += 1
        attrs = segments[consumed:]

    # if we reach this point, mod is the module, already imported, and
    # attrs is the list of parts in the object hierarchy to be traversed, or
    # an empty list if just the module is wanted.
    target = mod
    for attr in attrs:
        target = getattr(target, attr)
    return target
|