metadata.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. import io
  2. import os
  3. import re
  4. import abc
  5. import csv
  6. import sys
  7. import email
  8. import pathlib
  9. import zipfile
  10. import operator
  11. import functools
  12. import itertools
  13. import posixpath
  14. import collections
  15. from configparser import ConfigParser
  16. from contextlib import suppress
  17. from importlib import import_module
  18. from importlib.abc import MetaPathFinder
  19. from itertools import starmap
  20. __all__ = [
  21. 'Distribution',
  22. 'DistributionFinder',
  23. 'PackageNotFoundError',
  24. 'distribution',
  25. 'distributions',
  26. 'entry_points',
  27. 'files',
  28. 'metadata',
  29. 'requires',
  30. 'version',
  31. ]
class PackageNotFoundError(ModuleNotFoundError):
    """The package was not found.

    Raised by ``Distribution.from_name`` (with the requested name as the
    exception argument) when no finder yields a matching distribution.
    """
  34. class EntryPoint(
  35. collections.namedtuple('EntryPointBase', 'name value group')):
  36. """An entry point as defined by Python packaging conventions.
  37. See `the packaging docs on entry points
  38. <https://packaging.python.org/specifications/entry-points/>`_
  39. for more information.
  40. >>> ep = EntryPoint(
  41. ... name=None, group=None, value='package.module:attr [extra1, extra2]')
  42. >>> ep.module
  43. 'package.module'
  44. >>> ep.attr
  45. 'attr'
  46. >>> ep.extras
  47. ['extra1', 'extra2']
  48. """
  49. pattern = re.compile(
  50. r'(?P<module>[\w.]+)\s*'
  51. r'(:\s*(?P<attr>[\w.]+)\s*)?'
  52. r'((?P<extras>\[.*\])\s*)?$'
  53. )
  54. """
  55. A regular expression describing the syntax for an entry point,
  56. which might look like:
  57. - module
  58. - package.module
  59. - package.module:attribute
  60. - package.module:object.attribute
  61. - package.module:attr [extra1, extra2]
  62. Other combinations are possible as well.
  63. The expression is lenient about whitespace around the ':',
  64. following the attr, and following any extras.
  65. """
  66. def load(self):
  67. """Load the entry point from its definition. If only a module
  68. is indicated by the value, return that module. Otherwise,
  69. return the named object.
  70. """
  71. match = self.pattern.match(self.value)
  72. module = import_module(match.group('module'))
  73. attrs = filter(None, (match.group('attr') or '').split('.'))
  74. return functools.reduce(getattr, attrs, module)
  75. @property
  76. def module(self):
  77. match = self.pattern.match(self.value)
  78. return match.group('module')
  79. @property
  80. def attr(self):
  81. match = self.pattern.match(self.value)
  82. return match.group('attr')
  83. @property
  84. def extras(self):
  85. match = self.pattern.match(self.value)
  86. return re.findall(r'\w+', match.group('extras') or '')
  87. @classmethod
  88. def _from_config(cls, config):
  89. return [
  90. cls(name, value, group)
  91. for group in config.sections()
  92. for name, value in config.items(group)
  93. ]
  94. @classmethod
  95. def _from_text(cls, text):
  96. config = ConfigParser(delimiters='=')
  97. # case sensitive: https://stackoverflow.com/q/1611799/812183
  98. config.optionxform = str
  99. try:
  100. config.read_string(text)
  101. except AttributeError: # pragma: nocover
  102. # Python 2 has no read_string
  103. config.readfp(io.StringIO(text))
  104. return EntryPoint._from_config(config)
  105. def __iter__(self):
  106. """
  107. Supply iter so one may construct dicts of EntryPoints easily.
  108. """
  109. return iter((self.name, self))
  110. def __reduce__(self):
  111. return (
  112. self.__class__,
  113. (self.name, self.value, self.group),
  114. )
  115. class PackagePath(pathlib.PurePosixPath):
  116. """A reference to a path in a package"""
  117. def read_text(self, encoding='utf-8'):
  118. with self.locate().open(encoding=encoding) as stream:
  119. return stream.read()
  120. def read_binary(self):
  121. with self.locate().open('rb') as stream:
  122. return stream.read()
  123. def locate(self):
  124. """Return a path-like object for this path"""
  125. return self.dist.locate_file(self)
  126. class FileHash:
  127. def __init__(self, spec):
  128. self.mode, _, self.value = spec.partition('=')
  129. def __repr__(self):
  130. return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
  131. class Distribution:
  132. """A Python distribution package."""
  133. @abc.abstractmethod
  134. def read_text(self, filename):
  135. """Attempt to load metadata file given by the name.
  136. :param filename: The name of the file in the distribution info.
  137. :return: The text if found, otherwise None.
  138. """
  139. @abc.abstractmethod
  140. def locate_file(self, path):
  141. """
  142. Given a path to a file in this distribution, return a path
  143. to it.
  144. """
  145. @classmethod
  146. def from_name(cls, name):
  147. """Return the Distribution for the given package name.
  148. :param name: The name of the distribution package to search for.
  149. :return: The Distribution instance (or subclass thereof) for the named
  150. package, if found.
  151. :raises PackageNotFoundError: When the named package's distribution
  152. metadata cannot be found.
  153. """
  154. for resolver in cls._discover_resolvers():
  155. dists = resolver(DistributionFinder.Context(name=name))
  156. dist = next(iter(dists), None)
  157. if dist is not None:
  158. return dist
  159. else:
  160. raise PackageNotFoundError(name)
  161. @classmethod
  162. def discover(cls, **kwargs):
  163. """Return an iterable of Distribution objects for all packages.
  164. Pass a ``context`` or pass keyword arguments for constructing
  165. a context.
  166. :context: A ``DistributionFinder.Context`` object.
  167. :return: Iterable of Distribution objects for all packages.
  168. """
  169. context = kwargs.pop('context', None)
  170. if context and kwargs:
  171. raise ValueError("cannot accept context and kwargs")
  172. context = context or DistributionFinder.Context(**kwargs)
  173. return itertools.chain.from_iterable(
  174. resolver(context)
  175. for resolver in cls._discover_resolvers()
  176. )
  177. @staticmethod
  178. def at(path):
  179. """Return a Distribution for the indicated metadata path
  180. :param path: a string or path-like object
  181. :return: a concrete Distribution instance for the path
  182. """
  183. return PathDistribution(pathlib.Path(path))
  184. @staticmethod
  185. def _discover_resolvers():
  186. """Search the meta_path for resolvers."""
  187. declared = (
  188. getattr(finder, 'find_distributions', None)
  189. for finder in sys.meta_path
  190. )
  191. return filter(None, declared)
  192. @classmethod
  193. def _local(cls, root='.'):
  194. from pep517 import build, meta
  195. system = build.compat_system(root)
  196. builder = functools.partial(
  197. meta.build,
  198. source_dir=root,
  199. system=system,
  200. )
  201. return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))
  202. @property
  203. def metadata(self):
  204. """Return the parsed metadata for this Distribution.
  205. The returned object will have keys that name the various bits of
  206. metadata. See PEP 566 for details.
  207. """
  208. text = (
  209. self.read_text('METADATA')
  210. or self.read_text('PKG-INFO')
  211. # This last clause is here to support old egg-info files. Its
  212. # effect is to just end up using the PathDistribution's self._path
  213. # (which points to the egg-info file) attribute unchanged.
  214. or self.read_text('')
  215. )
  216. return email.message_from_string(text)
  217. @property
  218. def version(self):
  219. """Return the 'Version' metadata for the distribution package."""
  220. return self.metadata['Version']
  221. @property
  222. def entry_points(self):
  223. return EntryPoint._from_text(self.read_text('entry_points.txt'))
  224. @property
  225. def files(self):
  226. """Files in this distribution.
  227. :return: List of PackagePath for this distribution or None
  228. Result is `None` if the metadata file that enumerates files
  229. (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
  230. missing.
  231. Result may be empty if the metadata exists but is empty.
  232. """
  233. file_lines = self._read_files_distinfo() or self._read_files_egginfo()
  234. def make_file(name, hash=None, size_str=None):
  235. result = PackagePath(name)
  236. result.hash = FileHash(hash) if hash else None
  237. result.size = int(size_str) if size_str else None
  238. result.dist = self
  239. return result
  240. return file_lines and list(starmap(make_file, csv.reader(file_lines)))
  241. def _read_files_distinfo(self):
  242. """
  243. Read the lines of RECORD
  244. """
  245. text = self.read_text('RECORD')
  246. return text and text.splitlines()
  247. def _read_files_egginfo(self):
  248. """
  249. SOURCES.txt might contain literal commas, so wrap each line
  250. in quotes.
  251. """
  252. text = self.read_text('SOURCES.txt')
  253. return text and map('"{}"'.format, text.splitlines())
  254. @property
  255. def requires(self):
  256. """Generated requirements specified for this Distribution"""
  257. reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
  258. return reqs and list(reqs)
  259. def _read_dist_info_reqs(self):
  260. return self.metadata.get_all('Requires-Dist')
  261. def _read_egg_info_reqs(self):
  262. source = self.read_text('requires.txt')
  263. return None if source is None else self._deps_from_requires_text(source)
  264. @classmethod
  265. def _deps_from_requires_text(cls, source):
  266. section_pairs = cls._read_sections(source.splitlines())
  267. sections = {
  268. section: list(map(operator.itemgetter('line'), results))
  269. for section, results in
  270. itertools.groupby(section_pairs, operator.itemgetter('section'))
  271. }
  272. return cls._convert_egg_info_reqs_to_simple_reqs(sections)
  273. @staticmethod
  274. def _read_sections(lines):
  275. section = None
  276. for line in filter(None, lines):
  277. section_match = re.match(r'\[(.*)\]$', line)
  278. if section_match:
  279. section = section_match.group(1)
  280. continue
  281. yield locals()
  282. @staticmethod
  283. def _convert_egg_info_reqs_to_simple_reqs(sections):
  284. """
  285. Historically, setuptools would solicit and store 'extra'
  286. requirements, including those with environment markers,
  287. in separate sections. More modern tools expect each
  288. dependency to be defined separately, with any relevant
  289. extras and environment markers attached directly to that
  290. requirement. This method converts the former to the
  291. latter. See _test_deps_from_requires_text for an example.
  292. """
  293. def make_condition(name):
  294. return name and 'extra == "{name}"'.format(name=name)
  295. def quoted_marker(section):
  296. section = section or ''
  297. extra, sep, markers = section.partition(':')
  298. if extra and markers:
  299. markers = f'({markers})'
  300. conditions = list(filter(None, [markers, make_condition(extra)]))
  301. return '; ' + ' and '.join(conditions) if conditions else ''
  302. def url_req_space(req):
  303. """
  304. PEP 508 requires a space between the url_spec and the quoted_marker.
  305. Ref python/importlib_metadata#357.
  306. """
  307. # '@' is uniquely indicative of a url_req.
  308. return ' ' * ('@' in req)
  309. for section, deps in sections.items():
  310. for dep in deps:
  311. space = url_req_space(dep)
  312. yield dep + space + quoted_marker(section)
  313. class DistributionFinder(MetaPathFinder):
  314. """
  315. A MetaPathFinder capable of discovering installed distributions.
  316. """
  317. class Context:
  318. """
  319. Keyword arguments presented by the caller to
  320. ``distributions()`` or ``Distribution.discover()``
  321. to narrow the scope of a search for distributions
  322. in all DistributionFinders.
  323. Each DistributionFinder may expect any parameters
  324. and should attempt to honor the canonical
  325. parameters defined below when appropriate.
  326. """
  327. name = None
  328. """
  329. Specific name for which a distribution finder should match.
  330. A name of ``None`` matches all distributions.
  331. """
  332. def __init__(self, **kwargs):
  333. vars(self).update(kwargs)
  334. @property
  335. def path(self):
  336. """
  337. The path that a distribution finder should search.
  338. Typically refers to Python package paths and defaults
  339. to ``sys.path``.
  340. """
  341. return vars(self).get('path', sys.path)
  342. @abc.abstractmethod
  343. def find_distributions(self, context=Context()):
  344. """
  345. Find distributions.
  346. Return an iterable of all Distribution instances capable of
  347. loading the metadata for packages matching the ``context``,
  348. a DistributionFinder.Context instance.
  349. """
  350. class FastPath:
  351. """
  352. Micro-optimized class for searching a path for
  353. children.
  354. """
  355. def __init__(self, root):
  356. self.root = root
  357. self.base = os.path.basename(self.root).lower()
  358. def joinpath(self, child):
  359. return pathlib.Path(self.root, child)
  360. def children(self):
  361. with suppress(Exception):
  362. return os.listdir(self.root or '.')
  363. with suppress(Exception):
  364. return self.zip_children()
  365. return []
  366. def zip_children(self):
  367. zip_path = zipfile.Path(self.root)
  368. names = zip_path.root.namelist()
  369. self.joinpath = zip_path.joinpath
  370. return dict.fromkeys(
  371. child.split(posixpath.sep, 1)[0]
  372. for child in names
  373. )
  374. def is_egg(self, search):
  375. base = self.base
  376. return (
  377. base == search.versionless_egg_name
  378. or base.startswith(search.prefix)
  379. and base.endswith('.egg'))
  380. def search(self, name):
  381. for child in self.children():
  382. n_low = child.lower()
  383. if (n_low in name.exact_matches
  384. or n_low.startswith(name.prefix)
  385. and n_low.endswith(name.suffixes)
  386. # legacy case:
  387. or self.is_egg(name) and n_low == 'egg-info'):
  388. yield self.joinpath(child)
  389. class Prepared:
  390. """
  391. A prepared search for metadata on a possibly-named package.
  392. """
  393. normalized = ''
  394. prefix = ''
  395. suffixes = '.dist-info', '.egg-info'
  396. exact_matches = [''][:0]
  397. versionless_egg_name = ''
  398. def __init__(self, name):
  399. self.name = name
  400. if name is None:
  401. return
  402. self.normalized = name.lower().replace('-', '_')
  403. self.prefix = self.normalized + '-'
  404. self.exact_matches = [
  405. self.normalized + suffix for suffix in self.suffixes]
  406. self.versionless_egg_name = self.normalized + '.egg'
  407. class MetadataPathFinder(DistributionFinder):
  408. @classmethod
  409. def find_distributions(cls, context=DistributionFinder.Context()):
  410. """
  411. Find distributions.
  412. Return an iterable of all Distribution instances capable of
  413. loading the metadata for packages matching ``context.name``
  414. (or all names if ``None`` indicated) along the paths in the list
  415. of directories ``context.path``.
  416. """
  417. found = cls._search_paths(context.name, context.path)
  418. return map(PathDistribution, found)
  419. @classmethod
  420. def _search_paths(cls, name, paths):
  421. """Find metadata directories in paths heuristically."""
  422. return itertools.chain.from_iterable(
  423. path.search(Prepared(name))
  424. for path in map(FastPath, paths)
  425. )
  426. class PathDistribution(Distribution):
  427. def __init__(self, path):
  428. """Construct a distribution from a path to the metadata directory.
  429. :param path: A pathlib.Path or similar object supporting
  430. .joinpath(), __div__, .parent, and .read_text().
  431. """
  432. self._path = path
  433. def read_text(self, filename):
  434. with suppress(FileNotFoundError, IsADirectoryError, KeyError,
  435. NotADirectoryError, PermissionError):
  436. return self._path.joinpath(filename).read_text(encoding='utf-8')
  437. read_text.__doc__ = Distribution.read_text.__doc__
  438. def locate_file(self, path):
  439. return self._path.parent / path
  440. def distribution(distribution_name):
  441. """Get the ``Distribution`` instance for the named package.
  442. :param distribution_name: The name of the distribution package as a string.
  443. :return: A ``Distribution`` instance (or subclass thereof).
  444. """
  445. return Distribution.from_name(distribution_name)
  446. def distributions(**kwargs):
  447. """Get all ``Distribution`` instances in the current environment.
  448. :return: An iterable of ``Distribution`` instances.
  449. """
  450. return Distribution.discover(**kwargs)
  451. def metadata(distribution_name):
  452. """Get the metadata for the named package.
  453. :param distribution_name: The name of the distribution package to query.
  454. :return: An email.Message containing the parsed metadata.
  455. """
  456. return Distribution.from_name(distribution_name).metadata
  457. def version(distribution_name):
  458. """Get the version string for the named package.
  459. :param distribution_name: The name of the distribution package to query.
  460. :return: The version string for the package as defined in the package's
  461. "Version" metadata key.
  462. """
  463. return distribution(distribution_name).version
  464. def entry_points():
  465. """Return EntryPoint objects for all installed packages.
  466. :return: EntryPoint objects for all installed packages.
  467. """
  468. eps = itertools.chain.from_iterable(
  469. dist.entry_points for dist in distributions())
  470. by_group = operator.attrgetter('group')
  471. ordered = sorted(eps, key=by_group)
  472. grouped = itertools.groupby(ordered, by_group)
  473. return {
  474. group: tuple(eps)
  475. for group, eps in grouped
  476. }
  477. def files(distribution_name):
  478. """Return a list of files for the named package.
  479. :param distribution_name: The name of the distribution package to query.
  480. :return: List of files composing the distribution.
  481. """
  482. return distribution(distribution_name).files
  483. def requires(distribution_name):
  484. """
  485. Return a list of requirements for the named package.
  486. :return: An iterator of requirements, suitable for
  487. packaging.requirement.Requirement.
  488. """
  489. return distribution(distribution_name).requires