modulefinder.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. """Find modules used by a script, using introspection."""
  2. import dis
  3. import importlib._bootstrap_external
  4. import importlib.machinery
  5. import marshal
  6. import os
  7. import io
  8. import sys
# Old imp constants: integer "kind" codes placed in the third element of the
# (suffix, mode, kind) tuple returned by _find_module() and
# ModuleFinder.find_module().
_SEARCH_ERROR = 0   # loader type was not recognised
_PY_SOURCE = 1      # found via a SourceFileLoader
_PY_COMPILED = 2    # found via a SourcelessFileLoader
_C_EXTENSION = 3    # found via an ExtensionFileLoader
_PKG_DIRECTORY = 5  # the loader reports the name as a package
_C_BUILTIN = 6      # built-in module
_PY_FROZEN = 7      # frozen module
  17. # Modulefinder does a good job at simulating Python's, but it can not
  18. # handle __path__ modifications packages make at runtime. Therefore there
  19. # is a mechanism whereby you can register extra paths in this map for a
  20. # package, and it will be honored.
  21. # Note this is a mapping is lists of paths.
  22. packagePathMap = {}
  23. # A Public interface
  24. def AddPackagePath(packagename, path):
  25. packagePathMap.setdefault(packagename, []).append(path)
  26. replacePackageMap = {}
  27. # This ReplacePackage mechanism allows modulefinder to work around
  28. # situations in which a package injects itself under the name
  29. # of another package into sys.modules at runtime by calling
  30. # ReplacePackage("real_package_name", "faked_package_name")
  31. # before running ModuleFinder.
  32. def ReplacePackage(oldname, newname):
  33. replacePackageMap[oldname] = newname
  34. def _find_module(name, path=None):
  35. """An importlib reimplementation of imp.find_module (for our purposes)."""
  36. # It's necessary to clear the caches for our Finder first, in case any
  37. # modules are being added/deleted/modified at runtime. In particular,
  38. # test_modulefinder.py changes file tree contents in a cache-breaking way:
  39. importlib.machinery.PathFinder.invalidate_caches()
  40. spec = importlib.machinery.PathFinder.find_spec(name, path)
  41. if spec is None:
  42. raise ImportError("No module named {name!r}".format(name=name), name=name)
  43. # Some special cases:
  44. if spec.loader is importlib.machinery.BuiltinImporter:
  45. return None, None, ("", "", _C_BUILTIN)
  46. if spec.loader is importlib.machinery.FrozenImporter:
  47. return None, None, ("", "", _PY_FROZEN)
  48. file_path = spec.origin
  49. if spec.loader.is_package(name):
  50. return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
  51. if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
  52. kind = _PY_SOURCE
  53. elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
  54. kind = _C_EXTENSION
  55. elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
  56. kind = _PY_COMPILED
  57. else: # Should never happen.
  58. return None, None, ("", "", _SEARCH_ERROR)
  59. file = io.open_code(file_path)
  60. suffix = os.path.splitext(file_path)[-1]
  61. return file, file_path, (suffix, "rb", kind)
  62. class Module:
  63. def __init__(self, name, file=None, path=None):
  64. self.__name__ = name
  65. self.__file__ = file
  66. self.__path__ = path
  67. self.__code__ = None
  68. # The set of global names that are assigned to in the module.
  69. # This includes those names imported through starimports of
  70. # Python modules.
  71. self.globalnames = {}
  72. # The set of starimports this module did that could not be
  73. # resolved, ie. a starimport from a non-Python module.
  74. self.starimports = {}
  75. def __repr__(self):
  76. s = "Module(%r" % (self.__name__,)
  77. if self.__file__ is not None:
  78. s = s + ", %r" % (self.__file__,)
  79. if self.__path__ is not None:
  80. s = s + ", %r" % (self.__path__,)
  81. s = s + ")"
  82. return s
  83. class ModuleFinder:
  84. def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
  85. if path is None:
  86. path = sys.path
  87. self.path = path
  88. self.modules = {}
  89. self.badmodules = {}
  90. self.debug = debug
  91. self.indent = 0
  92. self.excludes = excludes if excludes is not None else []
  93. self.replace_paths = replace_paths if replace_paths is not None else []
  94. self.processed_paths = [] # Used in debugging only
  95. def msg(self, level, str, *args):
  96. if level <= self.debug:
  97. for i in range(self.indent):
  98. print(" ", end=' ')
  99. print(str, end=' ')
  100. for arg in args:
  101. print(repr(arg), end=' ')
  102. print()
  103. def msgin(self, *args):
  104. level = args[0]
  105. if level <= self.debug:
  106. self.indent = self.indent + 1
  107. self.msg(*args)
  108. def msgout(self, *args):
  109. level = args[0]
  110. if level <= self.debug:
  111. self.indent = self.indent - 1
  112. self.msg(*args)
  113. def run_script(self, pathname):
  114. self.msg(2, "run_script", pathname)
  115. with io.open_code(pathname) as fp:
  116. stuff = ("", "rb", _PY_SOURCE)
  117. self.load_module('__main__', fp, pathname, stuff)
  118. def load_file(self, pathname):
  119. dir, name = os.path.split(pathname)
  120. name, ext = os.path.splitext(name)
  121. with io.open_code(pathname) as fp:
  122. stuff = (ext, "rb", _PY_SOURCE)
  123. self.load_module(name, fp, pathname, stuff)
  124. def import_hook(self, name, caller=None, fromlist=None, level=-1):
  125. self.msg(3, "import_hook", name, caller, fromlist, level)
  126. parent = self.determine_parent(caller, level=level)
  127. q, tail = self.find_head_package(parent, name)
  128. m = self.load_tail(q, tail)
  129. if not fromlist:
  130. return q
  131. if m.__path__:
  132. self.ensure_fromlist(m, fromlist)
  133. return None
  134. def determine_parent(self, caller, level=-1):
  135. self.msgin(4, "determine_parent", caller, level)
  136. if not caller or level == 0:
  137. self.msgout(4, "determine_parent -> None")
  138. return None
  139. pname = caller.__name__
  140. if level >= 1: # relative import
  141. if caller.__path__:
  142. level -= 1
  143. if level == 0:
  144. parent = self.modules[pname]
  145. assert parent is caller
  146. self.msgout(4, "determine_parent ->", parent)
  147. return parent
  148. if pname.count(".") < level:
  149. raise ImportError("relative importpath too deep")
  150. pname = ".".join(pname.split(".")[:-level])
  151. parent = self.modules[pname]
  152. self.msgout(4, "determine_parent ->", parent)
  153. return parent
  154. if caller.__path__:
  155. parent = self.modules[pname]
  156. assert caller is parent
  157. self.msgout(4, "determine_parent ->", parent)
  158. return parent
  159. if '.' in pname:
  160. i = pname.rfind('.')
  161. pname = pname[:i]
  162. parent = self.modules[pname]
  163. assert parent.__name__ == pname
  164. self.msgout(4, "determine_parent ->", parent)
  165. return parent
  166. self.msgout(4, "determine_parent -> None")
  167. return None
  168. def find_head_package(self, parent, name):
  169. self.msgin(4, "find_head_package", parent, name)
  170. if '.' in name:
  171. i = name.find('.')
  172. head = name[:i]
  173. tail = name[i+1:]
  174. else:
  175. head = name
  176. tail = ""
  177. if parent:
  178. qname = "%s.%s" % (parent.__name__, head)
  179. else:
  180. qname = head
  181. q = self.import_module(head, qname, parent)
  182. if q:
  183. self.msgout(4, "find_head_package ->", (q, tail))
  184. return q, tail
  185. if parent:
  186. qname = head
  187. parent = None
  188. q = self.import_module(head, qname, parent)
  189. if q:
  190. self.msgout(4, "find_head_package ->", (q, tail))
  191. return q, tail
  192. self.msgout(4, "raise ImportError: No module named", qname)
  193. raise ImportError("No module named " + qname)
  194. def load_tail(self, q, tail):
  195. self.msgin(4, "load_tail", q, tail)
  196. m = q
  197. while tail:
  198. i = tail.find('.')
  199. if i < 0: i = len(tail)
  200. head, tail = tail[:i], tail[i+1:]
  201. mname = "%s.%s" % (m.__name__, head)
  202. m = self.import_module(head, mname, m)
  203. if not m:
  204. self.msgout(4, "raise ImportError: No module named", mname)
  205. raise ImportError("No module named " + mname)
  206. self.msgout(4, "load_tail ->", m)
  207. return m
  208. def ensure_fromlist(self, m, fromlist, recursive=0):
  209. self.msg(4, "ensure_fromlist", m, fromlist, recursive)
  210. for sub in fromlist:
  211. if sub == "*":
  212. if not recursive:
  213. all = self.find_all_submodules(m)
  214. if all:
  215. self.ensure_fromlist(m, all, 1)
  216. elif not hasattr(m, sub):
  217. subname = "%s.%s" % (m.__name__, sub)
  218. submod = self.import_module(sub, subname, m)
  219. if not submod:
  220. raise ImportError("No module named " + subname)
  221. def find_all_submodules(self, m):
  222. if not m.__path__:
  223. return
  224. modules = {}
  225. # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
  226. # But we must also collect Python extension modules - although
  227. # we cannot separate normal dlls from Python extensions.
  228. suffixes = []
  229. suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
  230. suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
  231. suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
  232. for dir in m.__path__:
  233. try:
  234. names = os.listdir(dir)
  235. except OSError:
  236. self.msg(2, "can't list directory", dir)
  237. continue
  238. for name in names:
  239. mod = None
  240. for suff in suffixes:
  241. n = len(suff)
  242. if name[-n:] == suff:
  243. mod = name[:-n]
  244. break
  245. if mod and mod != "__init__":
  246. modules[mod] = mod
  247. return modules.keys()
  248. def import_module(self, partname, fqname, parent):
  249. self.msgin(3, "import_module", partname, fqname, parent)
  250. try:
  251. m = self.modules[fqname]
  252. except KeyError:
  253. pass
  254. else:
  255. self.msgout(3, "import_module ->", m)
  256. return m
  257. if fqname in self.badmodules:
  258. self.msgout(3, "import_module -> None")
  259. return None
  260. if parent and parent.__path__ is None:
  261. self.msgout(3, "import_module -> None")
  262. return None
  263. try:
  264. fp, pathname, stuff = self.find_module(partname,
  265. parent and parent.__path__, parent)
  266. except ImportError:
  267. self.msgout(3, "import_module ->", None)
  268. return None
  269. try:
  270. m = self.load_module(fqname, fp, pathname, stuff)
  271. finally:
  272. if fp:
  273. fp.close()
  274. if parent:
  275. setattr(parent, partname, m)
  276. self.msgout(3, "import_module ->", m)
  277. return m
  278. def load_module(self, fqname, fp, pathname, file_info):
  279. suffix, mode, type = file_info
  280. self.msgin(2, "load_module", fqname, fp and "fp", pathname)
  281. if type == _PKG_DIRECTORY:
  282. m = self.load_package(fqname, pathname)
  283. self.msgout(2, "load_module ->", m)
  284. return m
  285. if type == _PY_SOURCE:
  286. co = compile(fp.read(), pathname, 'exec')
  287. elif type == _PY_COMPILED:
  288. try:
  289. data = fp.read()
  290. importlib._bootstrap_external._classify_pyc(data, fqname, {})
  291. except ImportError as exc:
  292. self.msgout(2, "raise ImportError: " + str(exc), pathname)
  293. raise
  294. co = marshal.loads(memoryview(data)[16:])
  295. else:
  296. co = None
  297. m = self.add_module(fqname)
  298. m.__file__ = pathname
  299. if co:
  300. if self.replace_paths:
  301. co = self.replace_paths_in_code(co)
  302. m.__code__ = co
  303. self.scan_code(co, m)
  304. self.msgout(2, "load_module ->", m)
  305. return m
  306. def _add_badmodule(self, name, caller):
  307. if name not in self.badmodules:
  308. self.badmodules[name] = {}
  309. if caller:
  310. self.badmodules[name][caller.__name__] = 1
  311. else:
  312. self.badmodules[name]["-"] = 1
  313. def _safe_import_hook(self, name, caller, fromlist, level=-1):
  314. # wrapper for self.import_hook() that won't raise ImportError
  315. if name in self.badmodules:
  316. self._add_badmodule(name, caller)
  317. return
  318. try:
  319. self.import_hook(name, caller, level=level)
  320. except ImportError as msg:
  321. self.msg(2, "ImportError:", str(msg))
  322. self._add_badmodule(name, caller)
  323. except SyntaxError as msg:
  324. self.msg(2, "SyntaxError:", str(msg))
  325. self._add_badmodule(name, caller)
  326. else:
  327. if fromlist:
  328. for sub in fromlist:
  329. fullname = name + "." + sub
  330. if fullname in self.badmodules:
  331. self._add_badmodule(fullname, caller)
  332. continue
  333. try:
  334. self.import_hook(name, caller, [sub], level=level)
  335. except ImportError as msg:
  336. self.msg(2, "ImportError:", str(msg))
  337. self._add_badmodule(fullname, caller)
  338. def scan_opcodes(self, co):
  339. # Scan the code, and yield 'interesting' opcode combinations
  340. for name in dis._find_store_names(co):
  341. yield "store", (name,)
  342. for name, level, fromlist in dis._find_imports(co):
  343. if level == 0: # absolute import
  344. yield "absolute_import", (fromlist, name)
  345. else: # relative import
  346. yield "relative_import", (level, fromlist, name)
  347. def scan_code(self, co, m):
  348. code = co.co_code
  349. scanner = self.scan_opcodes
  350. for what, args in scanner(co):
  351. if what == "store":
  352. name, = args
  353. m.globalnames[name] = 1
  354. elif what == "absolute_import":
  355. fromlist, name = args
  356. have_star = 0
  357. if fromlist is not None:
  358. if "*" in fromlist:
  359. have_star = 1
  360. fromlist = [f for f in fromlist if f != "*"]
  361. self._safe_import_hook(name, m, fromlist, level=0)
  362. if have_star:
  363. # We've encountered an "import *". If it is a Python module,
  364. # the code has already been parsed and we can suck out the
  365. # global names.
  366. mm = None
  367. if m.__path__:
  368. # At this point we don't know whether 'name' is a
  369. # submodule of 'm' or a global module. Let's just try
  370. # the full name first.
  371. mm = self.modules.get(m.__name__ + "." + name)
  372. if mm is None:
  373. mm = self.modules.get(name)
  374. if mm is not None:
  375. m.globalnames.update(mm.globalnames)
  376. m.starimports.update(mm.starimports)
  377. if mm.__code__ is None:
  378. m.starimports[name] = 1
  379. else:
  380. m.starimports[name] = 1
  381. elif what == "relative_import":
  382. level, fromlist, name = args
  383. if name:
  384. self._safe_import_hook(name, m, fromlist, level=level)
  385. else:
  386. parent = self.determine_parent(m, level=level)
  387. self._safe_import_hook(parent.__name__, None, fromlist, level=0)
  388. else:
  389. # We don't expect anything else from the generator.
  390. raise RuntimeError(what)
  391. for c in co.co_consts:
  392. if isinstance(c, type(co)):
  393. self.scan_code(c, m)
  394. def load_package(self, fqname, pathname):
  395. self.msgin(2, "load_package", fqname, pathname)
  396. newname = replacePackageMap.get(fqname)
  397. if newname:
  398. fqname = newname
  399. m = self.add_module(fqname)
  400. m.__file__ = pathname
  401. m.__path__ = [pathname]
  402. # As per comment at top of file, simulate runtime __path__ additions.
  403. m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
  404. fp, buf, stuff = self.find_module("__init__", m.__path__)
  405. try:
  406. self.load_module(fqname, fp, buf, stuff)
  407. self.msgout(2, "load_package ->", m)
  408. return m
  409. finally:
  410. if fp:
  411. fp.close()
  412. def add_module(self, fqname):
  413. if fqname in self.modules:
  414. return self.modules[fqname]
  415. self.modules[fqname] = m = Module(fqname)
  416. return m
  417. def find_module(self, name, path, parent=None):
  418. if parent is not None:
  419. # assert path is not None
  420. fullname = parent.__name__+'.'+name
  421. else:
  422. fullname = name
  423. if fullname in self.excludes:
  424. self.msgout(3, "find_module -> Excluded", fullname)
  425. raise ImportError(name)
  426. if path is None:
  427. if name in sys.builtin_module_names:
  428. return (None, None, ("", "", _C_BUILTIN))
  429. path = self.path
  430. return _find_module(name, path)
  431. def report(self):
  432. """Print a report to stdout, listing the found modules with their
  433. paths, as well as modules that are missing, or seem to be missing.
  434. """
  435. print()
  436. print(" %-25s %s" % ("Name", "File"))
  437. print(" %-25s %s" % ("----", "----"))
  438. # Print modules found
  439. keys = sorted(self.modules.keys())
  440. for key in keys:
  441. m = self.modules[key]
  442. if m.__path__:
  443. print("P", end=' ')
  444. else:
  445. print("m", end=' ')
  446. print("%-25s" % key, m.__file__ or "")
  447. # Print missing modules
  448. missing, maybe = self.any_missing_maybe()
  449. if missing:
  450. print()
  451. print("Missing modules:")
  452. for name in missing:
  453. mods = sorted(self.badmodules[name].keys())
  454. print("?", name, "imported from", ', '.join(mods))
  455. # Print modules that may be missing, but then again, maybe not...
  456. if maybe:
  457. print()
  458. print("Submodules that appear to be missing, but could also be", end=' ')
  459. print("global names in the parent package:")
  460. for name in maybe:
  461. mods = sorted(self.badmodules[name].keys())
  462. print("?", name, "imported from", ', '.join(mods))
  463. def any_missing(self):
  464. """Return a list of modules that appear to be missing. Use
  465. any_missing_maybe() if you want to know which modules are
  466. certain to be missing, and which *may* be missing.
  467. """
  468. missing, maybe = self.any_missing_maybe()
  469. return missing + maybe
  470. def any_missing_maybe(self):
  471. """Return two lists, one with modules that are certainly missing
  472. and one with modules that *may* be missing. The latter names could
  473. either be submodules *or* just global names in the package.
  474. The reason it can't always be determined is that it's impossible to
  475. tell which names are imported when "from module import *" is done
  476. with an extension module, short of actually importing it.
  477. """
  478. missing = []
  479. maybe = []
  480. for name in self.badmodules:
  481. if name in self.excludes:
  482. continue
  483. i = name.rfind(".")
  484. if i < 0:
  485. missing.append(name)
  486. continue
  487. subname = name[i+1:]
  488. pkgname = name[:i]
  489. pkg = self.modules.get(pkgname)
  490. if pkg is not None:
  491. if pkgname in self.badmodules[name]:
  492. # The package tried to import this module itself and
  493. # failed. It's definitely missing.
  494. missing.append(name)
  495. elif subname in pkg.globalnames:
  496. # It's a global in the package: definitely not missing.
  497. pass
  498. elif pkg.starimports:
  499. # It could be missing, but the package did an "import *"
  500. # from a non-Python module, so we simply can't be sure.
  501. maybe.append(name)
  502. else:
  503. # It's not a global in the package, the package didn't
  504. # do funny star imports, it's very likely to be missing.
  505. # The symbol could be inserted into the package from the
  506. # outside, but since that's not good style we simply list
  507. # it missing.
  508. missing.append(name)
  509. else:
  510. missing.append(name)
  511. missing.sort()
  512. maybe.sort()
  513. return missing, maybe
  514. def replace_paths_in_code(self, co):
  515. new_filename = original_filename = os.path.normpath(co.co_filename)
  516. for f, r in self.replace_paths:
  517. if original_filename.startswith(f):
  518. new_filename = r + original_filename[len(f):]
  519. break
  520. if self.debug and original_filename not in self.processed_paths:
  521. if new_filename != original_filename:
  522. self.msgout(2, "co_filename %r changed to %r" \
  523. % (original_filename,new_filename,))
  524. else:
  525. self.msgout(2, "co_filename %r remains unchanged" \
  526. % (original_filename,))
  527. self.processed_paths.append(original_filename)
  528. consts = list(co.co_consts)
  529. for i in range(len(consts)):
  530. if isinstance(consts[i], type(co)):
  531. consts[i] = self.replace_paths_in_code(consts[i])
  532. return co.replace(co_consts=tuple(consts), co_filename=new_filename)
  533. def test():
  534. # Parse command line
  535. import getopt
  536. try:
  537. opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
  538. except getopt.error as msg:
  539. print(msg)
  540. return
  541. # Process options
  542. debug = 1
  543. domods = 0
  544. addpath = []
  545. exclude = []
  546. for o, a in opts:
  547. if o == '-d':
  548. debug = debug + 1
  549. if o == '-m':
  550. domods = 1
  551. if o == '-p':
  552. addpath = addpath + a.split(os.pathsep)
  553. if o == '-q':
  554. debug = 0
  555. if o == '-x':
  556. exclude.append(a)
  557. # Provide default arguments
  558. if not args:
  559. script = "hello.py"
  560. else:
  561. script = args[0]
  562. # Set the path based on sys.path and the script directory
  563. path = sys.path[:]
  564. path[0] = os.path.dirname(script)
  565. path = addpath + path
  566. if debug > 1:
  567. print("path:")
  568. for item in path:
  569. print(" ", repr(item))
  570. # Create the module finder and turn its crank
  571. mf = ModuleFinder(path, debug, exclude)
  572. for arg in args[1:]:
  573. if arg == '-m':
  574. domods = 1
  575. continue
  576. if domods:
  577. if arg[-2:] == '.*':
  578. mf.import_hook(arg[:-2], None, ["*"])
  579. else:
  580. mf.import_hook(arg)
  581. else:
  582. mf.load_file(arg)
  583. mf.run_script(script)
  584. mf.report()
  585. return mf # for -i debugging
# Run the command-line driver only when this file is executed as a script.
if __name__ == '__main__':
    try:
        # Keep the result in a module-level name (test() returns the finder
        # "for -i debugging").
        mf = test()
    except KeyboardInterrupt:
        # Ctrl-C: print a short notice instead of a traceback.
        print("\n[interrupted]")