pyclbr.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. """Parse a Python module and describe its classes and functions.
  2. Parse enough of a Python file to recognize imports and class and
  3. function definitions, and to find out the superclasses of a class.
  4. The interface consists of a single function:
  5. readmodule_ex(module, path=None)
  6. where module is the name of a Python module, and path is an optional
  7. list of directories where the module is to be searched. If present,
  8. path is prepended to the system search path sys.path. The return value
  9. is a dictionary. The keys of the dictionary are the names of the
  10. classes and functions defined in the module (including classes that are
  11. defined via the from XXX import YYY construct). The values are
  12. instances of classes Class and Function. One special key/value pair is
  13. present for packages: the key '__path__' has a list as its value which
  14. contains the package search path.
  15. Classes and Functions have a common superclass: _Object. Every instance
  16. has the following attributes:
  17. module -- name of the module;
  18. name -- name of the object;
  19. file -- file in which the object is defined;
  20. lineno -- line in the file where the object's definition starts;
  21. end_lineno -- line in the file where the object's definition ends;
  22. parent -- parent of this object, if any;
  23. children -- nested objects contained in this object.
  24. The 'children' attribute is a dictionary mapping names to objects.
  25. Instances of Function describe functions with the attributes from _Object,
  26. plus the following:
  27. is_async -- if a function is defined with an 'async' prefix
  28. Instances of Class describe classes with the attributes from _Object,
  29. plus the following:
  30. super -- list of super classes (Class instances if possible);
  31. methods -- mapping of method names to beginning line numbers.
  32. If the name of a super class is not recognized, the corresponding
  33. entry in the list of super classes is not a class instance but a
  34. string giving the name of the super class. Since import statements
  35. are recognized and imported modules are scanned as well, this
  36. shouldn't happen often.
  37. """
  38. import ast
  39. import sys
  40. import importlib.util
# Names exported by ``from pyclbr import *``.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a full (dotted) module name to the dictionary describing it, so a
# module is only read once per process.
_modules = {}  # Initialize cache of modules we've seen.
  43. class _Object:
  44. "Information about Python class or function."
  45. def __init__(self, module, name, file, lineno, end_lineno, parent):
  46. self.module = module
  47. self.name = name
  48. self.file = file
  49. self.lineno = lineno
  50. self.end_lineno = end_lineno
  51. self.parent = parent
  52. self.children = {}
  53. if parent is not None:
  54. parent.children[name] = self
  55. # Odd Function and Class signatures are for back-compatibility.
  56. class Function(_Object):
  57. "Information about a Python function, including methods."
  58. def __init__(self, module, name, file, lineno,
  59. parent=None, is_async=False, *, end_lineno=None):
  60. super().__init__(module, name, file, lineno, end_lineno, parent)
  61. self.is_async = is_async
  62. if isinstance(parent, Class):
  63. parent.methods[name] = lineno
  64. class Class(_Object):
  65. "Information about a Python class."
  66. def __init__(self, module, name, super_, file, lineno,
  67. parent=None, *, end_lineno=None):
  68. super().__init__(module, name, file, lineno, end_lineno, parent)
  69. self.super = super_ or []
  70. self.methods = {}
  71. # These 2 functions are used in these tests
  72. # Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
  73. def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
  74. "Return a Function after nesting within ob."
  75. return Function(ob.module, func_name, ob.file, lineno,
  76. parent=ob, is_async=is_async, end_lineno=end_lineno)
  77. def _nest_class(ob, class_name, lineno, end_lineno, super=None):
  78. "Return a Class after nesting within ob."
  79. return Class(ob.module, class_name, super, ob.file, lineno,
  80. parent=ob, end_lineno=end_lineno)
  81. def readmodule(module, path=None):
  82. """Return Class objects for the top-level classes in module.
  83. This is the original interface, before Functions were added.
  84. """
  85. res = {}
  86. for key, value in _readmodule(module, path or []).items():
  87. if isinstance(value, Class):
  88. res[key] = value
  89. return res
  90. def readmodule_ex(module, path=None):
  91. """Return a dictionary with all functions and classes in module.
  92. Search for module in PATH + sys.path.
  93. If possible, include imported superclasses.
  94. Do this by reading source, without importing (and executing) it.
  95. """
  96. return _readmodule(module, path or [])
  97. def _readmodule(module, path, inpackage=None):
  98. """Do the hard work for readmodule[_ex].
  99. If inpackage is given, it must be the dotted name of the package in
  100. which we are searching for a submodule, and then PATH must be the
  101. package search path; otherwise, we are searching for a top-level
  102. module, and path is combined with sys.path.
  103. """
  104. # Compute the full module name (prepending inpackage if set).
  105. if inpackage is not None:
  106. fullmodule = "%s.%s" % (inpackage, module)
  107. else:
  108. fullmodule = module
  109. # Check in the cache.
  110. if fullmodule in _modules:
  111. return _modules[fullmodule]
  112. # Initialize the dict for this module's contents.
  113. tree = {}
  114. # Check if it is a built-in module; we don't do much for these.
  115. if module in sys.builtin_module_names and inpackage is None:
  116. _modules[module] = tree
  117. return tree
  118. # Check for a dotted module name.
  119. i = module.rfind('.')
  120. if i >= 0:
  121. package = module[:i]
  122. submodule = module[i+1:]
  123. parent = _readmodule(package, path, inpackage)
  124. if inpackage is not None:
  125. package = "%s.%s" % (inpackage, package)
  126. if not '__path__' in parent:
  127. raise ImportError('No package named {}'.format(package))
  128. return _readmodule(submodule, parent['__path__'], package)
  129. # Search the path for the module.
  130. f = None
  131. if inpackage is not None:
  132. search_path = path
  133. else:
  134. search_path = path + sys.path
  135. spec = importlib.util._find_spec_from_path(fullmodule, search_path)
  136. if spec is None:
  137. raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)
  138. _modules[fullmodule] = tree
  139. # Is module a package?
  140. if spec.submodule_search_locations is not None:
  141. tree['__path__'] = spec.submodule_search_locations
  142. try:
  143. source = spec.loader.get_source(fullmodule)
  144. except (AttributeError, ImportError):
  145. # If module is not Python source, we cannot do anything.
  146. return tree
  147. else:
  148. if source is None:
  149. return tree
  150. fname = spec.loader.get_filename(fullmodule)
  151. return _create_tree(fullmodule, path, fname, source, tree, inpackage)
  152. class _ModuleBrowser(ast.NodeVisitor):
  153. def __init__(self, module, path, file, tree, inpackage):
  154. self.path = path
  155. self.tree = tree
  156. self.file = file
  157. self.module = module
  158. self.inpackage = inpackage
  159. self.stack = []
  160. def visit_ClassDef(self, node):
  161. bases = []
  162. for base in node.bases:
  163. name = ast.unparse(base)
  164. if name in self.tree:
  165. # We know this super class.
  166. bases.append(self.tree[name])
  167. elif len(names := name.split(".")) > 1:
  168. # Super class form is module.class:
  169. # look in module for class.
  170. *_, module, class_ = names
  171. if module in _modules:
  172. bases.append(_modules[module].get(class_, name))
  173. else:
  174. bases.append(name)
  175. parent = self.stack[-1] if self.stack else None
  176. class_ = Class(self.module, node.name, bases, self.file, node.lineno,
  177. parent=parent, end_lineno=node.end_lineno)
  178. if parent is None:
  179. self.tree[node.name] = class_
  180. self.stack.append(class_)
  181. self.generic_visit(node)
  182. self.stack.pop()
  183. def visit_FunctionDef(self, node, *, is_async=False):
  184. parent = self.stack[-1] if self.stack else None
  185. function = Function(self.module, node.name, self.file, node.lineno,
  186. parent, is_async, end_lineno=node.end_lineno)
  187. if parent is None:
  188. self.tree[node.name] = function
  189. self.stack.append(function)
  190. self.generic_visit(node)
  191. self.stack.pop()
  192. def visit_AsyncFunctionDef(self, node):
  193. self.visit_FunctionDef(node, is_async=True)
  194. def visit_Import(self, node):
  195. if node.col_offset != 0:
  196. return
  197. for module in node.names:
  198. try:
  199. try:
  200. _readmodule(module.name, self.path, self.inpackage)
  201. except ImportError:
  202. _readmodule(module.name, [])
  203. except (ImportError, SyntaxError):
  204. # If we can't find or parse the imported module,
  205. # too bad -- don't die here.
  206. continue
  207. def visit_ImportFrom(self, node):
  208. if node.col_offset != 0:
  209. return
  210. try:
  211. module = "." * node.level
  212. if node.module:
  213. module += node.module
  214. module = _readmodule(module, self.path, self.inpackage)
  215. except (ImportError, SyntaxError):
  216. return
  217. for name in node.names:
  218. if name.name in module:
  219. self.tree[name.asname or name.name] = module[name.name]
  220. elif name.name == "*":
  221. for import_name, import_value in module.items():
  222. if import_name.startswith("_"):
  223. continue
  224. self.tree[import_name] = import_value
  225. def _create_tree(fullmodule, path, fname, source, tree, inpackage):
  226. mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
  227. mbrowser.visit(ast.parse(source))
  228. return mbrowser.tree
  229. def _main():
  230. "Print module output (default this file) for quick visual check."
  231. import os
  232. try:
  233. mod = sys.argv[1]
  234. except:
  235. mod = __file__
  236. if os.path.exists(mod):
  237. path = [os.path.dirname(mod)]
  238. mod = os.path.basename(mod)
  239. if mod.lower().endswith(".py"):
  240. mod = mod[:-3]
  241. else:
  242. path = []
  243. tree = readmodule_ex(mod, path)
  244. lineno_key = lambda a: getattr(a, 'lineno', 0)
  245. objs = sorted(tree.values(), key=lineno_key, reverse=True)
  246. indent_level = 2
  247. while objs:
  248. obj = objs.pop()
  249. if isinstance(obj, list):
  250. # Value is a __path__ key.
  251. continue
  252. if not hasattr(obj, 'indent'):
  253. obj.indent = 0
  254. if isinstance(obj, _Object):
  255. new_objs = sorted(obj.children.values(),
  256. key=lineno_key, reverse=True)
  257. for ob in new_objs:
  258. ob.indent = obj.indent + indent_level
  259. objs.extend(new_objs)
  260. if isinstance(obj, Class):
  261. print("{}class {} {} {}"
  262. .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
  263. elif isinstance(obj, Function):
  264. print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
# Run the quick visual check only when this file is executed as a script.
if __name__ == "__main__":
    _main()