# graphlib.py
  1. from types import GenericAlias
  2. __all__ = ["TopologicalSorter", "CycleError"]
  3. _NODE_OUT = -1
  4. _NODE_DONE = -2
  5. class _NodeInfo:
  6. __slots__ = "node", "npredecessors", "successors"
  7. def __init__(self, node):
  8. # The node this class is augmenting.
  9. self.node = node
  10. # Number of predecessors, generally >= 0. When this value falls to 0,
  11. # and is returned by get_ready(), this is set to _NODE_OUT and when the
  12. # node is marked done by a call to done(), set to _NODE_DONE.
  13. self.npredecessors = 0
  14. # List of successor nodes. The list can contain duplicated elements as
  15. # long as they're all reflected in the successor's npredecessors attribute.
  16. self.successors = []
  17. class CycleError(ValueError):
  18. """Subclass of ValueError raised by TopologicalSorter.prepare if cycles
  19. exist in the working graph.
  20. If multiple cycles exist, only one undefined choice among them will be reported
  21. and included in the exception. The detected cycle can be accessed via the second
  22. element in the *args* attribute of the exception instance and consists in a list
  23. of nodes, such that each node is, in the graph, an immediate predecessor of the
  24. next node in the list. In the reported list, the first and the last node will be
  25. the same, to make it clear that it is cyclic.
  26. """
  27. pass
  28. class TopologicalSorter:
  29. """Provides functionality to topologically sort a graph of hashable nodes"""
  30. def __init__(self, graph=None):
  31. self._node2info = {}
  32. self._ready_nodes = None
  33. self._npassedout = 0
  34. self._nfinished = 0
  35. if graph is not None:
  36. for node, predecessors in graph.items():
  37. self.add(node, *predecessors)
  38. def _get_nodeinfo(self, node):
  39. if (result := self._node2info.get(node)) is None:
  40. self._node2info[node] = result = _NodeInfo(node)
  41. return result
  42. def add(self, node, *predecessors):
  43. """Add a new node and its predecessors to the graph.
  44. Both the *node* and all elements in *predecessors* must be hashable.
  45. If called multiple times with the same node argument, the set of dependencies
  46. will be the union of all dependencies passed in.
  47. It is possible to add a node with no dependencies (*predecessors* is not provided)
  48. as well as provide a dependency twice. If a node that has not been provided before
  49. is included among *predecessors* it will be automatically added to the graph with
  50. no predecessors of its own.
  51. Raises ValueError if called after "prepare".
  52. """
  53. if self._ready_nodes is not None:
  54. raise ValueError("Nodes cannot be added after a call to prepare()")
  55. # Create the node -> predecessor edges
  56. nodeinfo = self._get_nodeinfo(node)
  57. nodeinfo.npredecessors += len(predecessors)
  58. # Create the predecessor -> node edges
  59. for pred in predecessors:
  60. pred_info = self._get_nodeinfo(pred)
  61. pred_info.successors.append(node)
  62. def prepare(self):
  63. """Mark the graph as finished and check for cycles in the graph.
  64. If any cycle is detected, "CycleError" will be raised, but "get_ready" can
  65. still be used to obtain as many nodes as possible until cycles block more
  66. progress. After a call to this function, the graph cannot be modified and
  67. therefore no more nodes can be added using "add".
  68. """
  69. if self._ready_nodes is not None:
  70. raise ValueError("cannot prepare() more than once")
  71. self._ready_nodes = [
  72. i.node for i in self._node2info.values() if i.npredecessors == 0
  73. ]
  74. # ready_nodes is set before we look for cycles on purpose:
  75. # if the user wants to catch the CycleError, that's fine,
  76. # they can continue using the instance to grab as many
  77. # nodes as possible before cycles block more progress
  78. cycle = self._find_cycle()
  79. if cycle:
  80. raise CycleError(f"nodes are in a cycle", cycle)
  81. def get_ready(self):
  82. """Return a tuple of all the nodes that are ready.
  83. Initially it returns all nodes with no predecessors; once those are marked
  84. as processed by calling "done", further calls will return all new nodes that
  85. have all their predecessors already processed. Once no more progress can be made,
  86. empty tuples are returned.
  87. Raises ValueError if called without calling "prepare" previously.
  88. """
  89. if self._ready_nodes is None:
  90. raise ValueError("prepare() must be called first")
  91. # Get the nodes that are ready and mark them
  92. result = tuple(self._ready_nodes)
  93. n2i = self._node2info
  94. for node in result:
  95. n2i[node].npredecessors = _NODE_OUT
  96. # Clean the list of nodes that are ready and update
  97. # the counter of nodes that we have returned.
  98. self._ready_nodes.clear()
  99. self._npassedout += len(result)
  100. return result
  101. def is_active(self):
  102. """Return ``True`` if more progress can be made and ``False`` otherwise.
  103. Progress can be made if cycles do not block the resolution and either there
  104. are still nodes ready that haven't yet been returned by "get_ready" or the
  105. number of nodes marked "done" is less than the number that have been returned
  106. by "get_ready".
  107. Raises ValueError if called without calling "prepare" previously.
  108. """
  109. if self._ready_nodes is None:
  110. raise ValueError("prepare() must be called first")
  111. return self._nfinished < self._npassedout or bool(self._ready_nodes)
  112. def __bool__(self):
  113. return self.is_active()
  114. def done(self, *nodes):
  115. """Marks a set of nodes returned by "get_ready" as processed.
  116. This method unblocks any successor of each node in *nodes* for being returned
  117. in the future by a call to "get_ready".
  118. Raises :exec:`ValueError` if any node in *nodes* has already been marked as
  119. processed by a previous call to this method, if a node was not added to the
  120. graph by using "add" or if called without calling "prepare" previously or if
  121. node has not yet been returned by "get_ready".
  122. """
  123. if self._ready_nodes is None:
  124. raise ValueError("prepare() must be called first")
  125. n2i = self._node2info
  126. for node in nodes:
  127. # Check if we know about this node (it was added previously using add()
  128. if (nodeinfo := n2i.get(node)) is None:
  129. raise ValueError(f"node {node!r} was not added using add()")
  130. # If the node has not being returned (marked as ready) previously, inform the user.
  131. stat = nodeinfo.npredecessors
  132. if stat != _NODE_OUT:
  133. if stat >= 0:
  134. raise ValueError(
  135. f"node {node!r} was not passed out (still not ready)"
  136. )
  137. elif stat == _NODE_DONE:
  138. raise ValueError(f"node {node!r} was already marked done")
  139. else:
  140. assert False, f"node {node!r}: unknown status {stat}"
  141. # Mark the node as processed
  142. nodeinfo.npredecessors = _NODE_DONE
  143. # Go to all the successors and reduce the number of predecessors, collecting all the ones
  144. # that are ready to be returned in the next get_ready() call.
  145. for successor in nodeinfo.successors:
  146. successor_info = n2i[successor]
  147. successor_info.npredecessors -= 1
  148. if successor_info.npredecessors == 0:
  149. self._ready_nodes.append(successor)
  150. self._nfinished += 1
  151. def _find_cycle(self):
  152. n2i = self._node2info
  153. stack = []
  154. itstack = []
  155. seen = set()
  156. node2stacki = {}
  157. for node in n2i:
  158. if node in seen:
  159. continue
  160. while True:
  161. if node in seen:
  162. # If we have seen already the node and is in the
  163. # current stack we have found a cycle.
  164. if node in node2stacki:
  165. return stack[node2stacki[node] :] + [node]
  166. # else go on to get next successor
  167. else:
  168. seen.add(node)
  169. itstack.append(iter(n2i[node].successors).__next__)
  170. node2stacki[node] = len(stack)
  171. stack.append(node)
  172. # Backtrack to the topmost stack entry with
  173. # at least another successor.
  174. while stack:
  175. try:
  176. node = itstack[-1]()
  177. break
  178. except StopIteration:
  179. del node2stacki[stack.pop()]
  180. itstack.pop()
  181. else:
  182. break
  183. return None
  184. def static_order(self):
  185. """Returns an iterable of nodes in a topological order.
  186. The particular order that is returned may depend on the specific
  187. order in which the items were inserted in the graph.
  188. Using this method does not require to call "prepare" or "done". If any
  189. cycle is detected, :exc:`CycleError` will be raised.
  190. """
  191. self.prepare()
  192. while self.is_active():
  193. node_group = self.get_ready()
  194. yield from node_group
  195. self.done(*node_group)
  196. __class_getitem__ = classmethod(GenericAlias)