123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- """A bottom-up tree matching algorithm implementation meant to speed
- up 2to3's matching process. After the tree patterns are reduced to
- their rarest linear path, a linear Aho-Corasick automaton is
- created. The linear automaton traverses the linear paths from the
- leaves to the root of the AST and returns a set of nodes for further
- matching. This reduces significantly the number of candidate nodes."""
- __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
- import logging
- import itertools
- from collections import defaultdict
- from . import pytree
- from .btm_utils import reduce_tree
- class BMNode(object):
- """Class for a node of the Aho-Corasick automaton used in matching"""
- count = itertools.count()
- def __init__(self):
- self.transition_table = {}
- self.fixers = []
- self.id = next(BMNode.count)
- self.content = ''
- class BottomMatcher(object):
- """The main matcher class. After instantiating the patterns should
- be added using the add_fixer method"""
- def __init__(self):
- self.match = set()
- self.root = BMNode()
- self.nodes = [self.root]
- self.fixers = []
- self.logger = logging.getLogger("RefactoringTool")
- def add_fixer(self, fixer):
- """Reduces a fixer's pattern tree to a linear path and adds it
- to the matcher(a common Aho-Corasick automaton). The fixer is
- appended on the matching states and called when they are
- reached"""
- self.fixers.append(fixer)
- tree = reduce_tree(fixer.pattern_tree)
- linear = tree.get_linear_subpattern()
- match_nodes = self.add(linear, start=self.root)
- for match_node in match_nodes:
- match_node.fixers.append(fixer)
- def add(self, pattern, start):
- "Recursively adds a linear pattern to the AC automaton"
- #print("adding pattern", pattern, "to", start)
- if not pattern:
- #print("empty pattern")
- return [start]
- if isinstance(pattern[0], tuple):
- #alternatives
- #print("alternatives")
- match_nodes = []
- for alternative in pattern[0]:
- #add all alternatives, and add the rest of the pattern
- #to each end node
- end_nodes = self.add(alternative, start=start)
- for end in end_nodes:
- match_nodes.extend(self.add(pattern[1:], end))
- return match_nodes
- else:
- #single token
- #not last
- if pattern[0] not in start.transition_table:
- #transition did not exist, create new
- next_node = BMNode()
- start.transition_table[pattern[0]] = next_node
- else:
- #transition exists already, follow
- next_node = start.transition_table[pattern[0]]
- if pattern[1:]:
- end_nodes = self.add(pattern[1:], start=next_node)
- else:
- end_nodes = [next_node]
- return end_nodes
- def run(self, leaves):
- """The main interface with the bottom matcher. The tree is
- traversed from the bottom using the constructed
- automaton. Nodes are only checked once as the tree is
- retraversed. When the automaton fails, we give it one more
- shot(in case the above tree matches as a whole with the
- rejected leaf), then we break for the next leaf. There is the
- special case of multiple arguments(see code comments) where we
- recheck the nodes
- Args:
- The leaves of the AST tree to be matched
- Returns:
- A dictionary of node matches with fixers as the keys
- """
- current_ac_node = self.root
- results = defaultdict(list)
- for leaf in leaves:
- current_ast_node = leaf
- while current_ast_node:
- current_ast_node.was_checked = True
- for child in current_ast_node.children:
- # multiple statements, recheck
- if isinstance(child, pytree.Leaf) and child.value == ";":
- current_ast_node.was_checked = False
- break
- if current_ast_node.type == 1:
- #name
- node_token = current_ast_node.value
- else:
- node_token = current_ast_node.type
- if node_token in current_ac_node.transition_table:
- #token matches
- current_ac_node = current_ac_node.transition_table[node_token]
- for fixer in current_ac_node.fixers:
- results[fixer].append(current_ast_node)
- else:
- #matching failed, reset automaton
- current_ac_node = self.root
- if (current_ast_node.parent is not None
- and current_ast_node.parent.was_checked):
- #the rest of the tree upwards has been checked, next leaf
- break
- #recheck the rejected node once from the root
- if node_token in current_ac_node.transition_table:
- #token matches
- current_ac_node = current_ac_node.transition_table[node_token]
- for fixer in current_ac_node.fixers:
- results[fixer].append(current_ast_node)
- current_ast_node = current_ast_node.parent
- return results
- def print_ac(self):
- "Prints a graphviz diagram of the BM automaton(for debugging)"
- print("digraph g{")
- def print_node(node):
- for subnode_key in node.transition_table.keys():
- subnode = node.transition_table[subnode_key]
- print("%d -> %d [label=%s] //%s" %
- (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
- if subnode_key == 1:
- print(subnode.content)
- print_node(subnode)
- print_node(self.root)
- print("}")
- # taken from pytree.py for debugging; only used by print_ac
- _type_reprs = {}
- def type_repr(type_num):
- global _type_reprs
- if not _type_reprs:
- from .pygram import python_symbols
- # printing tokens is possible but not as useful
- # from .pgen2 import token // token.__dict__.items():
- for name, val in python_symbols.__dict__.items():
- if type(val) == int: _type_reprs[val] = name
- return _type_reprs.setdefault(type_num, type_num)
|