123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
- #=======================================================================
- #
- # Python Lexical Analyser
- #
- # Classes for building NFAs and DFAs
- #
- #=======================================================================
- from __future__ import absolute_import
- import sys
- from .Transitions import TransitionMap
- try:
- from sys import maxsize as maxint
- except ImportError:
- from sys import maxint
- try:
- unichr
- except NameError:
- unichr = chr
- LOWEST_PRIORITY = -maxint
- class Machine(object):
- """A collection of Nodes representing an NFA or DFA."""
- states = None # [Node]
- next_state_number = 1
- initial_states = None # {(name, bol): Node}
- def __init__(self):
- self.states = []
- self.initial_states = {}
- def __del__(self):
- #print "Destroying", self ###
- for state in self.states:
- state.destroy()
- def new_state(self):
- """Add a new state to the machine and return it."""
- s = Node()
- n = self.next_state_number
- self.next_state_number = n + 1
- s.number = n
- self.states.append(s)
- return s
- def new_initial_state(self, name):
- state = self.new_state()
- self.make_initial_state(name, state)
- return state
- def make_initial_state(self, name, state):
- self.initial_states[name] = state
- def get_initial_state(self, name):
- return self.initial_states[name]
- def dump(self, file):
- file.write("Plex.Machine:\n")
- if self.initial_states is not None:
- file.write(" Initial states:\n")
- for (name, state) in sorted(self.initial_states.items()):
- file.write(" '%s': %d\n" % (name, state.number))
- for s in self.states:
- s.dump(file)
- class Node(object):
- """A state of an NFA or DFA."""
- transitions = None # TransitionMap
- action = None # Action
- action_priority = None # integer
- number = 0 # for debug output
- epsilon_closure = None # used by nfa_to_dfa()
- def __init__(self):
- # Preinitialise the list of empty transitions, because
- # the nfa-to-dfa algorithm needs it
- #self.transitions = {'':[]}
- self.transitions = TransitionMap()
- self.action_priority = LOWEST_PRIORITY
- def destroy(self):
- #print "Destroying", self ###
- self.transitions = None
- self.action = None
- self.epsilon_closure = None
- def add_transition(self, event, new_state):
- self.transitions.add(event, new_state)
- def link_to(self, state):
- """Add an epsilon-move from this state to another state."""
- self.add_transition('', state)
- def set_action(self, action, priority):
- """Make this an accepting state with the given action. If
- there is already an action, choose the action with highest
- priority."""
- if priority > self.action_priority:
- self.action = action
- self.action_priority = priority
- def get_action(self):
- return self.action
- def get_action_priority(self):
- return self.action_priority
- def is_accepting(self):
- return self.action is not None
- def __str__(self):
- return "State %d" % self.number
- def dump(self, file):
- # Header
- file.write(" State %d:\n" % self.number)
- # Transitions
- # self.dump_transitions(file)
- self.transitions.dump(file)
- # Action
- action = self.action
- priority = self.action_priority
- if action is not None:
- file.write(" %s [priority %d]\n" % (action, priority))
- def __lt__(self, other):
- return self.number < other.number
- class FastMachine(object):
- """
- FastMachine is a deterministic machine represented in a way that
- allows fast scanning.
- """
- initial_states = None # {state_name:state}
- states = None # [state] where state = {event:state, 'else':state, 'action':Action}
- next_number = 1 # for debugging
- new_state_template = {
- '': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
- }
- def __init__(self):
- self.initial_states = {}
- self.states = []
- def __del__(self):
- for state in self.states:
- state.clear()
- def new_state(self, action=None):
- number = self.next_number
- self.next_number = number + 1
- result = self.new_state_template.copy()
- result['number'] = number
- result['action'] = action
- self.states.append(result)
- return result
- def make_initial_state(self, name, state):
- self.initial_states[name] = state
- def add_transitions(self, state, event, new_state, maxint=maxint):
- if type(event) is tuple:
- code0, code1 = event
- if code0 == -maxint:
- state['else'] = new_state
- elif code1 != maxint:
- while code0 < code1:
- state[unichr(code0)] = new_state
- code0 += 1
- else:
- state[event] = new_state
- def get_initial_state(self, name):
- return self.initial_states[name]
- def dump(self, file):
- file.write("Plex.FastMachine:\n")
- file.write(" Initial states:\n")
- for name, state in sorted(self.initial_states.items()):
- file.write(" %s: %s\n" % (repr(name), state['number']))
- for state in self.states:
- self.dump_state(state, file)
- def dump_state(self, state, file):
- # Header
- file.write(" State %d:\n" % state['number'])
- # Transitions
- self.dump_transitions(state, file)
- # Action
- action = state['action']
- if action is not None:
- file.write(" %s\n" % action)
- def dump_transitions(self, state, file):
- chars_leading_to_state = {}
- special_to_state = {}
- for (c, s) in state.items():
- if len(c) == 1:
- chars = chars_leading_to_state.get(id(s), None)
- if chars is None:
- chars = []
- chars_leading_to_state[id(s)] = chars
- chars.append(c)
- elif len(c) <= 4:
- special_to_state[c] = s
- ranges_to_state = {}
- for state in self.states:
- char_list = chars_leading_to_state.get(id(state), None)
- if char_list:
- ranges = self.chars_to_ranges(char_list)
- ranges_to_state[ranges] = state
- ranges_list = ranges_to_state.keys()
- ranges_list.sort()
- for ranges in ranges_list:
- key = self.ranges_to_string(ranges)
- state = ranges_to_state[ranges]
- file.write(" %s --> State %d\n" % (key, state['number']))
- for key in ('bol', 'eol', 'eof', 'else'):
- state = special_to_state.get(key, None)
- if state:
- file.write(" %s --> State %d\n" % (key, state['number']))
- def chars_to_ranges(self, char_list):
- char_list.sort()
- i = 0
- n = len(char_list)
- result = []
- while i < n:
- c1 = ord(char_list[i])
- c2 = c1
- i += 1
- while i < n and ord(char_list[i]) == c2 + 1:
- i += 1
- c2 += 1
- result.append((chr(c1), chr(c2)))
- return tuple(result)
- def ranges_to_string(self, range_list):
- return ','.join(map(self.range_to_string, range_list))
- def range_to_string(self, range_tuple):
- (c1, c2) = range_tuple
- if c1 == c2:
- return repr(c1)
- else:
- return "%s..%s" % (repr(c1), repr(c2))
|