Transitions.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. #
  2. # Plex - Transition Maps
  3. #
  4. # This version represents state sets directly as dicts for speed.
  5. #
  6. from __future__ import absolute_import
  7. try:
  8. from sys import maxsize as maxint
  9. except ImportError:
  10. from sys import maxint
  11. class TransitionMap(object):
  12. """
  13. A TransitionMap maps an input event to a set of states.
  14. An input event is one of: a range of character codes,
  15. the empty string (representing an epsilon move), or one
  16. of the special symbols BOL, EOL, EOF.
  17. For characters, this implementation compactly represents
  18. the map by means of a list:
  19. [code_0, states_0, code_1, states_1, code_2, states_2,
  20. ..., code_n-1, states_n-1, code_n]
  21. where |code_i| is a character code, and |states_i| is a
  22. set of states corresponding to characters with codes |c|
  23. in the range |code_i| <= |c| <= |code_i+1|.
  24. The following invariants hold:
  25. n >= 1
  26. code_0 == -maxint
  27. code_n == maxint
  28. code_i < code_i+1 for i in 0..n-1
  29. states_0 == states_n-1
  30. Mappings for the special events '', BOL, EOL, EOF are
  31. kept separately in a dictionary.
  32. """
  33. map = None # The list of codes and states
  34. special = None # Mapping for special events
  35. def __init__(self, map=None, special=None):
  36. if not map:
  37. map = [-maxint, {}, maxint]
  38. if not special:
  39. special = {}
  40. self.map = map
  41. self.special = special
  42. #self.check() ###
  43. def add(self, event, new_state,
  44. TupleType=tuple):
  45. """
  46. Add transition to |new_state| on |event|.
  47. """
  48. if type(event) is TupleType:
  49. code0, code1 = event
  50. i = self.split(code0)
  51. j = self.split(code1)
  52. map = self.map
  53. while i < j:
  54. map[i + 1][new_state] = 1
  55. i += 2
  56. else:
  57. self.get_special(event)[new_state] = 1
  58. def add_set(self, event, new_set,
  59. TupleType=tuple):
  60. """
  61. Add transitions to the states in |new_set| on |event|.
  62. """
  63. if type(event) is TupleType:
  64. code0, code1 = event
  65. i = self.split(code0)
  66. j = self.split(code1)
  67. map = self.map
  68. while i < j:
  69. map[i + 1].update(new_set)
  70. i += 2
  71. else:
  72. self.get_special(event).update(new_set)
  73. def get_epsilon(self,
  74. none=None):
  75. """
  76. Return the mapping for epsilon, or None.
  77. """
  78. return self.special.get('', none)
  79. def iteritems(self,
  80. len=len):
  81. """
  82. Return the mapping as an iterable of ((code1, code2), state_set) and
  83. (special_event, state_set) pairs.
  84. """
  85. result = []
  86. map = self.map
  87. else_set = map[1]
  88. i = 0
  89. n = len(map) - 1
  90. code0 = map[0]
  91. while i < n:
  92. set = map[i + 1]
  93. code1 = map[i + 2]
  94. if set or else_set:
  95. result.append(((code0, code1), set))
  96. code0 = code1
  97. i += 2
  98. for event, set in self.special.items():
  99. if set:
  100. result.append((event, set))
  101. return iter(result)
  102. items = iteritems
  103. # ------------------- Private methods --------------------
  104. def split(self, code,
  105. len=len, maxint=maxint):
  106. """
  107. Search the list for the position of the split point for |code|,
  108. inserting a new split point if necessary. Returns index |i| such
  109. that |code| == |map[i]|.
  110. """
  111. # We use a funky variation on binary search.
  112. map = self.map
  113. hi = len(map) - 1
  114. # Special case: code == map[-1]
  115. if code == maxint:
  116. return hi
  117. # General case
  118. lo = 0
  119. # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
  120. while hi - lo >= 4:
  121. # Find midpoint truncated to even index
  122. mid = ((lo + hi) // 2) & ~1
  123. if code < map[mid]:
  124. hi = mid
  125. else:
  126. lo = mid
  127. # map[lo] <= code < map[hi] and hi - lo == 2
  128. if map[lo] == code:
  129. return lo
  130. else:
  131. map[hi:hi] = [code, map[hi - 1].copy()]
  132. #self.check() ###
  133. return hi
  134. def get_special(self, event):
  135. """
  136. Get state set for special event, adding a new entry if necessary.
  137. """
  138. special = self.special
  139. set = special.get(event, None)
  140. if not set:
  141. set = {}
  142. special[event] = set
  143. return set
  144. # --------------------- Conversion methods -----------------------
  145. def __str__(self):
  146. map_strs = []
  147. map = self.map
  148. n = len(map)
  149. i = 0
  150. while i < n:
  151. code = map[i]
  152. if code == -maxint:
  153. code_str = "-inf"
  154. elif code == maxint:
  155. code_str = "inf"
  156. else:
  157. code_str = str(code)
  158. map_strs.append(code_str)
  159. i += 1
  160. if i < n:
  161. map_strs.append(state_set_str(map[i]))
  162. i += 1
  163. special_strs = {}
  164. for event, set in self.special.items():
  165. special_strs[event] = state_set_str(set)
  166. return "[%s]+%s" % (
  167. ','.join(map_strs),
  168. special_strs
  169. )
  170. # --------------------- Debugging methods -----------------------
  171. def check(self):
  172. """Check data structure integrity."""
  173. if not self.map[-3] < self.map[-1]:
  174. print(self)
  175. assert 0
  176. def dump(self, file):
  177. map = self.map
  178. i = 0
  179. n = len(map) - 1
  180. while i < n:
  181. self.dump_range(map[i], map[i + 2], map[i + 1], file)
  182. i += 2
  183. for event, set in self.special.items():
  184. if set:
  185. if not event:
  186. event = 'empty'
  187. self.dump_trans(event, set, file)
  188. def dump_range(self, code0, code1, set, file):
  189. if set:
  190. if code0 == -maxint:
  191. if code1 == maxint:
  192. k = "any"
  193. else:
  194. k = "< %s" % self.dump_char(code1)
  195. elif code1 == maxint:
  196. k = "> %s" % self.dump_char(code0 - 1)
  197. elif code0 == code1 - 1:
  198. k = self.dump_char(code0)
  199. else:
  200. k = "%s..%s" % (self.dump_char(code0),
  201. self.dump_char(code1 - 1))
  202. self.dump_trans(k, set, file)
  203. def dump_char(self, code):
  204. if 0 <= code <= 255:
  205. return repr(chr(code))
  206. else:
  207. return "chr(%d)" % code
  208. def dump_trans(self, key, set, file):
  209. file.write(" %s --> %s\n" % (key, self.dump_set(set)))
  210. def dump_set(self, set):
  211. return state_set_str(set)
  212. #
  213. # State set manipulation functions
  214. #
  215. #def merge_state_sets(set1, set2):
  216. # for state in set2.keys():
  217. # set1[state] = 1
  218. def state_set_str(set):
  219. return "[%s]" % ','.join(["S%d" % state.number for state in set])