actions.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. # actions.py
  2. from .exceptions import ParseException
  3. from .util import col, replaced_by_pep8
  4. class OnlyOnce:
  5. """
  6. Wrapper for parse actions, to ensure they are only called once.
  7. """
  8. def __init__(self, method_call):
  9. from .core import _trim_arity
  10. self.callable = _trim_arity(method_call)
  11. self.called = False
  12. def __call__(self, s, l, t):
  13. if not self.called:
  14. results = self.callable(s, l, t)
  15. self.called = True
  16. return results
  17. raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
  18. def reset(self):
  19. """
  20. Allow the associated parse action to be called once more.
  21. """
  22. self.called = False
  23. def match_only_at_col(n):
  24. """
  25. Helper method for defining parse actions that require matching at
  26. a specific column in the input text.
  27. """
  28. def verify_col(strg, locn, toks):
  29. if col(locn, strg) != n:
  30. raise ParseException(strg, locn, f"matched token not at column {n}")
  31. return verify_col
  32. def replace_with(repl_str):
  33. """
  34. Helper method for common parse actions that simply return
  35. a literal value. Especially useful when used with
  36. :class:`transform_string<ParserElement.transform_string>` ().
  37. Example::
  38. num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
  39. na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
  40. term = na | num
  41. term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
  42. """
  43. return lambda s, l, t: [repl_str]
  44. def remove_quotes(s, l, t):
  45. """
  46. Helper parse action for removing quotation marks from parsed
  47. quoted strings.
  48. Example::
  49. # by default, quotation marks are included in parsed results
  50. quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  51. # use remove_quotes to strip quotation marks from parsed results
  52. quoted_string.set_parse_action(remove_quotes)
  53. quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  54. """
  55. return t[0][1:-1]
  56. def with_attribute(*args, **attr_dict):
  57. """
  58. Helper to create a validating parse action to be used with start
  59. tags created with :class:`make_xml_tags` or
  60. :class:`make_html_tags`. Use ``with_attribute`` to qualify
  61. a starting tag with a required attribute value, to avoid false
  62. matches on common tags such as ``<TD>`` or ``<DIV>``.
  63. Call ``with_attribute`` with a series of attribute names and
  64. values. Specify the list of filter attributes names and values as:
  65. - keyword arguments, as in ``(align="right")``, or
  66. - as an explicit dict with ``**`` operator, when an attribute
  67. name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
  68. - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
  69. For attribute names with a namespace prefix, you must use the second
  70. form. Attribute names are matched insensitive to upper/lower case.
  71. If just testing for ``class`` (with or without a namespace), use
  72. :class:`with_class`.
  73. To verify that the attribute exists, but without specifying a value,
  74. pass ``with_attribute.ANY_VALUE`` as the value.
  75. Example::
  76. html = '''
  77. <div>
  78. Some text
  79. <div type="grid">1 4 0 1 0</div>
  80. <div type="graph">1,3 2,3 1,1</div>
  81. <div>this has no type</div>
  82. </div>
  83. '''
  84. div,div_end = make_html_tags("div")
  85. # only match div tag having a type attribute with value "grid"
  86. div_grid = div().set_parse_action(with_attribute(type="grid"))
  87. grid_expr = div_grid + SkipTo(div | div_end)("body")
  88. for grid_header in grid_expr.search_string(html):
  89. print(grid_header.body)
  90. # construct a match with any div tag having a type attribute, regardless of the value
  91. div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
  92. div_expr = div_any_type + SkipTo(div | div_end)("body")
  93. for div_header in div_expr.search_string(html):
  94. print(div_header.body)
  95. prints::
  96. 1 4 0 1 0
  97. 1 4 0 1 0
  98. 1,3 2,3 1,1
  99. """
  100. if args:
  101. attrs = args[:]
  102. else:
  103. attrs = attr_dict.items()
  104. attrs = [(k, v) for k, v in attrs]
  105. def pa(s, l, tokens):
  106. for attrName, attrValue in attrs:
  107. if attrName not in tokens:
  108. raise ParseException(s, l, "no matching attribute " + attrName)
  109. if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:
  110. raise ParseException(
  111. s,
  112. l,
  113. f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
  114. )
  115. return pa
  116. with_attribute.ANY_VALUE = object() # type: ignore [attr-defined]
  117. def with_class(classname, namespace=""):
  118. """
  119. Simplified version of :class:`with_attribute` when
  120. matching on a div class - made difficult because ``class`` is
  121. a reserved word in Python.
  122. Example::
  123. html = '''
  124. <div>
  125. Some text
  126. <div class="grid">1 4 0 1 0</div>
  127. <div class="graph">1,3 2,3 1,1</div>
  128. <div>this &lt;div&gt; has no class</div>
  129. </div>
  130. '''
  131. div,div_end = make_html_tags("div")
  132. div_grid = div().set_parse_action(with_class("grid"))
  133. grid_expr = div_grid + SkipTo(div | div_end)("body")
  134. for grid_header in grid_expr.search_string(html):
  135. print(grid_header.body)
  136. div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE))
  137. div_expr = div_any_type + SkipTo(div | div_end)("body")
  138. for div_header in div_expr.search_string(html):
  139. print(div_header.body)
  140. prints::
  141. 1 4 0 1 0
  142. 1 4 0 1 0
  143. 1,3 2,3 1,1
  144. """
  145. classattr = f"{namespace}:class" if namespace else "class"
  146. return with_attribute(**{classattr: classname})
# pre-PEP8 compatibility symbols
# Each stub below only contributes its legacy camelCase name; the decorator
# is handed the PEP8-named function defined above that the name stands for.
# NOTE(review): replaced_by_pep8 is imported from .util and is not visible
# here - presumably it returns a synonym that forwards to the snake_case
# function, so the stub bodies are never meant to run; confirm.
# fmt: off
@replaced_by_pep8(replace_with)
def replaceWith(): ...

@replaced_by_pep8(remove_quotes)
def removeQuotes(): ...

@replaced_by_pep8(with_attribute)
def withAttribute(): ...

@replaced_by_pep8(with_class)
def withClass(): ...

@replaced_by_pep8(match_only_at_col)
def matchOnlyAtCol(): ...
# fmt: on