fnmatch.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. """Filename matching with shell patterns.
  2. fnmatch(FILENAME, PATTERN) matches according to the local convention.
  3. fnmatchcase(FILENAME, PATTERN) always takes case into account.
  4. The functions operate by translating the pattern into a regular
  5. expression. They cache the compiled regular expressions for speed.
  6. The function translate(PATTERN) returns a regular expression
  7. corresponding to PATTERN. (It does not compile it.)
  8. """
  9. import os
  10. import posixpath
  11. import re
  12. import functools
  13. __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
  14. # Build a thread-safe incrementing counter to help create unique regexp group
  15. # names across calls.
  16. from itertools import count
  17. _nextgroupnum = count().__next__
  18. del count
  19. def fnmatch(name, pat):
  20. """Test whether FILENAME matches PATTERN.
  21. Patterns are Unix shell style:
  22. * matches everything
  23. ? matches any single character
  24. [seq] matches any character in seq
  25. [!seq] matches any char not in seq
  26. An initial period in FILENAME is not special.
  27. Both FILENAME and PATTERN are first case-normalized
  28. if the operating system requires it.
  29. If you don't want this, use fnmatchcase(FILENAME, PATTERN).
  30. """
  31. name = os.path.normcase(name)
  32. pat = os.path.normcase(pat)
  33. return fnmatchcase(name, pat)
  34. @functools.lru_cache(maxsize=256, typed=True)
  35. def _compile_pattern(pat):
  36. if isinstance(pat, bytes):
  37. pat_str = str(pat, 'ISO-8859-1')
  38. res_str = translate(pat_str)
  39. res = bytes(res_str, 'ISO-8859-1')
  40. else:
  41. res = translate(pat)
  42. return re.compile(res).match
  43. def filter(names, pat):
  44. """Construct a list from those elements of the iterable NAMES that match PAT."""
  45. result = []
  46. pat = os.path.normcase(pat)
  47. match = _compile_pattern(pat)
  48. if os.path is posixpath:
  49. # normcase on posix is NOP. Optimize it away from the loop.
  50. for name in names:
  51. if match(name):
  52. result.append(name)
  53. else:
  54. for name in names:
  55. if match(os.path.normcase(name)):
  56. result.append(name)
  57. return result
  58. def fnmatchcase(name, pat):
  59. """Test whether FILENAME matches PATTERN, including case.
  60. This is a version of fnmatch() which doesn't case-normalize
  61. its arguments.
  62. """
  63. match = _compile_pattern(pat)
  64. return match(name) is not None
  65. def translate(pat):
  66. """Translate a shell PATTERN to a regular expression.
  67. There is no way to quote meta-characters.
  68. """
  69. STAR = object()
  70. res = []
  71. add = res.append
  72. i, n = 0, len(pat)
  73. while i < n:
  74. c = pat[i]
  75. i = i+1
  76. if c == '*':
  77. # compress consecutive `*` into one
  78. if (not res) or res[-1] is not STAR:
  79. add(STAR)
  80. elif c == '?':
  81. add('.')
  82. elif c == '[':
  83. j = i
  84. if j < n and pat[j] == '!':
  85. j = j+1
  86. if j < n and pat[j] == ']':
  87. j = j+1
  88. while j < n and pat[j] != ']':
  89. j = j+1
  90. if j >= n:
  91. add('\\[')
  92. else:
  93. stuff = pat[i:j]
  94. if '--' not in stuff:
  95. stuff = stuff.replace('\\', r'\\')
  96. else:
  97. chunks = []
  98. k = i+2 if pat[i] == '!' else i+1
  99. while True:
  100. k = pat.find('-', k, j)
  101. if k < 0:
  102. break
  103. chunks.append(pat[i:k])
  104. i = k+1
  105. k = k+3
  106. chunks.append(pat[i:j])
  107. # Escape backslashes and hyphens for set difference (--).
  108. # Hyphens that create ranges shouldn't be escaped.
  109. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
  110. for s in chunks)
  111. # Escape set operations (&&, ~~ and ||).
  112. stuff = re.sub(r'([&~|])', r'\\\1', stuff)
  113. i = j+1
  114. if stuff[0] == '!':
  115. stuff = '^' + stuff[1:]
  116. elif stuff[0] in ('^', '['):
  117. stuff = '\\' + stuff
  118. add(f'[{stuff}]')
  119. else:
  120. add(re.escape(c))
  121. assert i == n
  122. # Deal with STARs.
  123. inp = res
  124. res = []
  125. add = res.append
  126. i, n = 0, len(inp)
  127. # Fixed pieces at the start?
  128. while i < n and inp[i] is not STAR:
  129. add(inp[i])
  130. i += 1
  131. # Now deal with STAR fixed STAR fixed ...
  132. # For an interior `STAR fixed` pairing, we want to do a minimal
  133. # .*? match followed by `fixed`, with no possibility of backtracking.
  134. # We can't spell that directly, but can trick it into working by matching
  135. # .*?fixed
  136. # in a lookahead assertion, save the matched part in a group, then
  137. # consume that group via a backreference. If the overall match fails,
  138. # the lookahead assertion won't try alternatives. So the translation is:
  139. # (?=(?P<name>.*?fixed))(?P=name)
  140. # Group names are created as needed: g0, g1, g2, ...
  141. # The numbers are obtained from _nextgroupnum() to ensure they're unique
  142. # across calls and across threads. This is because people rely on the
  143. # undocumented ability to join multiple translate() results together via
  144. # "|" to build large regexps matching "one of many" shell patterns.
  145. while i < n:
  146. assert inp[i] is STAR
  147. i += 1
  148. if i == n:
  149. add(".*")
  150. break
  151. assert inp[i] is not STAR
  152. fixed = []
  153. while i < n and inp[i] is not STAR:
  154. fixed.append(inp[i])
  155. i += 1
  156. fixed = "".join(fixed)
  157. if i == n:
  158. add(".*")
  159. add(fixed)
  160. else:
  161. groupnum = _nextgroupnum()
  162. add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
  163. assert i == n
  164. res = "".join(res)
  165. return fr'(?s:{res})\Z'