fnmatch.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. """Filename matching with shell patterns.
  2. fnmatch(FILENAME, PATTERN) matches according to the local convention.
  3. fnmatchcase(FILENAME, PATTERN) always takes case in account.
  4. The functions operate by translating the pattern into a regular
  5. expression. They cache the compiled regular expressions for speed.
  6. The function translate(PATTERN) returns a regular expression
  7. corresponding to PATTERN. (It does not compile it.)
  8. """
  9. import os
  10. import posixpath
  11. import re
  12. import functools
  13. __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
  14. def fnmatch(name, pat):
  15. """Test whether FILENAME matches PATTERN.
  16. Patterns are Unix shell style:
  17. * matches everything
  18. ? matches any single character
  19. [seq] matches any character in seq
  20. [!seq] matches any char not in seq
  21. An initial period in FILENAME is not special.
  22. Both FILENAME and PATTERN are first case-normalized
  23. if the operating system requires it.
  24. If you don't want this, use fnmatchcase(FILENAME, PATTERN).
  25. """
  26. name = os.path.normcase(name)
  27. pat = os.path.normcase(pat)
  28. return fnmatchcase(name, pat)
  29. @functools.lru_cache(maxsize=32768, typed=True)
  30. def _compile_pattern(pat):
  31. if isinstance(pat, bytes):
  32. pat_str = str(pat, 'ISO-8859-1')
  33. res_str = translate(pat_str)
  34. res = bytes(res_str, 'ISO-8859-1')
  35. else:
  36. res = translate(pat)
  37. return re.compile(res).match
  38. def filter(names, pat):
  39. """Construct a list from those elements of the iterable NAMES that match PAT."""
  40. result = []
  41. pat = os.path.normcase(pat)
  42. match = _compile_pattern(pat)
  43. if os.path is posixpath:
  44. # normcase on posix is NOP. Optimize it away from the loop.
  45. for name in names:
  46. if match(name):
  47. result.append(name)
  48. else:
  49. for name in names:
  50. if match(os.path.normcase(name)):
  51. result.append(name)
  52. return result
  53. def fnmatchcase(name, pat):
  54. """Test whether FILENAME matches PATTERN, including case.
  55. This is a version of fnmatch() which doesn't case-normalize
  56. its arguments.
  57. """
  58. match = _compile_pattern(pat)
  59. return match(name) is not None
  60. def translate(pat):
  61. """Translate a shell PATTERN to a regular expression.
  62. There is no way to quote meta-characters.
  63. """
  64. STAR = object()
  65. res = []
  66. add = res.append
  67. i, n = 0, len(pat)
  68. while i < n:
  69. c = pat[i]
  70. i = i+1
  71. if c == '*':
  72. # compress consecutive `*` into one
  73. if (not res) or res[-1] is not STAR:
  74. add(STAR)
  75. elif c == '?':
  76. add('.')
  77. elif c == '[':
  78. j = i
  79. if j < n and pat[j] == '!':
  80. j = j+1
  81. if j < n and pat[j] == ']':
  82. j = j+1
  83. while j < n and pat[j] != ']':
  84. j = j+1
  85. if j >= n:
  86. add('\\[')
  87. else:
  88. stuff = pat[i:j]
  89. if '-' not in stuff:
  90. stuff = stuff.replace('\\', r'\\')
  91. else:
  92. chunks = []
  93. k = i+2 if pat[i] == '!' else i+1
  94. while True:
  95. k = pat.find('-', k, j)
  96. if k < 0:
  97. break
  98. chunks.append(pat[i:k])
  99. i = k+1
  100. k = k+3
  101. chunk = pat[i:j]
  102. if chunk:
  103. chunks.append(chunk)
  104. else:
  105. chunks[-1] += '-'
  106. # Remove empty ranges -- invalid in RE.
  107. for k in range(len(chunks)-1, 0, -1):
  108. if chunks[k-1][-1] > chunks[k][0]:
  109. chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
  110. del chunks[k]
  111. # Escape backslashes and hyphens for set difference (--).
  112. # Hyphens that create ranges shouldn't be escaped.
  113. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
  114. for s in chunks)
  115. # Escape set operations (&&, ~~ and ||).
  116. stuff = re.sub(r'([&~|])', r'\\\1', stuff)
  117. i = j+1
  118. if not stuff:
  119. # Empty range: never match.
  120. add('(?!)')
  121. elif stuff == '!':
  122. # Negated empty range: match any character.
  123. add('.')
  124. else:
  125. if stuff[0] == '!':
  126. stuff = '^' + stuff[1:]
  127. elif stuff[0] in ('^', '['):
  128. stuff = '\\' + stuff
  129. add(f'[{stuff}]')
  130. else:
  131. add(re.escape(c))
  132. assert i == n
  133. # Deal with STARs.
  134. inp = res
  135. res = []
  136. add = res.append
  137. i, n = 0, len(inp)
  138. # Fixed pieces at the start?
  139. while i < n and inp[i] is not STAR:
  140. add(inp[i])
  141. i += 1
  142. # Now deal with STAR fixed STAR fixed ...
  143. # For an interior `STAR fixed` pairing, we want to do a minimal
  144. # .*? match followed by `fixed`, with no possibility of backtracking.
  145. # Atomic groups ("(?>...)") allow us to spell that directly.
  146. # Note: people rely on the undocumented ability to join multiple
  147. # translate() results together via "|" to build large regexps matching
  148. # "one of many" shell patterns.
  149. while i < n:
  150. assert inp[i] is STAR
  151. i += 1
  152. if i == n:
  153. add(".*")
  154. break
  155. assert inp[i] is not STAR
  156. fixed = []
  157. while i < n and inp[i] is not STAR:
  158. fixed.append(inp[i])
  159. i += 1
  160. fixed = "".join(fixed)
  161. if i == n:
  162. add(".*")
  163. add(fixed)
  164. else:
  165. add(f"(?>.*?{fixed})")
  166. assert i == n
  167. res = "".join(res)
  168. return fr'(?s:{res})\Z'