123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- """Filename matching with shell patterns.
- fnmatch(FILENAME, PATTERN) matches according to the local convention.
- fnmatchcase(FILENAME, PATTERN) always takes case in account.
- The functions operate by translating the pattern into a regular
- expression. They cache the compiled regular expressions for speed.
- The function translate(PATTERN) returns a regular expression
- corresponding to PATTERN. (It does not compile it.)
- """
- import os
- import posixpath
- import re
- import functools
- __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
- # Build a thread-safe incrementing counter to help create unique regexp group
- # names across calls.
- from itertools import count
- _nextgroupnum = count().__next__
- del count
- def fnmatch(name, pat):
- """Test whether FILENAME matches PATTERN.
- Patterns are Unix shell style:
- * matches everything
- ? matches any single character
- [seq] matches any character in seq
- [!seq] matches any char not in seq
- An initial period in FILENAME is not special.
- Both FILENAME and PATTERN are first case-normalized
- if the operating system requires it.
- If you don't want this, use fnmatchcase(FILENAME, PATTERN).
- """
- name = os.path.normcase(name)
- pat = os.path.normcase(pat)
- return fnmatchcase(name, pat)
- @functools.lru_cache(maxsize=256, typed=True)
- def _compile_pattern(pat):
- if isinstance(pat, bytes):
- pat_str = str(pat, 'ISO-8859-1')
- res_str = translate(pat_str)
- res = bytes(res_str, 'ISO-8859-1')
- else:
- res = translate(pat)
- return re.compile(res).match
- def filter(names, pat):
- """Construct a list from those elements of the iterable NAMES that match PAT."""
- result = []
- pat = os.path.normcase(pat)
- match = _compile_pattern(pat)
- if os.path is posixpath:
- # normcase on posix is NOP. Optimize it away from the loop.
- for name in names:
- if match(name):
- result.append(name)
- else:
- for name in names:
- if match(os.path.normcase(name)):
- result.append(name)
- return result
- def fnmatchcase(name, pat):
- """Test whether FILENAME matches PATTERN, including case.
- This is a version of fnmatch() which doesn't case-normalize
- its arguments.
- """
- match = _compile_pattern(pat)
- return match(name) is not None
- def translate(pat):
- """Translate a shell PATTERN to a regular expression.
- There is no way to quote meta-characters.
- """
- STAR = object()
- res = []
- add = res.append
- i, n = 0, len(pat)
- while i < n:
- c = pat[i]
- i = i+1
- if c == '*':
- # compress consecutive `*` into one
- if (not res) or res[-1] is not STAR:
- add(STAR)
- elif c == '?':
- add('.')
- elif c == '[':
- j = i
- if j < n and pat[j] == '!':
- j = j+1
- if j < n and pat[j] == ']':
- j = j+1
- while j < n and pat[j] != ']':
- j = j+1
- if j >= n:
- add('\\[')
- else:
- stuff = pat[i:j]
- if '--' not in stuff:
- stuff = stuff.replace('\\', r'\\')
- else:
- chunks = []
- k = i+2 if pat[i] == '!' else i+1
- while True:
- k = pat.find('-', k, j)
- if k < 0:
- break
- chunks.append(pat[i:k])
- i = k+1
- k = k+3
- chunks.append(pat[i:j])
- # Escape backslashes and hyphens for set difference (--).
- # Hyphens that create ranges shouldn't be escaped.
- stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
- for s in chunks)
- # Escape set operations (&&, ~~ and ||).
- stuff = re.sub(r'([&~|])', r'\\\1', stuff)
- i = j+1
- if stuff[0] == '!':
- stuff = '^' + stuff[1:]
- elif stuff[0] in ('^', '['):
- stuff = '\\' + stuff
- add(f'[{stuff}]')
- else:
- add(re.escape(c))
- assert i == n
- # Deal with STARs.
- inp = res
- res = []
- add = res.append
- i, n = 0, len(inp)
- # Fixed pieces at the start?
- while i < n and inp[i] is not STAR:
- add(inp[i])
- i += 1
- # Now deal with STAR fixed STAR fixed ...
- # For an interior `STAR fixed` pairing, we want to do a minimal
- # .*? match followed by `fixed`, with no possibility of backtracking.
- # We can't spell that directly, but can trick it into working by matching
- # .*?fixed
- # in a lookahead assertion, save the matched part in a group, then
- # consume that group via a backreference. If the overall match fails,
- # the lookahead assertion won't try alternatives. So the translation is:
- # (?=(?P<name>.*?fixed))(?P=name)
- # Group names are created as needed: g0, g1, g2, ...
- # The numbers are obtained from _nextgroupnum() to ensure they're unique
- # across calls and across threads. This is because people rely on the
- # undocumented ability to join multiple translate() results together via
- # "|" to build large regexps matching "one of many" shell patterns.
- while i < n:
- assert inp[i] is STAR
- i += 1
- if i == n:
- add(".*")
- break
- assert inp[i] is not STAR
- fixed = []
- while i < n and inp[i] is not STAR:
- fixed.append(inp[i])
- i += 1
- fixed = "".join(fixed)
- if i == n:
- add(".*")
- add(fixed)
- else:
- groupnum = _nextgroupnum()
- add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
- assert i == n
- res = "".join(res)
- return fr'(?s:{res})\Z'
|