# glob.py (5.7 KB)
  1. """Filename globbing utility."""
  2. import contextlib
  3. import os
  4. import re
  5. import fnmatch
  6. import sys
  7. __all__ = ["glob", "iglob", "escape"]
  8. def glob(pathname, *, recursive=False):
  9. """Return a list of paths matching a pathname pattern.
  10. The pattern may contain simple shell-style wildcards a la
  11. fnmatch. However, unlike fnmatch, filenames starting with a
  12. dot are special cases that are not matched by '*' and '?'
  13. patterns.
  14. If recursive is true, the pattern '**' will match any files and
  15. zero or more directories and subdirectories.
  16. """
  17. return list(iglob(pathname, recursive=recursive))
  18. def iglob(pathname, *, recursive=False):
  19. """Return an iterator which yields the paths matching a pathname pattern.
  20. The pattern may contain simple shell-style wildcards a la
  21. fnmatch. However, unlike fnmatch, filenames starting with a
  22. dot are special cases that are not matched by '*' and '?'
  23. patterns.
  24. If recursive is true, the pattern '**' will match any files and
  25. zero or more directories and subdirectories.
  26. """
  27. sys.audit("glob.glob", pathname, recursive)
  28. it = _iglob(pathname, recursive, False)
  29. if recursive and _isrecursive(pathname):
  30. s = next(it) # skip empty string
  31. assert not s
  32. return it
  33. def _iglob(pathname, recursive, dironly):
  34. dirname, basename = os.path.split(pathname)
  35. if not has_magic(pathname):
  36. assert not dironly
  37. if basename:
  38. if os.path.lexists(pathname):
  39. yield pathname
  40. else:
  41. # Patterns ending with a slash should match only directories
  42. if os.path.isdir(dirname):
  43. yield pathname
  44. return
  45. if not dirname:
  46. if recursive and _isrecursive(basename):
  47. yield from _glob2(dirname, basename, dironly)
  48. else:
  49. yield from _glob1(dirname, basename, dironly)
  50. return
  51. # `os.path.split()` returns the argument itself as a dirname if it is a
  52. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  53. # contains magic characters (i.e. r'\\?\C:').
  54. if dirname != pathname and has_magic(dirname):
  55. dirs = _iglob(dirname, recursive, True)
  56. else:
  57. dirs = [dirname]
  58. if has_magic(basename):
  59. if recursive and _isrecursive(basename):
  60. glob_in_dir = _glob2
  61. else:
  62. glob_in_dir = _glob1
  63. else:
  64. glob_in_dir = _glob0
  65. for dirname in dirs:
  66. for name in glob_in_dir(dirname, basename, dironly):
  67. yield os.path.join(dirname, name)
  68. # These 2 helper functions non-recursively glob inside a literal directory.
  69. # They return a list of basenames. _glob1 accepts a pattern while _glob0
  70. # takes a literal basename (so it only has to check for its existence).
  71. def _glob1(dirname, pattern, dironly):
  72. names = _listdir(dirname, dironly)
  73. if not _ishidden(pattern):
  74. names = (x for x in names if not _ishidden(x))
  75. return fnmatch.filter(names, pattern)
  76. def _glob0(dirname, basename, dironly):
  77. if not basename:
  78. # `os.path.split()` returns an empty basename for paths ending with a
  79. # directory separator. 'q*x/' should match only directories.
  80. if os.path.isdir(dirname):
  81. return [basename]
  82. else:
  83. if os.path.lexists(os.path.join(dirname, basename)):
  84. return [basename]
  85. return []
  86. # Following functions are not public but can be used by third-party code.
  87. def glob0(dirname, pattern):
  88. return _glob0(dirname, pattern, False)
  89. def glob1(dirname, pattern):
  90. return _glob1(dirname, pattern, False)
  91. # This helper function recursively yields relative pathnames inside a literal
  92. # directory.
  93. def _glob2(dirname, pattern, dironly):
  94. assert _isrecursive(pattern)
  95. yield pattern[:0]
  96. yield from _rlistdir(dirname, dironly)
  97. # If dironly is false, yields all file names inside a directory.
  98. # If dironly is true, yields only directory names.
  99. def _iterdir(dirname, dironly):
  100. if not dirname:
  101. if isinstance(dirname, bytes):
  102. dirname = bytes(os.curdir, 'ASCII')
  103. else:
  104. dirname = os.curdir
  105. try:
  106. with os.scandir(dirname) as it:
  107. for entry in it:
  108. try:
  109. if not dironly or entry.is_dir():
  110. yield entry.name
  111. except OSError:
  112. pass
  113. except OSError:
  114. return
  115. def _listdir(dirname, dironly):
  116. with contextlib.closing(_iterdir(dirname, dironly)) as it:
  117. return list(it)
  118. # Recursively yields relative pathnames inside a literal directory.
  119. def _rlistdir(dirname, dironly):
  120. names = _listdir(dirname, dironly)
  121. for x in names:
  122. if not _ishidden(x):
  123. yield x
  124. path = os.path.join(dirname, x) if dirname else x
  125. for y in _rlistdir(path, dironly):
  126. yield os.path.join(x, y)
  127. magic_check = re.compile('([*?[])')
  128. magic_check_bytes = re.compile(b'([*?[])')
  129. def has_magic(s):
  130. if isinstance(s, bytes):
  131. match = magic_check_bytes.search(s)
  132. else:
  133. match = magic_check.search(s)
  134. return match is not None
  135. def _ishidden(path):
  136. return path[0] in ('.', b'.'[0])
  137. def _isrecursive(pattern):
  138. if isinstance(pattern, bytes):
  139. return pattern == b'**'
  140. else:
  141. return pattern == '**'
  142. def escape(pathname):
  143. """Escape all special characters.
  144. """
  145. # Escaping is done by wrapping any of "*?[" between square brackets.
  146. # Metacharacters do not work in the drive part and shouldn't be escaped.
  147. drive, pathname = os.path.splitdrive(pathname)
  148. if isinstance(pathname, bytes):
  149. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  150. else:
  151. pathname = magic_check.sub(r'[\1]', pathname)
  152. return drive + pathname