66"""
77
88import contextlib
9- import fnmatch
109import functools
10+ import glob
1111import io
1212import ntpath
1313import os
@@ -76,78 +76,16 @@ def _is_case_sensitive(pathmod):
7676#
7777
7878
79- # fnmatch.translate() returns a regular expression that includes a prefix and
80- # a suffix, which enable matching newlines and ensure the end of the string is
81- # matched, respectively. These features are undesirable for our implementation
82- # of PurePath.match(), which represents path separators as newlines and joins
83- # pattern segments together. As a workaround, we define a slice object that
84- # can remove the prefix and suffix from any translate() result. See the
85- # _compile_pattern_lines() function for more details.
86- _FNMATCH_PREFIX , _FNMATCH_SUFFIX = fnmatch .translate ('_' ).split ('_' )
87- _FNMATCH_SLICE = slice (len (_FNMATCH_PREFIX ), - len (_FNMATCH_SUFFIX ))
88- _SWAP_SEP_AND_NEWLINE = {
89- '/' : str .maketrans ({'/' : '\n ' , '\n ' : '/' }),
90- '\\ ' : str .maketrans ({'\\ ' : '\n ' , '\n ' : '\\ ' }),
91- }
92-
93-
9479@functools .lru_cache (maxsize = 256 )
95- def _compile_pattern (pat , case_sensitive ):
80+ def _compile_pattern (pat , sep , case_sensitive ):
9681 """Compile given glob pattern to a re.Pattern object (observing case
97- sensitivity), or None if the pattern should match everything."""
98- if pat == '*' :
99- return None
82+ sensitivity)."""
10083 flags = re .NOFLAG if case_sensitive else re .IGNORECASE
101- return re .compile (fnmatch .translate (pat ), flags ).match
102-
103-
104- @functools .lru_cache ()
105- def _compile_pattern_lines (pattern_lines , case_sensitive ):
106- """Compile the given pattern lines to an `re.Pattern` object.
107-
108- The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
109- its path separators and newlines swapped (e.g. '**\n *.py`). By using
110- newlines to separate path components, and not setting `re.DOTALL`, we
111- ensure that the `*` wildcard cannot match path separators.
112-
113- The returned `re.Pattern` object may have its `match()` method called to
114- match a complete pattern, or `search()` to match from the right. The
115- argument supplied to these methods must also have its path separators and
116- newlines swapped.
117- """
118-
119- # Match the start of the path, or just after a path separator
120- parts = ['^' ]
121- for part in pattern_lines .splitlines (keepends = True ):
122- if part == '*\n ' :
123- part = r'.+\n'
124- elif part == '*' :
125- part = r'.+'
126- elif part == '**\n ' :
127- # '**/' component: we use '(?s:.)' rather than '.' so that path
128- # separators (i.e. newlines) are matched. The trailing '^' ensures
129- # we terminate after a path separator (i.e. on a new line).
130- part = r'(?s:.)*^'
131- elif part == '**' :
132- # '**' component.
133- part = r'(?s:.)*'
134- elif '**' in part :
135- raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
136- else :
137- # Any other component: pass to fnmatch.translate(). We slice off
138- # the common prefix and suffix added by translate() to ensure that
139- # re.DOTALL is not set, and the end of the string not matched,
140- # respectively. With DOTALL not set, '*' wildcards will not match
141- # path separators, because the '.' characters in the pattern will
142- # not match newlines.
143- part = fnmatch .translate (part )[_FNMATCH_SLICE ]
144- parts .append (part )
145- # Match the end of the path, always.
146- parts .append (r'\Z' )
147- flags = re .MULTILINE
148- if not case_sensitive :
149- flags |= re .IGNORECASE
150- return re .compile ('' .join (parts ), flags = flags )
84+ regex = glob .translate (pat , recursive = True , include_hidden = True , seps = sep )
85+ # The string representation of an empty path is a single dot ('.'). Empty
86+ # paths shouldn't match wildcards, so we consume it with an atomic group.
87+ regex = r'(\.\Z)?+' + regex
88+ return re .compile (regex , flags ).match
15189
15290
15391def _select_children (parent_paths , dir_only , follow_symlinks , match ):
@@ -171,7 +109,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
171109 except OSError :
172110 continue
173111 name = entry .name
174- if match is None or match (name ):
112+ if match (name ):
175113 yield parent_path ._make_child_relpath (name )
176114
177115
@@ -297,10 +235,6 @@ class PurePath:
297235 # to implement comparison methods like `__lt__()`.
298236 '_parts_normcase_cached' ,
299237
300- # The `_lines_cached` slot stores the string path with path separators
301- # and newlines swapped. This is used to implement `match()`.
302- '_lines_cached' ,
303-
304238 # The `_hash` slot stores the hash of the case-normalized string
305239 # path. It's set when `__hash__()` is called for the first time.
306240 '_hash' ,
@@ -475,20 +409,6 @@ def _parts_normcase(self):
475409 self ._parts_normcase_cached = self ._str_normcase .split (self .pathmod .sep )
476410 return self ._parts_normcase_cached
477411
478- @property
479- def _lines (self ):
480- # Path with separators and newlines swapped, for pattern matching.
481- try :
482- return self ._lines_cached
483- except AttributeError :
484- path_str = str (self )
485- if path_str == '.' :
486- self ._lines_cached = ''
487- else :
488- trans = _SWAP_SEP_AND_NEWLINE [self .pathmod .sep ]
489- self ._lines_cached = path_str .translate (trans )
490- return self ._lines_cached
491-
492412 def __eq__ (self , other ):
493413 if not isinstance (other , PurePath ):
494414 return NotImplemented
@@ -763,13 +683,16 @@ def match(self, path_pattern, *, case_sensitive=None):
763683 path_pattern = self .with_segments (path_pattern )
764684 if case_sensitive is None :
765685 case_sensitive = _is_case_sensitive (self .pathmod )
766- pattern = _compile_pattern_lines (path_pattern ._lines , case_sensitive )
686+ sep = path_pattern .pathmod .sep
687+ pattern_str = str (path_pattern )
767688 if path_pattern .drive or path_pattern .root :
768- return pattern . match ( self . _lines ) is not None
689+ pass
769690 elif path_pattern ._tail :
770- return pattern . search ( self . _lines ) is not None
691+ pattern_str = f'** { sep } { pattern_str } '
771692 else :
772693 raise ValueError ("empty pattern" )
694+ match = _compile_pattern (pattern_str , sep , case_sensitive )
695+ return match (str (self )) is not None
773696
774697
775698# Subclassing os.PathLike makes isinstance() checks slower,
@@ -1069,26 +992,19 @@ def _scandir(self):
1069992 return contextlib .nullcontext (self .iterdir ())
1070993
def _make_child_relpath(self, name):
    """Return a new path object for the entry *name* directly under this path.

    The child's string form is built by hand rather than re-parsed:

    * if this path has a non-empty ``_tail``, *name* is appended after
      ``self.pathmod.sep``;
    * otherwise, if ``str(self)`` is not ``'.'``, *name* is appended with no
      separator (presumably the drive/root-only case, where the string
      already ends in a separator -- confirm against callers);
    * otherwise *name* alone is used.

    The new object is created with ``with_segments()`` and then has its
    ``_str``, ``_drv``, ``_root`` and ``_tail_cached`` attributes set directly
    from values already known here.
    """
    path_str = str(self)
    tail = self._tail
    if tail:
        path_str = f'{path_str}{self.pathmod.sep}{name}'
    elif path_str != '.':
        path_str = f'{path_str}{name}'
    else:
        path_str = name
    path = self.with_segments(path_str)
    # Seed the cached attributes so they need not be recomputed from
    # path_str later.
    path._str = path_str
    path._drv = self.drive
    path._root = self.root
    path._tail_cached = tail + [name]
    return path
10931009
10941010 def glob (self , pattern , * , case_sensitive = None , follow_symlinks = None ):
@@ -1139,6 +1055,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
11391055 # do not perform any filesystem access, which can be much faster!
11401056 filter_paths = follow_symlinks is not None and '..' not in pattern_parts
11411057 deduplicate_paths = False
1058+ sep = self .pathmod .sep
11421059 paths = iter ([self ] if self .is_dir () else [])
11431060 part_idx = 0
11441061 while part_idx < len (pattern_parts ):
@@ -1159,9 +1076,9 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
11591076 paths = _select_recursive (paths , dir_only , follow_symlinks )
11601077
11611078 # Filter out paths that don't match pattern.
1162- prefix_len = len (self ._make_child_relpath ('_' ). _lines ) - 1
1163- match = _compile_pattern_lines ( path_pattern . _lines , case_sensitive ). match
1164- paths = (path for path in paths if match (path . _lines [ prefix_len :] ))
1079+ prefix_len = len (str ( self ._make_child_relpath ('_' )) ) - 1
1080+ match = _compile_pattern ( str ( path_pattern ), sep , case_sensitive )
1081+ paths = (path for path in paths if match (str ( path ), prefix_len ))
11651082 return paths
11661083
11671084 dir_only = part_idx < len (pattern_parts )
@@ -1174,7 +1091,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
11741091 raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
11751092 else :
11761093 dir_only = part_idx < len (pattern_parts )
1177- match = _compile_pattern (part , case_sensitive )
1094+ match = _compile_pattern (part , sep , case_sensitive )
11781095 paths = _select_children (paths , dir_only , follow_symlinks , match )
11791096 return paths
11801097
0 commit comments