|
2 | 2 |
|
3 | 3 | import contextlib |
4 | 4 | import os |
5 | | -import re |
6 | 5 | import fnmatch |
7 | | -import functools |
8 | 6 | import itertools |
9 | | -import operator |
10 | 7 | import stat |
11 | 8 | import sys |
12 | 9 |
|
13 | | -__all__ = ["glob", "iglob", "escape"] |
| 10 | +from pathlib._glob import translate, magic_check, magic_check_bytes |
| 11 | + |
| 12 | +__all__ = ["glob", "iglob", "escape", "translate"] |
14 | 13 |
|
15 | 14 | def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, |
16 | 15 | include_hidden=False): |
@@ -226,9 +225,6 @@ def _join(dirname, basename): |
226 | 225 | return dirname or basename |
227 | 226 | return os.path.join(dirname, basename) |
228 | 227 |
|
229 | | -magic_check = re.compile('([*?[])') |
230 | | -magic_check_bytes = re.compile(b'([*?[])') |
231 | | - |
232 | 228 | def has_magic(s): |
233 | 229 | if isinstance(s, bytes): |
234 | 230 | match = magic_check_bytes.search(s) |
@@ -258,300 +254,4 @@ def escape(pathname): |
258 | 254 | return drive + pathname |
259 | 255 |
|
260 | 256 |
|
261 | | -_special_parts = ('', '.', '..') |
262 | 257 | _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) |
263 | | -_no_recurse_symlinks = object() |
264 | | - |
265 | | - |
266 | | -def translate(pat, *, recursive=False, include_hidden=False, seps=None): |
267 | | - """Translate a pathname with shell wildcards to a regular expression. |
268 | | -
|
269 | | - If `recursive` is true, the pattern segment '**' will match any number of |
270 | | - path segments. |
271 | | -
|
272 | | - If `include_hidden` is true, wildcards can match path segments beginning |
273 | | - with a dot ('.'). |
274 | | -
|
275 | | - If a sequence of separator characters is given to `seps`, they will be |
276 | | - used to split the pattern into segments and match path separators. If not |
277 | | - given, os.path.sep and os.path.altsep (where available) are used. |
278 | | - """ |
279 | | - if not seps: |
280 | | - if os.path.altsep: |
281 | | - seps = (os.path.sep, os.path.altsep) |
282 | | - else: |
283 | | - seps = os.path.sep |
284 | | - escaped_seps = ''.join(map(re.escape, seps)) |
285 | | - any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps |
286 | | - not_sep = f'[^{escaped_seps}]' |
287 | | - if include_hidden: |
288 | | - one_last_segment = f'{not_sep}+' |
289 | | - one_segment = f'{one_last_segment}{any_sep}' |
290 | | - any_segments = f'(?:.+{any_sep})?' |
291 | | - any_last_segments = '.*' |
292 | | - else: |
293 | | - one_last_segment = f'[^{escaped_seps}.]{not_sep}*' |
294 | | - one_segment = f'{one_last_segment}{any_sep}' |
295 | | - any_segments = f'(?:{one_segment})*' |
296 | | - any_last_segments = f'{any_segments}(?:{one_last_segment})?' |
297 | | - |
298 | | - results = [] |
299 | | - parts = re.split(any_sep, pat) |
300 | | - last_part_idx = len(parts) - 1 |
301 | | - for idx, part in enumerate(parts): |
302 | | - if part == '*': |
303 | | - results.append(one_segment if idx < last_part_idx else one_last_segment) |
304 | | - elif recursive and part == '**': |
305 | | - if idx < last_part_idx: |
306 | | - if parts[idx + 1] != '**': |
307 | | - results.append(any_segments) |
308 | | - else: |
309 | | - results.append(any_last_segments) |
310 | | - else: |
311 | | - if part: |
312 | | - if not include_hidden and part[0] in '*?': |
313 | | - results.append(r'(?!\.)') |
314 | | - results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) |
315 | | - if idx < last_part_idx: |
316 | | - results.append(any_sep) |
317 | | - res = ''.join(results) |
318 | | - return fr'(?s:{res})\Z' |
319 | | - |
320 | | - |
321 | | -@functools.lru_cache(maxsize=512) |
322 | | -def _compile_pattern(pat, sep, case_sensitive, recursive=True): |
323 | | - """Compile given glob pattern to a re.Pattern object (observing case |
324 | | - sensitivity).""" |
325 | | - flags = re.NOFLAG if case_sensitive else re.IGNORECASE |
326 | | - regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) |
327 | | - return re.compile(regex, flags=flags).match |
328 | | - |
329 | | - |
330 | | -class _Globber: |
331 | | - """Class providing shell-style pattern matching and globbing. |
332 | | - """ |
333 | | - |
334 | | - def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): |
335 | | - self.sep = sep |
336 | | - self.case_sensitive = case_sensitive |
337 | | - self.case_pedantic = case_pedantic |
338 | | - self.recursive = recursive |
339 | | - |
340 | | - # Low-level methods |
341 | | - |
342 | | - lstat = staticmethod(os.lstat) |
343 | | - scandir = staticmethod(os.scandir) |
344 | | - parse_entry = operator.attrgetter('path') |
345 | | - concat_path = operator.add |
346 | | - |
347 | | - if os.name == 'nt': |
348 | | - @staticmethod |
349 | | - def add_slash(pathname): |
350 | | - tail = os.path.splitroot(pathname)[2] |
351 | | - if not tail or tail[-1] in '\\/': |
352 | | - return pathname |
353 | | - return f'{pathname}\\' |
354 | | - else: |
355 | | - @staticmethod |
356 | | - def add_slash(pathname): |
357 | | - if not pathname or pathname[-1] == '/': |
358 | | - return pathname |
359 | | - return f'{pathname}/' |
360 | | - |
361 | | - # High-level methods |
362 | | - |
363 | | - def compile(self, pat): |
364 | | - return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) |
365 | | - |
366 | | - def selector(self, parts): |
367 | | - """Returns a function that selects from a given path, walking and |
368 | | - filtering according to the glob-style pattern parts in *parts*. |
369 | | - """ |
370 | | - if not parts: |
371 | | - return self.select_exists |
372 | | - part = parts.pop() |
373 | | - if self.recursive and part == '**': |
374 | | - selector = self.recursive_selector |
375 | | - elif part in _special_parts: |
376 | | - selector = self.special_selector |
377 | | - elif not self.case_pedantic and magic_check.search(part) is None: |
378 | | - selector = self.literal_selector |
379 | | - else: |
380 | | - selector = self.wildcard_selector |
381 | | - return selector(part, parts) |
382 | | - |
383 | | - def special_selector(self, part, parts): |
384 | | - """Returns a function that selects special children of the given path. |
385 | | - """ |
386 | | - select_next = self.selector(parts) |
387 | | - |
388 | | - def select_special(path, exists=False): |
389 | | - path = self.concat_path(self.add_slash(path), part) |
390 | | - return select_next(path, exists) |
391 | | - return select_special |
392 | | - |
393 | | - def literal_selector(self, part, parts): |
394 | | - """Returns a function that selects a literal descendant of a path. |
395 | | - """ |
396 | | - |
397 | | - # Optimization: consume and join any subsequent literal parts here, |
398 | | - # rather than leaving them for the next selector. This reduces the |
399 | | - # number of string concatenation operations and calls to add_slash(). |
400 | | - while parts and magic_check.search(parts[-1]) is None: |
401 | | - part += self.sep + parts.pop() |
402 | | - |
403 | | - select_next = self.selector(parts) |
404 | | - |
405 | | - def select_literal(path, exists=False): |
406 | | - path = self.concat_path(self.add_slash(path), part) |
407 | | - return select_next(path, exists=False) |
408 | | - return select_literal |
409 | | - |
410 | | - def wildcard_selector(self, part, parts): |
411 | | - """Returns a function that selects direct children of a given path, |
412 | | - filtering by pattern. |
413 | | - """ |
414 | | - |
415 | | - match = None if part == '*' else self.compile(part) |
416 | | - dir_only = bool(parts) |
417 | | - if dir_only: |
418 | | - select_next = self.selector(parts) |
419 | | - |
420 | | - def select_wildcard(path, exists=False): |
421 | | - try: |
422 | | - # We must close the scandir() object before proceeding to |
423 | | - # avoid exhausting file descriptors when globbing deep trees. |
424 | | - with self.scandir(path) as scandir_it: |
425 | | - entries = list(scandir_it) |
426 | | - except OSError: |
427 | | - pass |
428 | | - else: |
429 | | - for entry in entries: |
430 | | - if match is None or match(entry.name): |
431 | | - if dir_only: |
432 | | - try: |
433 | | - if not entry.is_dir(): |
434 | | - continue |
435 | | - except OSError: |
436 | | - continue |
437 | | - entry_path = self.parse_entry(entry) |
438 | | - if dir_only: |
439 | | - yield from select_next(entry_path, exists=True) |
440 | | - else: |
441 | | - yield entry_path |
442 | | - return select_wildcard |
443 | | - |
444 | | - def recursive_selector(self, part, parts): |
445 | | - """Returns a function that selects a given path and all its children, |
446 | | - recursively, filtering by pattern. |
447 | | - """ |
448 | | - # Optimization: consume following '**' parts, which have no effect. |
449 | | - while parts and parts[-1] == '**': |
450 | | - parts.pop() |
451 | | - |
452 | | - # Optimization: consume and join any following non-special parts here, |
453 | | - # rather than leaving them for the next selector. They're used to |
454 | | - # build a regular expression, which we use to filter the results of |
455 | | - # the recursive walk. As a result, non-special pattern segments |
456 | | - # following a '**' wildcard don't require additional filesystem access |
457 | | - # to expand. |
458 | | - follow_symlinks = self.recursive is not _no_recurse_symlinks |
459 | | - if follow_symlinks: |
460 | | - while parts and parts[-1] not in _special_parts: |
461 | | - part += self.sep + parts.pop() |
462 | | - |
463 | | - match = None if part == '**' else self.compile(part) |
464 | | - dir_only = bool(parts) |
465 | | - select_next = self.selector(parts) |
466 | | - |
467 | | - def select_recursive(path, exists=False): |
468 | | - path = self.add_slash(path) |
469 | | - match_pos = len(str(path)) |
470 | | - if match is None or match(str(path), match_pos): |
471 | | - yield from select_next(path, exists) |
472 | | - stack = [path] |
473 | | - while stack: |
474 | | - yield from select_recursive_step(stack, match_pos) |
475 | | - |
476 | | - def select_recursive_step(stack, match_pos): |
477 | | - path = stack.pop() |
478 | | - try: |
479 | | - # We must close the scandir() object before proceeding to |
480 | | - # avoid exhausting file descriptors when globbing deep trees. |
481 | | - with self.scandir(path) as scandir_it: |
482 | | - entries = list(scandir_it) |
483 | | - except OSError: |
484 | | - pass |
485 | | - else: |
486 | | - for entry in entries: |
487 | | - is_dir = False |
488 | | - try: |
489 | | - if entry.is_dir(follow_symlinks=follow_symlinks): |
490 | | - is_dir = True |
491 | | - except OSError: |
492 | | - pass |
493 | | - |
494 | | - if is_dir or not dir_only: |
495 | | - entry_path = self.parse_entry(entry) |
496 | | - if match is None or match(str(entry_path), match_pos): |
497 | | - if dir_only: |
498 | | - yield from select_next(entry_path, exists=True) |
499 | | - else: |
500 | | - # Optimization: directly yield the path if this is |
501 | | - # last pattern part. |
502 | | - yield entry_path |
503 | | - if is_dir: |
504 | | - stack.append(entry_path) |
505 | | - |
506 | | - return select_recursive |
507 | | - |
508 | | - def select_exists(self, path, exists=False): |
509 | | - """Yields the given path, if it exists. |
510 | | - """ |
511 | | - if exists: |
512 | | - # Optimization: this path is already known to exist, e.g. because |
513 | | - # it was returned from os.scandir(), so we skip calling lstat(). |
514 | | - yield path |
515 | | - else: |
516 | | - try: |
517 | | - self.lstat(path) |
518 | | - yield path |
519 | | - except OSError: |
520 | | - pass |
521 | | - |
522 | | - @classmethod |
523 | | - def walk(cls, root, top_down, on_error, follow_symlinks): |
524 | | - """Walk the directory tree from the given root, similar to os.walk(). |
525 | | - """ |
526 | | - paths = [root] |
527 | | - while paths: |
528 | | - path = paths.pop() |
529 | | - if isinstance(path, tuple): |
530 | | - yield path |
531 | | - continue |
532 | | - try: |
533 | | - with cls.scandir(path) as scandir_it: |
534 | | - dirnames = [] |
535 | | - filenames = [] |
536 | | - if not top_down: |
537 | | - paths.append((path, dirnames, filenames)) |
538 | | - for entry in scandir_it: |
539 | | - name = entry.name |
540 | | - try: |
541 | | - if entry.is_dir(follow_symlinks=follow_symlinks): |
542 | | - if not top_down: |
543 | | - paths.append(cls.parse_entry(entry)) |
544 | | - dirnames.append(name) |
545 | | - else: |
546 | | - filenames.append(name) |
547 | | - except OSError: |
548 | | - filenames.append(name) |
549 | | - except OSError as error: |
550 | | - if on_error is not None: |
551 | | - on_error(error) |
552 | | - else: |
553 | | - if top_down: |
554 | | - yield path, dirnames, filenames |
555 | | - if dirnames: |
556 | | - prefix = cls.add_slash(path) |
557 | | - paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)] |
0 commit comments