https://github.com/python/cpython/commit/68a51e0178e86be8b697683fd108aa795f235507
commit: 68a51e0178e86be8b697683fd108aa795f235507
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-01T17:48:58Z
summary:
GH-125413: pathlib ABCs: use `scandir()` to speed up `glob()` (#126261)
Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly
reduces the number of `PathBase.stat()` calls needed when globbing.
There are no user-facing changes, because the pathlib ABCs are still
private and `Path.glob()` doesn't use the implementation in its superclass.
files:
M Lib/glob.py
M Lib/pathlib/_abc.py
M Lib/test/test_pathlib/test_pathlib_abc.py
diff --git a/Lib/glob.py b/Lib/glob.py
index 574e5ad51b601d..ce9b3698888dd9 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -364,12 +364,6 @@ def concat_path(path, text):
"""
raise NotImplementedError
- @staticmethod
- def parse_entry(entry):
- """Returns the path of an entry yielded from scandir().
- """
- raise NotImplementedError
-
# High-level methods
def compile(self, pat):
@@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
except OSError:
pass
else:
+ prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
if dir_only:
@@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
continue
except OSError:
continue
- entry_path = self.parse_entry(entry)
+ entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
@@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
except OSError:
pass
else:
+ prefix = self.add_slash(path)
for entry in entries:
is_dir = False
try:
@@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
pass
if is_dir or not dir_only:
- entry_path = self.parse_entry(entry)
+ entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
@@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
- parse_entry = operator.attrgetter('path')
concat_path = operator.add
if os.name == 'nt':
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index dfff8b460d1bf1..cc7c1991d0e528 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):
lexists = operator.methodcaller('exists', follow_symlinks=False)
add_slash = operator.methodcaller('joinpath', '')
-
- @staticmethod
- def scandir(path):
- """Emulates os.scandir(), which returns an object that can be used as
- a context manager. This method is called by walk() and glob().
- """
- import contextlib
- return contextlib.nullcontext(path.iterdir())
+ scandir = operator.methodcaller('scandir')
@staticmethod
def concat_path(path, text):
"""Appends text to the given path."""
return path.with_segments(path._raw_path + text)
- @staticmethod
- def parse_entry(entry):
- """Returns the path of an entry yielded from scandir()."""
- return entry
-
class PurePathBase:
"""Base class for pure path objects.
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 11e34f5d378a58..4596d0b0e26763 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1633,8 +1633,10 @@ def setUp(self):
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
- p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
- p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
+ p.joinpath('dirA', 'linkC').symlink_to(
+ parser.join('..', 'dirB'), target_is_directory=True)
+ p.joinpath('dirB', 'linkD').symlink_to(
+ parser.join('..', 'dirB'), target_is_directory=True)
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
def tearDown(self):
@@ -2479,7 +2481,7 @@ def test_glob_permissions(self):
if i % 2:
link.symlink_to(P(self.base, "dirE", "nonexistent"))
else:
- link.symlink_to(P(self.base, "dirC"))
+ link.symlink_to(P(self.base, "dirC"), target_is_directory=True)
self.assertEqual(len(set(base.glob("*"))), 100)
self.assertEqual(len(set(base.glob("*/"))), 50)
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]