https://github.com/python/cpython/commit/68a51e0178e86be8b697683fd108aa795f235507
commit: 68a51e0178e86be8b697683fd108aa795f235507
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-01T17:48:58Z
summary:

GH-125413: pathlib ABCs: use `scandir()` to speed up `glob()` (#126261)

Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly
reduces the number of `PathBase.stat()` calls needed when globbing.

There are no user-facing changes, because the pathlib ABCs are still
private and `Path.glob()` doesn't use the implementation in its superclass.

files:
M Lib/glob.py
M Lib/pathlib/_abc.py
M Lib/test/test_pathlib/test_pathlib_abc.py

diff --git a/Lib/glob.py b/Lib/glob.py
index 574e5ad51b601d..ce9b3698888dd9 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -364,12 +364,6 @@ def concat_path(path, text):
         """
         raise NotImplementedError
 
-    @staticmethod
-    def parse_entry(entry):
-        """Returns the path of an entry yielded from scandir().
-        """
-        raise NotImplementedError
-
     # High-level methods
 
     def compile(self, pat):
@@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
             except OSError:
                 pass
             else:
+                prefix = self.add_slash(path)
                 for entry in entries:
                     if match is None or match(entry.name):
                         if dir_only:
@@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
                                     continue
                             except OSError:
                                 continue
-                        entry_path = self.parse_entry(entry)
+                        entry_path = self.concat_path(prefix, entry.name)
                         if dir_only:
                             yield from select_next(entry_path, exists=True)
                         else:
@@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
             except OSError:
                 pass
             else:
+                prefix = self.add_slash(path)
                 for entry in entries:
                     is_dir = False
                     try:
@@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
                         pass
 
                     if is_dir or not dir_only:
-                        entry_path = self.parse_entry(entry)
+                        entry_path = self.concat_path(prefix, entry.name)
                         if match is None or match(str(entry_path), match_pos):
                             if dir_only:
                                 yield from select_next(entry_path, exists=True)
@@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
     """
     lexists = staticmethod(os.path.lexists)
     scandir = staticmethod(os.scandir)
-    parse_entry = operator.attrgetter('path')
     concat_path = operator.add
 
     if os.name == 'nt':
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index dfff8b460d1bf1..cc7c1991d0e528 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):
 
     lexists = operator.methodcaller('exists', follow_symlinks=False)
     add_slash = operator.methodcaller('joinpath', '')
-
-    @staticmethod
-    def scandir(path):
-        """Emulates os.scandir(), which returns an object that can be used as
-        a context manager. This method is called by walk() and glob().
-        """
-        import contextlib
-        return contextlib.nullcontext(path.iterdir())
+    scandir = operator.methodcaller('scandir')
 
     @staticmethod
     def concat_path(path, text):
         """Appends text to the given path."""
         return path.with_segments(path._raw_path + text)
 
-    @staticmethod
-    def parse_entry(entry):
-        """Returns the path of an entry yielded from scandir()."""
-        return entry
-
 
 class PurePathBase:
     """Base class for pure path objects.
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 11e34f5d378a58..4596d0b0e26763 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1633,8 +1633,10 @@ def setUp(self):
             p.joinpath('linkA').symlink_to('fileA')
             p.joinpath('brokenLink').symlink_to('non-existing')
             p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
-            p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
-            p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
+            p.joinpath('dirA', 'linkC').symlink_to(
+                parser.join('..', 'dirB'), target_is_directory=True)
+            p.joinpath('dirB', 'linkD').symlink_to(
+                parser.join('..', 'dirB'), target_is_directory=True)
             p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
 
     def tearDown(self):
@@ -2479,7 +2481,7 @@ def test_glob_permissions(self):
             if i % 2:
                 link.symlink_to(P(self.base, "dirE", "nonexistent"))
             else:
-                link.symlink_to(P(self.base, "dirC"))
+                link.symlink_to(P(self.base, "dirC"), target_is_directory=True)
 
         self.assertEqual(len(set(base.glob("*"))), 100)
         self.assertEqual(len(set(base.glob("*/"))), 50)

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to