https://github.com/python/cpython/commit/a74f117dab369e6c54156c7b2256769fed0c23d0
commit: a74f117dab369e6c54156c7b2256769fed0c23d0
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-04-14T00:08:03+01:00
summary:

GH-115060: Speed up `pathlib.Path.glob()` by omitting initial `stat()` (#117831)

Since 6258844c, paths that might not exist can be fed into pathlib's
globbing implementation, which will call `os.scandir()` / `os.lstat()` only
when strictly necessary. This allows us to drop an initial `self.is_dir()`
call, which saves a `stat()`.

Co-authored-by: Shantanu <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst
M Doc/library/pathlib.rst
M Lib/pathlib/__init__.py
M Lib/pathlib/_abc.py
M Lib/test/test_pathlib/test_pathlib.py
M Lib/test/test_pathlib/test_pathlib_abc.py

diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index f4ed479401f65c..2e18e41869376e 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -1004,10 +1004,6 @@ call fails (for example because the path doesn't exist).
    .. seealso::
       :ref:`pathlib-pattern-language` documentation.
 
-   This method calls :meth:`Path.is_dir` on the top-level directory and
-   propagates any :exc:`OSError` exception that is raised. Subsequent
-   :exc:`OSError` exceptions from scanning directories are suppressed.
-
    By default, or when the *case_sensitive* keyword-only argument is set to
    ``None``, this method matches paths using platform-specific casing rules:
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
@@ -1028,6 +1024,11 @@ call fails (for example because the path doesn't exist).
    .. versionchanged:: 3.13
       The *pattern* parameter accepts a :term:`path-like object`.
 
+   .. versionchanged:: 3.13
+      Any :exc:`OSError` exceptions raised from scanning the filesystem are
+      suppressed. In previous versions, such exceptions are suppressed in many
+      cases, but not all.
+
 
 .. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)
 
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py
index 66eb08a45b1bb3..a4721fbe813962 100644
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -607,11 +607,9 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
         if raw[-1] in (self.parser.sep, self.parser.altsep):
             # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
             parts.append('')
-        if not self.is_dir():
-            return iter([])
         select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
         root = str(self)
-        paths = select(root, exists=True)
+        paths = select(root)
 
         # Normalize results
         if root == '.':
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index b51ad6f46d292a..05698d5de24afb 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -705,10 +705,8 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
         anchor, parts = pattern._stack
         if anchor:
             raise NotImplementedError("Non-relative patterns are unsupported")
-        if not self.is_dir():
-            return iter([])
         select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
-        return select(self, exists=True)
+        return select(self)
 
     def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
         """Recursively yield all existing files (of any kind, including
diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py
index 651d66656cbd61..5fd1a41cbee17b 100644
--- a/Lib/test/test_pathlib/test_pathlib.py
+++ b/Lib/test/test_pathlib/test_pathlib.py
@@ -1263,6 +1263,13 @@ def test_glob_dot(self):
             self.assertEqual(
                 set(P('.').glob('**/*/*')), {P("dirD/fileD")})
 
+    def test_glob_inaccessible(self):
+        P = self.cls
+        p = P(self.base, "mydir1", "mydir2")
+        p.mkdir(parents=True)
+        p.parent.chmod(0)
+        self.assertEqual(set(p.glob('*')), set())
+
     def test_rglob_pathlike(self):
         P = self.cls
         p = P(self.base, "dirC")
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 6656b032cde28e..aadecbc142cca6 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -8,6 +8,7 @@
 from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase
 import posixpath
 
+from test.support import is_wasi
 from test.support.os_helper import TESTFN
 
 
@@ -1920,6 +1921,8 @@ def test_rglob_symlink_loop(self):
                   }
         self.assertEqual(given, {p / x for x in expect})
 
+    # See https://github.com/WebAssembly/wasi-filesystem/issues/26
+    @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX")
     def test_glob_dotdot(self):
         # ".." is not special in globs.
         P = self.cls
diff --git a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst
new file mode 100644
index 00000000000000..50b374acb90ad0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst
@@ -0,0 +1,3 @@
+Speed up :meth:`pathlib.Path.glob` by omitting an initial
+:meth:`~pathlib.Path.is_dir` call. As a result of this change,
+:meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to