https://github.com/python/cpython/commit/260843df1bd8a28596b9a377d8266e2547f7eedc
commit: 260843df1bd8a28596b9a377d8266e2547f7eedc
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-11-01T01:19:01Z
summary:
GH-125413: Add `pathlib.Path.scandir()` method (#126060)
Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This
will be used to implement several `PathBase` methods more efficiently,
including methods that provide `Path.copy()`.
files:
A Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
M Doc/library/pathlib.rst
M Doc/whatsnew/3.14.rst
M Lib/pathlib/_abc.py
M Lib/pathlib/_local.py
M Lib/test/test_pathlib/test_pathlib_abc.py
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 4380122eb1be7d..b6fb36554f7cec 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -1289,6 +1289,35 @@ Reading directories
raised.
+.. method:: Path.scandir()
+
+ When the path points to a directory, return an iterator of
+ :class:`os.DirEntry` objects corresponding to entries in the directory. The
+ returned iterator supports the :term:`context manager` protocol. It is
+ implemented using :func:`os.scandir` and gives the same guarantees.
+
+ Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
+ significantly increase the performance of code that also needs file type or
+ file attribute information, because :class:`os.DirEntry` objects expose
+ this information if the operating system provides it when scanning a
+ directory.
+
+ The following example displays the names of subdirectories. The
+ ``entry.is_dir()`` check will generally not make an additional system call::
+
+ >>> p = Path('docs')
+ >>> with p.scandir() as entries:
+ ... for entry in entries:
+ ... if entry.is_dir():
+ ... entry.name
+ ...
+ '_templates'
+ '_build'
+ '_static'
+
+ .. versionadded:: 3.14
+
+
.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
Glob the given relative *pattern* in the directory represented by this path,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 7f9e3107a6e1a0..48314f9c98c036 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -380,6 +380,12 @@ pathlib
(Contributed by Barney Gale in :gh:`73991`.)
+* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
+ of :class:`os.DirEntry` objects. This is exactly equivalent to calling
+ :func:`os.scandir` on a path object.
+
+ (Contributed by Barney Gale in :gh:`125413`.)
+
pdb
---
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index 11c8018b28f26b..dfff8b460d1bf1 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None,
newline=None):
with self.open(mode='w', encoding=encoding, errors=errors,
newline=newline) as f:
return f.write(data)
+ def scandir(self):
+ """Yield os.DirEntry objects of the directory contents.
+
+ The children are yielded in arbitrary order, and the
+ special entries '.' and '..' are not included.
+ """
+ raise UnsupportedOperation(self._unsupported_msg('scandir()'))
+
def iterdir(self):
"""Yield path objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
- raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
+ with self.scandir() as entries:
+ names = [entry.name for entry in entries]
+ return map(self.joinpath, names)
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py
index a78997179820b1..ef072b83d96904 100644
--- a/Lib/pathlib/_local.py
+++ b/Lib/pathlib/_local.py
@@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1]
yield path_str
+ def scandir(self):
+ """Yield os.DirEntry objects of the directory contents.
+
+ The children are yielded in arbitrary order, and the
+ special entries '.' and '..' are not included.
+ """
+ return os.scandir(self)
+
def iterdir(self):
"""Yield path objects of the directory contents.
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 08355a71453807..11e34f5d378a58 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1,4 +1,5 @@
import collections
+import contextlib
import io
import os
import errno
@@ -1424,6 +1425,24 @@ def close(self):
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime
st_ctime')
+class DummyDirEntry:
+ """
+ Minimal os.DirEntry-like object. Returned from DummyPath.scandir().
+ """
+ __slots__ = ('name', '_is_symlink', '_is_dir')
+
+ def __init__(self, name, is_symlink, is_dir):
+ self.name = name
+ self._is_symlink = is_symlink
+ self._is_dir = is_dir
+
+ def is_symlink(self):
+ return self._is_symlink
+
+ def is_dir(self, *, follow_symlinks=True):
+ return self._is_dir and (follow_symlinks or not self._is_symlink)
+
+
class DummyPath(PathBase):
"""
Simple implementation of PathBase that keeps files and directories in
@@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None,
stream = io.TextIOWrapper(stream, encoding=encoding,
errors=errors, newline=newline)
return stream
- def iterdir(self):
- path = str(self.resolve())
- if path in self._files:
- raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
- elif path in self._directories:
- return iter([self / name for name in self._directories[path]])
+ @contextlib.contextmanager
+ def scandir(self):
+ path = self.resolve()
+ path_str = str(path)
+ if path_str in self._files:
+ raise NotADirectoryError(errno.ENOTDIR, "Not a directory",
path_str)
+ elif path_str in self._directories:
+ yield iter([path.joinpath(name)._dir_entry for name in
self._directories[path_str]])
else:
- raise FileNotFoundError(errno.ENOENT, "File not found", path)
+ raise FileNotFoundError(errno.ENOENT, "File not found", path_str)
+
+ @property
+ def _dir_entry(self):
+ path_str = str(self)
+ is_symlink = path_str in self._symlinks
+ is_directory = (path_str in self._directories
+ if not is_symlink
+ else self._symlinks[path_str][1])
+ return DummyDirEntry(self.name, is_symlink, is_directory)
def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name)
@@ -1602,7 +1632,7 @@ def setUp(self):
if self.can_symlink:
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
- p.joinpath('linkB').symlink_to('dirB')
+ p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
@@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))
+ def test_scandir(self):
+ p = self.cls(self.base)
+ with p.scandir() as entries:
+ self.assertTrue(list(entries))
+ with p.scandir() as entries:
+ for entry in entries:
+ child = p / entry.name
+ self.assertIsNotNone(entry)
+ self.assertEqual(entry.name, child.name)
+ self.assertEqual(entry.is_symlink(),
+ child.is_symlink())
+ self.assertEqual(entry.is_dir(follow_symlinks=False),
+ child.is_dir(follow_symlinks=False))
+ if entry.name != 'brokenLinkLoop':
+ self.assertEqual(entry.is_dir(), child.is_dir())
+
+
def test_glob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
@@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
def readlink(self):
path = str(self.parent.resolve() / self.name)
if path in self._symlinks:
- return self.with_segments(self._symlinks[path])
+ return self.with_segments(self._symlinks[path][0])
elif path in self._files or path in self._directories:
raise OSError(errno.EINVAL, "Not a symlink", path)
else:
@@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False):
if path in self._symlinks:
raise FileExistsError(errno.EEXIST, "File exists", path)
self._directories[parent].add(self.name)
- self._symlinks[path] = str(target)
+ self._symlinks[path] = str(target), target_is_directory
class DummyPathWithSymlinksTest(DummyPathTest):
diff --git a/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
new file mode 100644
index 00000000000000..ddf1f9725d9695
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst
@@ -0,0 +1,3 @@
+Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
+children and their file attributes. This is a trivial wrapper of
+:func:`os.scandir`.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]