https://github.com/python/cpython/commit/b69548a0f52418b8a2cf7c7a885fdd7d3bfb1b0b
commit: b69548a0f52418b8a2cf7c7a885fdd7d3bfb1b0b
branch: main
author: Barney Gale <[email protected]>
committer: barneygale <[email protected]>
date: 2024-01-26T01:12:46Z
summary:

GH-73435: Add `pathlib.PurePath.full_match()` (#114350)

In 49f90ba we added support for the recursive wildcard `**` in
`pathlib.PurePath.match()`. This should allow arbitrary prefix and suffix
matching, like `p.match('foo/**')` or `p.match('**/foo')`, but there's a
problem: for relative patterns only, `match()` implicitly inserts a `**`
token on the left hand side, causing all patterns to match from the right.
As a result, it's impossible to match relative patterns from the left:
`PurePath('foo/bar').match('bar/**')` is true!

This commit reverts the changes to `match()`, and instead adds a new
`full_match()` method that:

- Allows empty patterns
- Supports the recursive wildcard `**`
- Matches the *entire* path when given a relative pattern

files:
M Doc/library/glob.rst
M Doc/library/pathlib.rst
M Doc/whatsnew/3.13.rst
M Lib/pathlib/__init__.py
M Lib/pathlib/_abc.py
M Lib/test/test_pathlib/test_pathlib_abc.py

diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst
index 6e4f72c19ff4c9..19a0bbba8966ba 100644
--- a/Doc/library/glob.rst
+++ b/Doc/library/glob.rst
@@ -147,8 +147,9 @@ The :mod:`glob` module defines the following functions:
 
    .. seealso::
 
-     :meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods,
-     which call this function to implement pattern matching and globbing.
+     :meth:`pathlib.PurePath.full_match` and :meth:`pathlib.Path.glob`
+     methods, which call this function to implement pattern matching and
+     globbing.
 
    .. versionadded:: 3.13
 
diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index fcbc0bf489b344..2f4ff4efec47f8 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -559,55 +559,55 @@ Pure paths provide the following methods and properties:
       PureWindowsPath('c:/Program Files')
 
 
-.. method:: PurePath.match(pattern, *, case_sensitive=None)
+.. method:: PurePath.full_match(pattern, *, case_sensitive=None)
 
    Match this path against the provided glob-style pattern.  Return ``True``
-   if matching is successful, ``False`` otherwise.
-
-   If *pattern* is relative, the path can be either relative or absolute,
-   and matching is done from the right::
+   if matching is successful, ``False`` otherwise.  For example::
 
-      >>> PurePath('a/b.py').match('*.py')
-      True
-      >>> PurePath('/a/b/c.py').match('b/*.py')
+      >>> PurePath('a/b.py').full_match('a/*.py')
       True
-      >>> PurePath('/a/b/c.py').match('a/*.py')
+      >>> PurePath('a/b.py').full_match('*.py')
       False
+      >>> PurePath('/a/b/c.py').full_match('/a/**')
+      True
+      >>> PurePath('/a/b/c.py').full_match('**/*.py')
+      True
 
-   If *pattern* is absolute, the path must be absolute, and the whole path
-   must match::
+   As with other methods, case-sensitivity follows platform defaults::
 
-      >>> PurePath('/a.py').match('/*.py')
-      True
-      >>> PurePath('a/b.py').match('/*.py')
+      >>> PurePosixPath('b.py').full_match('*.PY')
       False
+      >>> PureWindowsPath('b.py').full_match('*.PY')
+      True
 
-   The *pattern* may be another path object; this speeds up matching the same
-   pattern against multiple files::
+   Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
 
-      >>> pattern = PurePath('*.py')
-      >>> PurePath('a/b.py').match(pattern)
-      True
+   .. versionadded:: 3.13
 
-   .. versionchanged:: 3.12
-      Accepts an object implementing the :class:`os.PathLike` interface.
 
-   As with other methods, case-sensitivity follows platform defaults::
+.. method:: PurePath.match(pattern, *, case_sensitive=None)
 
-      >>> PurePosixPath('b.py').match('*.PY')
-      False
-      >>> PureWindowsPath('b.py').match('*.PY')
+   Match this path against the provided non-recursive glob-style pattern.
+   Return ``True`` if matching is successful, ``False`` otherwise.
+
+   This method is similar to :meth:`~PurePath.full_match`, but empty patterns
+   aren't allowed (:exc:`ValueError` is raised), the recursive wildcard
+   "``**``" isn't supported (it acts like non-recursive "``*``"), and if a
+   relative pattern is provided, then matching is done from the right::
+
+      >>> PurePath('a/b.py').match('*.py')
+      True
+      >>> PurePath('/a/b/c.py').match('b/*.py')
       True
+      >>> PurePath('/a/b/c.py').match('a/*.py')
+      False
 
-   Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
+   .. versionchanged:: 3.12
+      The *pattern* parameter accepts a :term:`path-like object`.
 
    .. versionchanged:: 3.12
       The *case_sensitive* parameter was added.
 
-   .. versionchanged:: 3.13
-      Support for the recursive wildcard "``**``" was added. In previous
-      versions, it acted like the non-recursive wildcard "``*``".
-
 
 .. method:: PurePath.relative_to(other, walk_up=False)
 
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 40f0cd37fe9318..8c2bb05920d5b6 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -336,7 +336,8 @@ pathlib
   object from a 'file' URI (``file:/``).
   (Contributed by Barney Gale in :gh:`107465`.)
 
-* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
+* Add :meth:`pathlib.PurePath.full_match` for matching paths with
+  shell-style wildcards, including the recursive wildcard "``**``".
   (Contributed by Barney Gale in :gh:`73435`.)
 
 * Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`,
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py
index b043aed12b3849..eee82ef26bc7e7 100644
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -490,6 +490,13 @@ def _pattern_stack(self):
         parts.reverse()
         return parts
 
+    @property
+    def _pattern_str(self):
+        """The path expressed as a string, for use in pattern-matching."""
+        # The string representation of an empty path is a single dot ('.'). Empty
+        # paths shouldn't match wildcards, so we change it to the empty string.
+        path_str = str(self)
+        return '' if path_str == '.' else path_str
 
 # Subclassing os.PathLike makes isinstance() checks slower,
 # which in turn makes Path construction slower. Register instead!
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index 553e1a399061d3..6303a18680befc 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -47,8 +47,8 @@ def _is_case_sensitive(pathmod):
 re = glob = None
 
 
-@functools.lru_cache(maxsize=256)
-def _compile_pattern(pat, sep, case_sensitive):
+@functools.lru_cache(maxsize=512)
+def _compile_pattern(pat, sep, case_sensitive, recursive=True):
     """Compile given glob pattern to a re.Pattern object (observing case
     sensitivity)."""
     global re, glob
@@ -56,10 +56,7 @@ def _compile_pattern(pat, sep, case_sensitive):
         import re, glob
 
     flags = re.NOFLAG if case_sensitive else re.IGNORECASE
-    regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep)
-    # The string representation of an empty path is a single dot ('.'). Empty
-    # paths shouldn't match wildcards, so we consume it with an atomic group.
-    regex = r'(\.\Z)?+' + regex
+    regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
     return re.compile(regex, flags=flags).match
 
 
@@ -441,23 +438,48 @@ def _pattern_stack(self):
             raise NotImplementedError("Non-relative patterns are unsupported")
         return parts
 
+    @property
+    def _pattern_str(self):
+        """The path expressed as a string, for use in pattern-matching."""
+        return str(self)
+
     def match(self, path_pattern, *, case_sensitive=None):
         """
-        Return True if this path matches the given pattern.
+        Return True if this path matches the given pattern. If the pattern is
+        relative, matching is done from the right; otherwise, the entire path
+        is matched. The recursive wildcard '**' is *not* supported by this
+        method.
         """
         if not isinstance(path_pattern, PurePathBase):
             path_pattern = self.with_segments(path_pattern)
         if case_sensitive is None:
             case_sensitive = _is_case_sensitive(self.pathmod)
         sep = path_pattern.pathmod.sep
-        if path_pattern.anchor:
-            pattern_str = str(path_pattern)
-        elif path_pattern.parts:
-            pattern_str = str('**' / path_pattern)
-        else:
+        path_parts = self.parts[::-1]
+        pattern_parts = path_pattern.parts[::-1]
+        if not pattern_parts:
             raise ValueError("empty pattern")
-        match = _compile_pattern(pattern_str, sep, case_sensitive)
-        return match(str(self)) is not None
+        if len(path_parts) < len(pattern_parts):
+            return False
+        if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
+            return False
+        for path_part, pattern_part in zip(path_parts, pattern_parts):
+            match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
+            if match(path_part) is None:
+                return False
+        return True
+
+    def full_match(self, pattern, *, case_sensitive=None):
+        """
+        Return True if this path matches the given glob-style pattern. The
+        pattern is matched against the entire path.
+        """
+        if not isinstance(pattern, PurePathBase):
+            pattern = self.with_segments(pattern)
+        if case_sensitive is None:
+            case_sensitive = _is_case_sensitive(self.pathmod)
+        match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive)
+        return match(self._pattern_str) is not None
 
 
 
@@ -781,8 +803,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         if filter_paths:
             # Filter out paths that don't match pattern.
             prefix_len = len(str(self._make_child_relpath('_'))) - 1
-            match = _compile_pattern(str(pattern), sep, case_sensitive)
-            paths = (path for path in paths if match(str(path), prefix_len))
+            match = _compile_pattern(pattern._pattern_str, sep, case_sensitive)
+            paths = (path for path in paths if match(path._pattern_str, prefix_len))
         return paths
 
     def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index 199718a8a69c5a..364f776dbb1413 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -249,29 +249,8 @@ def test_match_common(self):
         self.assertFalse(P('/ab.py').match('/a/*.py'))
         self.assertFalse(P('/a/b/c.py').match('/a/*.py'))
         # Multi-part glob-style pattern.
-        self.assertTrue(P('a').match('**'))
-        self.assertTrue(P('c.py').match('**'))
-        self.assertTrue(P('a/b/c.py').match('**'))
-        self.assertTrue(P('/a/b/c.py').match('**'))
-        self.assertTrue(P('/a/b/c.py').match('/**'))
-        self.assertTrue(P('/a/b/c.py').match('/a/**'))
-        self.assertTrue(P('/a/b/c.py').match('**/*.py'))
-        self.assertTrue(P('/a/b/c.py').match('/**/*.py'))
+        self.assertFalse(P('/a/b/c.py').match('/**/*.py'))
         self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
-        self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py'))
-        self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py'))
-        self.assertFalse(P('c.py').match('**/a.py'))
-        self.assertFalse(P('c.py').match('c/**'))
-        self.assertFalse(P('a/b/c.py').match('**/a'))
-        self.assertFalse(P('a/b/c.py').match('**/a/b'))
-        self.assertFalse(P('a/b/c.py').match('**/a/b/c'))
-        self.assertFalse(P('a/b/c.py').match('**/a/b/c.'))
-        self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
-        self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
-        self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**'))
-        self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py'))
-        self.assertRaises(ValueError, P('a').match, '**a/b/c')
-        self.assertRaises(ValueError, P('a').match, 'a/b/c**')
         # Case-sensitive flag
         self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
         self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))
@@ -279,9 +258,82 @@ def test_match_common(self):
         self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False))
         # Matching against empty path
         self.assertFalse(P('').match('*'))
-        self.assertTrue(P('').match('**'))
+        self.assertFalse(P('').match('**'))
         self.assertFalse(P('').match('**/*'))
 
+    def test_full_match_common(self):
+        P = self.cls
+        # Simple relative pattern.
+        self.assertTrue(P('b.py').full_match('b.py'))
+        self.assertFalse(P('a/b.py').full_match('b.py'))
+        self.assertFalse(P('/a/b.py').full_match('b.py'))
+        self.assertFalse(P('a.py').full_match('b.py'))
+        self.assertFalse(P('b/py').full_match('b.py'))
+        self.assertFalse(P('/a.py').full_match('b.py'))
+        self.assertFalse(P('b.py/c').full_match('b.py'))
+        # Wildcard relative pattern.
+        self.assertTrue(P('b.py').full_match('*.py'))
+        self.assertFalse(P('a/b.py').full_match('*.py'))
+        self.assertFalse(P('/a/b.py').full_match('*.py'))
+        self.assertFalse(P('b.pyc').full_match('*.py'))
+        self.assertFalse(P('b./py').full_match('*.py'))
+        self.assertFalse(P('b.py/c').full_match('*.py'))
+        # Multi-part relative pattern.
+        self.assertTrue(P('ab/c.py').full_match('a*/*.py'))
+        self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py'))
+        self.assertFalse(P('a.py').full_match('a*/*.py'))
+        self.assertFalse(P('/dab/c.py').full_match('a*/*.py'))
+        self.assertFalse(P('ab/c.py/d').full_match('a*/*.py'))
+        # Absolute pattern.
+        self.assertTrue(P('/b.py').full_match('/*.py'))
+        self.assertFalse(P('b.py').full_match('/*.py'))
+        self.assertFalse(P('a/b.py').full_match('/*.py'))
+        self.assertFalse(P('/a/b.py').full_match('/*.py'))
+        # Multi-part absolute pattern.
+        self.assertTrue(P('/a/b.py').full_match('/a/*.py'))
+        self.assertFalse(P('/ab.py').full_match('/a/*.py'))
+        self.assertFalse(P('/a/b/c.py').full_match('/a/*.py'))
+        # Multi-part glob-style pattern.
+        self.assertTrue(P('a').full_match('**'))
+        self.assertTrue(P('c.py').full_match('**'))
+        self.assertTrue(P('a/b/c.py').full_match('**'))
+        self.assertTrue(P('/a/b/c.py').full_match('**'))
+        self.assertTrue(P('/a/b/c.py').full_match('/**'))
+        self.assertTrue(P('/a/b/c.py').full_match('/a/**'))
+        self.assertTrue(P('/a/b/c.py').full_match('**/*.py'))
+        self.assertTrue(P('/a/b/c.py').full_match('/**/*.py'))
+        self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py'))
+        self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py'))
+        self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py'))
+        self.assertFalse(P('c.py').full_match('**/a.py'))
+        self.assertFalse(P('c.py').full_match('c/**'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a/b'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a/b/c'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**'))
+        self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**'))
+        self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**'))
+        self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py'))
+        self.assertRaises(ValueError, P('a').full_match, '**a/b/c')
+        self.assertRaises(ValueError, P('a').full_match, 'a/b/c**')
+        # Case-sensitive flag
+        self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True))
+        self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False))
+        self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True))
+        self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False))
+        # Matching against empty path
+        self.assertFalse(P('').full_match('*'))
+        self.assertTrue(P('').full_match('**'))
+        self.assertFalse(P('').full_match('**/*'))
+        # Matching with empty pattern
+        self.assertTrue(P('').full_match(''))
+        self.assertTrue(P('.').full_match('.'))
+        self.assertFalse(P('/').full_match(''))
+        self.assertFalse(P('/').full_match('.'))
+        self.assertFalse(P('foo').full_match(''))
+        self.assertFalse(P('foo').full_match('.'))
+
     def test_parts_common(self):
         # `parts` returns a tuple.
         sep = self.sep

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to