https://github.com/python/cpython/commit/795f2597a4be988e2bb19b69ff9958e981cb894e
commit: 795f2597a4be988e2bb19b69ff9958e981cb894e
branch: 3.11
author: Jason R. Coombs <[email protected]>
committer: pablogsal <[email protected]>
date: 2024-08-20T00:28:20+01:00
summary:

[3.11] gh-122905: Sanitize names in zipfile.Path. (GH-122906) (#122925)

* gh-122905: Sanitize names in zipfile.Path. (#122906)

Ported from zipp 3.19.1; ref jaraco/zipp#119.

(cherry picked from commit 9cd03263100ddb1657826cc4a71470786cab3932)

* [3.11] gh-122905: Sanitize names in zipfile.Path. (GH-122906)

Ported from zipp 3.19.1; ref jaraco/zippGH-119.
(cherry picked from commit 9cd03263100ddb1657826cc4a71470786cab3932)

Co-authored-by: Jason R. Coombs <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py

diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 4de6f379a47891..8bdc7a1b7d656a 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -3651,6 +3651,23 @@ def test_extract_orig_with_implied_dirs(self, alpharep):
         zipfile.Path(zf)
         zf.extractall(source_path.parent)
 
+    def test_malformed_paths(self):
+        """
+        Path should handle malformed paths.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr("/one-slash.txt", b"content")
+        zf.writestr("//two-slash.txt", b"content")
+        zf.writestr("../parent.txt", b"content")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        assert list(map(str, root.iterdir())) == [
+            'one-slash.txt',
+            'two-slash.txt',
+            'parent.txt',
+        ]
+
 
 class EncodedMetadataTests(unittest.TestCase):
     file_names = ['\u4e00', '\u4e8c', '\u4e09']  # Han 'one', 'two', 'three'
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 86829abce4edaa..b7bf9ef7e37e60 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -9,6 +9,7 @@
 import itertools
 import os
 import posixpath
+import re
 import shutil
 import stat
 import struct
@@ -2243,7 +2244,65 @@ def _difference(minuend, subtrahend):
     return itertools.filterfalse(set(subtrahend).__contains__, minuend)
 
 
-class CompleteDirs(ZipFile):
+class SanitizedNames:
+    """
+    ZipFile mix-in to ensure names are sanitized.
+    """
+
+    def namelist(self):
+        return list(map(self._sanitize, super().namelist()))
+
+    @staticmethod
+    def _sanitize(name):
+        r"""
+        Ensure a relative path with posix separators and no dot names.
+        Modeled after
+        
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
+        but provides consistent cross-platform behavior.
+        >>> san = SanitizedNames._sanitize
+        >>> san('/foo/bar')
+        'foo/bar'
+        >>> san('//foo.txt')
+        'foo.txt'
+        >>> san('foo/.././bar.txt')
+        'foo/bar.txt'
+        >>> san('foo../.bar.txt')
+        'foo../.bar.txt'
+        >>> san('\\foo\\bar.txt')
+        'foo/bar.txt'
+        >>> san('D:\\foo.txt')
+        'D/foo.txt'
+        >>> san('\\\\server\\share\\file.txt')
+        'server/share/file.txt'
+        >>> san('\\\\?\\GLOBALROOT\\Volume3')
+        '?/GLOBALROOT/Volume3'
+        >>> san('\\\\.\\PhysicalDrive1\\root')
+        'PhysicalDrive1/root'
+        Retain any trailing slash.
+        >>> san('abc/')
+        'abc/'
+        Raises a ValueError if the result is empty.
+        >>> san('../..')
+        Traceback (most recent call last):
+        ...
+        ValueError: Empty filename
+        """
+
+        def allowed(part):
+            return part and part not in {'..', '.'}
+
+        # Remove the drive letter.
+        # Don't use ntpath.splitdrive, because that also strips UNC paths
+        bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
+        clean = bare.replace('\\', '/')
+        parts = clean.split('/')
+        joined = '/'.join(filter(allowed, parts))
+        if not joined:
+            raise ValueError("Empty filename")
+        return joined + '/' * name.endswith('/')
+
+
+class CompleteDirs(SanitizedNames, ZipFile):
     """
     A ZipFile subclass that ensures that implied directories
     are always included in the namelist.
diff --git 
a/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst 
b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
new file mode 100644
index 00000000000000..1be44c906c4f30
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-11-14-08-04.gh-issue-122905.7tDsxA.rst
@@ -0,0 +1 @@
+:class:`zipfile.Path` objects now sanitize names from the zipfile.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to