https://github.com/python/cpython/commit/a2ba0a7552580f616f74091f8976410f8a310313
commit: a2ba0a7552580f616f74091f8976410f8a310313
branch: main
author: Gregory P. Smith <[email protected]>
committer: gpshead <[email protected]>
date: 2025-09-01T17:14:23-07:00
summary:
gh-61206: Support zstandard compression in the zipimport module (GH-138254)
* gh-61206: support zstd in zipimport
* NEWS entry
* versionchanged doc
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-08-30-00-55-35.gh-issue-61206.HeFLvl.rst
M Doc/library/zipimport.rst
M Lib/test/test_zipimport.py
M Lib/zipimport.py
diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst
index 851ef1128dde62..765e5cfd3bbdd6 100644
--- a/Doc/library/zipimport.rst
+++ b/Doc/library/zipimport.rst
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
corresponding :file:`.pyc` file, meaning that if a ZIP archive
doesn't contain :file:`.pyc` files, importing may be rather slow.
+.. versionchanged:: next
+ Zstandard (*zstd*) compressed zip file entries are supported.
+
.. versionchanged:: 3.13
ZIP64 is supported
diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
index d359f0b0fbb577..dce3e1d9d38e7a 100644
--- a/Lib/test/test_zipimport.py
+++ b/Lib/test/test_zipimport.py
@@ -14,7 +14,7 @@
from test.support import import_helper
from test.support import os_helper
-from zipfile import ZipFile, ZipInfo, ZIP_STORED, ZIP_DEFLATED
+from zipfile import ZipFile, ZipInfo, ZIP_STORED, ZIP_DEFLATED, ZIP_ZSTANDARD
import zipimport
import linecache
@@ -193,19 +193,38 @@ def testAFakeZlib(self):
# occur in that case (builtin modules are always found first),
# so we'll simply skip it then. Bug #765456.
#
- if "zlib" in sys.builtin_module_names:
- self.skipTest('zlib is a builtin module')
- if "zlib" in sys.modules:
- del sys.modules["zlib"]
- files = {"zlib.py": test_src}
+ if self.compression == ZIP_DEFLATED:
+ mod_name = "zlib"
+ if zipimport._zlib_decompress: # validate attr name
+ # reset the cached import to avoid test order dependencies
+ zipimport._zlib_decompress = None # reset cache
+ elif self.compression == ZIP_ZSTANDARD:
+ mod_name = "_zstd"
+ if zipimport._zstd_decompressor_class: # validate attr name
+ # reset the cached import to avoid test order dependencies
+ zipimport._zstd_decompressor_class = None
+ else:
+ mod_name = "zlib" # the ZIP_STORED case below
+
+ if mod_name in sys.builtin_module_names:
+ self.skipTest(f"{mod_name} is a builtin module")
+ if mod_name in sys.modules:
+ del sys.modules[mod_name]
+ files = {f"{mod_name}.py": test_src}
try:
- self.doTest(".py", files, "zlib")
+ self.doTest(".py", files, mod_name)
except ImportError:
- if self.compression != ZIP_DEFLATED:
- self.fail("expected test to not raise ImportError")
- else:
if self.compression != ZIP_STORED:
- self.fail("expected test to raise ImportError")
+ # Expected - fake compression module can't decompress
+ pass
+ else:
+ self.fail("expected test to not raise ImportError for uncompressed")
+ else:
+ if self.compression == ZIP_STORED:
+ # Expected - no compression needed, so fake module works
+ pass
+ else:
+ self.fail("expected test to raise ImportError for compressed zip with fake compression module")
def testPy(self):
files = {TESTMOD + ".py": test_src}
@@ -1000,10 +1019,15 @@ def assertDataEntry(name):
@support.requires_zlib()
-class CompressedZipImportTestCase(UncompressedZipImportTestCase):
+class DeflateCompressedZipImportTestCase(UncompressedZipImportTestCase):
compression = ZIP_DEFLATED
[email protected]_zstd()
+class ZStdCompressedZipImportTestCase(UncompressedZipImportTestCase):
+ compression = ZIP_ZSTANDARD
+
+
class BadFileZipImportTestCase(unittest.TestCase):
def assertZipFailure(self, filename):
self.assertRaises(zipimport.ZipImportError,
diff --git a/Lib/zipimport.py b/Lib/zipimport.py
index 35820844b2561e..188c4bca97798d 100644
--- a/Lib/zipimport.py
+++ b/Lib/zipimport.py
@@ -557,11 +557,16 @@ def _read_directory(archive):
)
_importing_zlib = False
+_zlib_decompress = None
# Return the zlib.decompress function object, or NULL if zlib couldn't
# be imported. The function is cached when found, so subsequent calls
# don't import zlib again.
-def _get_decompress_func():
+def _get_zlib_decompress_func():
+ global _zlib_decompress
+ if _zlib_decompress:
+ return _zlib_decompress
+
global _importing_zlib
if _importing_zlib:
# Someone has a zlib.py[co] in their Zip file
@@ -571,7 +576,7 @@ def _get_decompress_func():
_importing_zlib = True
try:
- from zlib import decompress
+ from zlib import decompress as _zlib_decompress
except Exception:
_bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
raise ZipImportError("can't decompress data; zlib not available")
@@ -579,7 +584,54 @@ def _get_decompress_func():
_importing_zlib = False
_bootstrap._verbose_message('zipimport: zlib available')
- return decompress
+ return _zlib_decompress
+
+
+_importing_zstd = False
+_zstd_decompressor_class = None
+
+# Return the _zstd.ZstdDecompressor function object, or NULL if _zstd couldn't
+# be imported. The result is cached when found.
+def _get_zstd_decompressor_class():
+ global _zstd_decompressor_class
+ if _zstd_decompressor_class:
+ return _zstd_decompressor_class
+
+ global _importing_zstd
+ if _importing_zstd:
+ # Someone has a _zstd.py[co] in their Zip file
+ # let's avoid a stack overflow.
+ _bootstrap._verbose_message("zipimport: zstd UNAVAILABLE")
+ raise ZipImportError("can't decompress data; zstd not available")
+
+ _importing_zstd = True
+ try:
+ from _zstd import ZstdDecompressor as _zstd_decompressor_class
+ except Exception:
+ _bootstrap._verbose_message("zipimport: zstd UNAVAILABLE")
+ raise ZipImportError("can't decompress data; zstd not available")
+ finally:
+ _importing_zstd = False
+
+ _bootstrap._verbose_message("zipimport: zstd available")
+ return _zstd_decompressor_class
+
+
+def _zstd_decompress(data):
+ # A simple version of compression.zstd.decompress() as we cannot import
+ # that here as the stdlib itself could be being zipimported.
+ results = []
+ while True:
+ decomp = _get_zstd_decompressor_class()()
+ results.append(decomp.decompress(data))
+ if not decomp.eof:
+ raise ZipImportError("zipimport: zstd compressed data ended before "
+ "the end-of-stream marker")
+ data = decomp.unused_data
+ if not data:
+ break
+ return b"".join(results)
+
# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
def _get_data(archive, toc_entry):
@@ -613,16 +665,23 @@ def _get_data(archive, toc_entry):
if len(raw_data) != data_size:
raise OSError("zipimport: can't read data")
- if compress == 0:
- # data is not compressed
- return raw_data
-
- # Decompress with zlib
- try:
- decompress = _get_decompress_func()
- except Exception:
- raise ZipImportError("can't decompress data; zlib not available")
- return decompress(raw_data, -15)
+ match compress:
+ case 0: # stored
+ return raw_data
+ case 8: # deflate aka zlib
+ try:
+ decompress = _get_zlib_decompress_func()
+ except Exception:
+ raise ZipImportError("can't decompress data; zlib not available")
+ return decompress(raw_data, -15)
+ case 93: # zstd
+ try:
+ return _zstd_decompress(raw_data)
+ except Exception:
+ raise ZipImportError("could not decompress zstd data")
+ # bz2 and lzma could be added, but are largely obsolete.
+ case _:
+ raise ZipImportError(f"zipimport: unsupported compression {compress}")
# Lenient date/time comparison function. The precision of the mtime
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-30-00-55-35.gh-issue-61206.HeFLvl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-30-00-55-35.gh-issue-61206.HeFLvl.rst
new file mode 100644
index 00000000000000..88c93066d24724
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-30-00-55-35.gh-issue-61206.HeFLvl.rst
@@ -0,0 +1 @@
+:mod:`zipimport` now supports zstandard compressed zip file entries.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]