https://github.com/python/cpython/commit/dcd58c50844dae0d83517e88518a677914ea594b
commit: dcd58c50844dae0d83517e88518a677914ea594b
branch: main
author: Bernhard M. Wiedemann <[email protected]>
committer: AA-Turner <[email protected]>
date: 2024-10-12T18:18:48+01:00
summary:

gh-125260: Change the default ``gzip.compress()`` mtime to 0 (#125261)

This follows GNU gzip, which defaults to using 0 as the mtime
for compressing stdin, where no file mtime is involved.

This makes the output of gzip.compress() deterministic by default,
greatly helping reproducible builds.

Co-authored-by: Adam Turner <[email protected]>

files:
A Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst
M Doc/library/gzip.rst
M Lib/gzip.py
M Lib/test/test_gzip.py

diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index 6b6e158f6eba2c..f24e73517e5767 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -184,11 +184,12 @@ The module defines the following items:
       attribute instead.
 
 
-.. function:: compress(data, compresslevel=9, *, mtime=None)
+.. function:: compress(data, compresslevel=9, *, mtime=0)
 
    Compress the *data*, returning a :class:`bytes` object containing
    the compressed data.  *compresslevel* and *mtime* have the same meaning as in
-   the :class:`GzipFile` constructor above.
+   the :class:`GzipFile` constructor above,
+   but *mtime* defaults to 0 for reproducible output.
 
    .. versionadded:: 3.2
    .. versionchanged:: 3.8
@@ -203,6 +204,10 @@ The module defines the following items:
    .. versionchanged:: 3.13
       The gzip header OS byte is guaranteed to be set to 255 when this function
       is used as was the case in 3.10 and earlier.
+   .. versionchanged:: 3.14
+      The *mtime* parameter now defaults to 0 for reproducible output.
+      For the previous behaviour of using the current time,
+      pass ``None`` to *mtime*.
 
 .. function:: decompress(data)
 
diff --git a/Lib/gzip.py b/Lib/gzip.py
index ba753ce3050dd8..1a3c82ce7e0711 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -580,12 +580,12 @@ def _rewind(self):
         self._new_member = True
 
 
-def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
+def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
     """Compress data in one shot and return the compressed string.
 
     compresslevel sets the compression level in range of 0-9.
-    mtime can be used to set the modification time. The modification time is
-    set to the current time by default.
+    mtime can be used to set the modification time.
+    The modification time is set to 0 by default, for reproducibility.
     """
     # Wbits=31 automatically includes a gzip header and trailer.
     gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index ae384c3849d49e..bf6e1703db8451 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -713,6 +713,17 @@ def test_compress_mtime(self):
                         f.read(1) # to set mtime attribute
                         self.assertEqual(f.mtime, mtime)
 
+    def test_compress_mtime_default(self):
+        # test for gh-125260
+        datac = gzip.compress(data1, mtime=0)
+        datac2 = gzip.compress(data1)
+        self.assertEqual(datac, datac2)
+        datac3 = gzip.compress(data1, mtime=None)
+        self.assertNotEqual(datac, datac3)
+        with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f:
+            f.read(1) # to set mtime attribute
+            self.assertGreater(f.mtime, 1)
+
     def test_compress_correct_level(self):
         for mtime in (0, 42):
             with self.subTest(mtime=mtime):
diff --git a/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst
new file mode 100644
index 00000000000000..fab524ea0185c2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst
@@ -0,0 +1,2 @@
+The :func:`gzip.compress` *mtime* parameter now defaults to 0 for reproducible output.
+Patch by Bernhard M. Wiedemann and Adam Turner.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to