Package: dpkg
Version: 1.19.0.5
Severity: wishlist
Tags: patch

Dear Dpkg Developers,

Please add support for Zstandard compression to dpkg and other
programs generated by the dpkg source package [1].

Tests on packages repackaged with zstd -19 show only a small increase in
compressed package size compared to xz -6, while decompression time
decreased dramatically.

The recompressed firefox .deb (Ubuntu's
firefox_58.0.2+build1-0ubuntu0.17.10.1_amd64.deb) increased ~9% in
size but decompressed in <20% of the original time:

$ du -s firefox-*deb
43960 firefox-xz.deb
47924 firefox-zstd.deb

$ rm -rf firefox-xz/* ;time  dpkg-deb -R firefox-xz.deb firefox-xz/
real 0m4,270s
user 0m4,220s
sys 0m0,630s
$ rm -rf firefox-zstd/* ;time  dpkg-deb -R firefox-zstd.deb firefox-zstd/
real 0m0,765s
user 0m0,556s
sys 0m0,462s

Tests on the full Ubuntu main archive showed a ~6% average increase in
the size of the binary packages.

The patches are also available on Salsa [2].

Cheers,
Balint

--
Balint Reczey
Ubuntu & Debian Developer

[1] http://facebook.github.io/zstd/
[2] https://salsa.debian.org/rbalint/dpkg/commits/zstd
From 79aad733cbc7edd44e124702f82b8a46a3a4aea9 Mon Sep 17 00:00:00 2001
From: Balint Reczey <balint.rec...@canonical.com>
Date: Thu, 8 Mar 2018 09:53:36 +0100
Subject: [PATCH 1/4] dpkg: Add Zstandard compression support

---
 README                      |   1 +
 configure.ac                |   2 +
 debian/control              |   3 ++
 debian/rules                |   1 +
 dpkg-deb/Makefile.am        |   1 +
 dpkg-deb/extract.c          |   1 +
 dpkg-deb/main.c             |   2 +-
 lib/dpkg/compress.c         | 127 +++++++++++++++++++++++++++++++++++++++++++-
 lib/dpkg/compress.h         |   1 +
 m4/dpkg-libs.m4             |   7 +++
 man/deb.man                 |   6 ++-
 man/dpkg-deb.man            |   2 +-
 man/dpkg-source.man         |   2 +-
 scripts/Dpkg/Compression.pm |   6 +++
 14 files changed, 156 insertions(+), 6 deletions(-)

diff --git a/README b/README
index 348f8e700..b0cf0a528 100644
--- a/README
+++ b/README
@@ -72,6 +72,7 @@ To enable optional functionality or programs, this software might be needed:
 
   libmd (used by libdpkg, currently falling back to embedded code)
   libz (from zlib, used instead of gzip command-line tool)
+  libzstd (from zstd, used instead of zstd command-line tool)
   liblzma (from xz utils, used instead of xz command-line tool)
   libbz2 (from bzip2, used instead of bzip2 command-line tool)
   libselinux
diff --git a/configure.ac b/configure.ac
index f6dff9f5e..2fbff6759 100644
--- a/configure.ac
+++ b/configure.ac
@@ -75,6 +75,7 @@ AC_SYS_LARGEFILE
 # Checks for libraries.
 DPKG_LIB_MD
 DPKG_LIB_Z
+DPKG_LIB_ZSTD
 DPKG_LIB_BZ2
 DPKG_LIB_LZMA
 DPKG_LIB_SELINUX
@@ -251,6 +252,7 @@ Configuration:
     libselinux  . . . . . . . . . : $have_libselinux
     libmd . . . . . . . . . . . . : $have_libmd
     libz  . . . . . . . . . . . . : $have_libz
+    libzstd  . . . . . . . . . .  : $have_libzstd
     liblzma . . . . . . . . . . . : $have_liblzma
     libbz2  . . . . . . . . . . . : $have_libbz2
     libcurses . . . . . . . . . . : ${have_libcurses:-no}
diff --git a/debian/control b/debian/control
index c73f79762..833f6c2b7 100644
--- a/debian/control
+++ b/debian/control
@@ -20,6 +20,7 @@ Build-Depends:
  po4a (>= 0.43),
  zlib1g-dev,
  libbz2-dev,
+ libzstd-dev,
  liblzma-dev,
  libselinux1-dev [linux-any],
  libncursesw5-dev,
@@ -67,6 +68,7 @@ Multi-Arch: same
 Depends:
  ${misc:Depends},
  zlib1g-dev,
+ libzstd-dev,
  liblzma-dev,
  libbz2-dev,
 Description: Debian package management static library
@@ -132,6 +134,7 @@ Recommends:
 # Used by Dpkg::Gettext.
  liblocale-gettext-perl,
  bzip2,
+ zstd,
  xz-utils,
 Suggests:
  debian-keyring,
diff --git a/debian/rules b/debian/rules
index 27a2499ef..92f1d1c48 100755
--- a/debian/rules
+++ b/debian/rules
@@ -64,6 +64,7 @@ build-tree/config.status: configure
 		--with-devlibdir=\$${prefix}/lib/$(DEB_HOST_MULTIARCH) \
 		--without-libmd \
 		--with-libz \
+		--with-libzstd \
 		--with-liblzma \
 		--with-libbz2
 
diff --git a/dpkg-deb/Makefile.am b/dpkg-deb/Makefile.am
index 02d79ed7d..bbd30e02c 100644
--- a/dpkg-deb/Makefile.am
+++ b/dpkg-deb/Makefile.am
@@ -21,5 +21,6 @@ dpkg_deb_LDADD = \
 	../lib/dpkg/libdpkg.la \
 	$(LIBINTL) \
 	$(Z_LIBS) \
+	$(ZSTD_LIBS) \
 	$(LZMA_LIBS) \
 	$(BZ2_LIBS)
diff --git a/dpkg-deb/extract.c b/dpkg-deb/extract.c
index dba15dedb..7fd4b2b67 100644
--- a/dpkg-deb/extract.c
+++ b/dpkg-deb/extract.c
@@ -179,6 +179,7 @@ extracthalf(const char *debar, const char *dir,
           decompressor = compressor_find_by_extension(extension);
           if (decompressor != COMPRESSOR_TYPE_NONE &&
               decompressor != COMPRESSOR_TYPE_GZIP &&
+              decompressor != COMPRESSOR_TYPE_ZSTD &&
               decompressor != COMPRESSOR_TYPE_XZ)
             ohshit(_("archive '%s' uses unknown compression for member '%.*s', "
                      "giving up"),
diff --git a/dpkg-deb/main.c b/dpkg-deb/main.c
index 52e9ce67d..7f898210e 100644
--- a/dpkg-deb/main.c
+++ b/dpkg-deb/main.c
@@ -108,7 +108,7 @@ usage(const struct cmdinfo *cip, const char *value)
 "      --[no-]uniform-compression   Use the compression params on all members.\n"
 "  -z#                              Set the compression level when building.\n"
 "  -Z<type>                         Set the compression type used when building.\n"
-"                                     Allowed types: gzip, xz, none.\n"
+"                                     Allowed types: gzip, xz, zstd, none.\n"
 "  -S<strategy>                     Set the compression strategy when building.\n"
 "                                     Allowed values: none; extreme (xz);\n"
 "                                     filtered, huffman, rle, fixed (gzip).\n"
diff --git a/lib/dpkg/compress.c b/lib/dpkg/compress.c
index 44075cdb6..e20add3b7 100644
--- a/lib/dpkg/compress.c
+++ b/lib/dpkg/compress.c
@@ -32,6 +32,9 @@
 #ifdef WITH_LIBZ
 #include <zlib.h>
 #endif
+#ifdef WITH_LIBZSTD
+#include <zstd.h>
+#endif
 #ifdef WITH_LIBLZMA
 #include <lzma.h>
 #endif
@@ -47,7 +50,7 @@
 #include <dpkg/buffer.h>
 #include <dpkg/command.h>
 #include <dpkg/compress.h>
-#if !defined(WITH_LIBZ) || !defined(WITH_LIBLZMA) || !defined(WITH_LIBBZ2)
+#if !defined(WITH_LIBZ) || !defined(WITH_LIBZSTD) || !defined(WITH_LIBLZMA) || !defined(WITH_LIBBZ2)
 #include <dpkg/subproc.h>
 
 static void DPKG_ATTR_SENTINEL
@@ -750,6 +753,127 @@ static const struct compressor compressor_lzma = {
 	.decompress = decompress_lzma,
 };
 
+/*
+ * Zstd compressor.
+ */
+
+#define ZSTD		"zstd"
+
+#ifdef WITH_LIBZSTD
+
+static void
+decompress_zstd(int fd_in, int fd_out, const char *desc)
+{
+  size_t const buf_in_size = ZSTD_DStreamInSize();
+  void*  const buf_in  = malloc(buf_in_size);
+  size_t const buf_out_size = ZSTD_DStreamOutSize();
+  void*  const buf_out = malloc(buf_out_size);
+  size_t init_result, just_read, to_read;
+  ZSTD_DStream* const dstream = ZSTD_createDStream();
+  if (dstream == NULL) {
+    ohshit(_("ZSTD_createDStream() error "));
+  }
+
+  /* TODO: a file may consist of multiple appended frames (ex : pzstd).
+   * The following implementation decompresses only the first frame */
+  init_result = ZSTD_initDStream(dstream);
+  if (ZSTD_isError(init_result)) {
+    ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
+  }
+  to_read = init_result;
+  while ((just_read = fd_read(fd_in, buf_in, to_read))) {
+    ZSTD_inBuffer input = { buf_in, just_read, 0 };
+    while (input.pos < input.size) {
+      ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+      to_read = ZSTD_decompressStream(dstream, &output , &input);
+      if (ZSTD_isError(to_read)) {
+        ohshit(_("ZSTD_decompressStream() error : %s \n"),
+               ZSTD_getErrorName(to_read));
+      }
+      fd_write(fd_out, output.dst, output.pos);
+    }
+  }
+
+  ZSTD_freeDStream(dstream);
+  free(buf_in);
+  free(buf_out);
+
+}
+
+static void
+compress_zstd(int fd_in, int fd_out, struct compress_params *params, const char *desc)
+{
+  size_t const buf_in_size = ZSTD_CStreamInSize();
+  void*  const buf_in  = malloc(buf_in_size);
+  size_t const buf_out_size = ZSTD_CStreamOutSize();
+  void*  const buf_out = malloc(buf_out_size);
+  size_t init_result, end_res;
+  size_t just_read, to_read;
+  ZSTD_CStream* const cstream = ZSTD_createCStream();
+  if (cstream == NULL) {
+    ohshit(_("ZSTD_createCStream() error "));
+  }
+
+  init_result = ZSTD_initCStream(cstream, params->level);
+  if (ZSTD_isError(init_result)) {
+    ohshit(_("ZSTD_initCStream() error : %s"), ZSTD_getErrorName(init_result));
+  }
+  to_read = buf_in_size;
+  while ((just_read = fd_read(fd_in, buf_in, to_read))) {
+    ZSTD_inBuffer input = { buf_in, just_read, 0 };
+    while (input.pos < input.size) {
+      ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+      to_read = ZSTD_compressStream(cstream, &output , &input);
+      if (ZSTD_isError(to_read)) {
+        ohshit(_("ZSTD_decompressStream() error : %s \n"), ZSTD_getErrorName(to_read));
+      }
+      fd_write(fd_out, output.dst, output.pos);
+    }
+  }
+  do {
+    ZSTD_outBuffer output = { buf_out, buf_out_size, 0 };
+    end_res = ZSTD_endStream(cstream, &output);
+    if (ZSTD_isError(end_res)) {
+      ohshit(_("ZSTD_endstreamStream() error : %s \n"), ZSTD_getErrorName(end_res));
+    }
+    fd_write(fd_out, output.dst, output.pos);
+  } while (end_res > 0);
+
+  ZSTD_freeCStream(cstream);
+  free(buf_in);
+  free(buf_out);
+}
+
+#else
+static const char *env_zstd[] = {};
+
+static void
+decompress_zstd(int fd_in, int fd_out, const char *desc)
+{
+  fd_fd_filter(fd_in, fd_out, desc, env_zstd, ZSTD, "-dcq", NULL);
+}
+
+static void
+compress_zstd(int fd_in, int fd_out, struct compress_params *params, const char *desc)
+{
+	char combuf[6];
+
+	snprintf(combuf, sizeof(combuf), "-c%d", params->level);
+	fd_fd_filter(fd_in, fd_out, desc, env_zstd, ZSTD, combuf, "-q", NULL);
+}
+#endif
+
+static const struct compressor compressor_zstd = {
+	.name = "zstd",
+	.extension = ".zst",
+        /* zstd command's default is 3 but the aim is to be closer to xz's
+         * default compression efficiency */
+	.default_level = 19,
+	.fixup_params = fixup_none_params,
+	.compress = compress_zstd,
+	.decompress = decompress_zstd,
+};
+
 /*
  * Generic compressor filter.
  */
@@ -760,6 +884,7 @@ static const struct compressor *compressor_array[] = {
 	[COMPRESSOR_TYPE_XZ] = &compressor_xz,
 	[COMPRESSOR_TYPE_BZIP2] = &compressor_bzip2,
 	[COMPRESSOR_TYPE_LZMA] = &compressor_lzma,
+	[COMPRESSOR_TYPE_ZSTD] = &compressor_zstd,
 };
 
 static const struct compressor *
diff --git a/lib/dpkg/compress.h b/lib/dpkg/compress.h
index 08aaf2516..1af8a3490 100644
--- a/lib/dpkg/compress.h
+++ b/lib/dpkg/compress.h
@@ -42,6 +42,7 @@ enum compressor_type {
 	COMPRESSOR_TYPE_XZ,
 	COMPRESSOR_TYPE_BZIP2,
 	COMPRESSOR_TYPE_LZMA,
+	COMPRESSOR_TYPE_ZSTD,
 };
 
 enum compressor_strategy {
diff --git a/m4/dpkg-libs.m4 b/m4/dpkg-libs.m4
index 577264706..8cbb3faa3 100644
--- a/m4/dpkg-libs.m4
+++ b/m4/dpkg-libs.m4
@@ -74,6 +74,13 @@ AC_DEFUN([DPKG_LIB_Z], [
   DPKG_WITH_COMPRESS_LIB([z], [zlib.h], [gzdopen])
 ])# DPKG_LIB_Z
 
+# DPKG_LIB_ZSTD
+# -------------
+# Check for zstd library.
+AC_DEFUN([DPKG_LIB_ZSTD], [
+  DPKG_WITH_COMPRESS_LIB([zstd], [zstd.h], [ZSTD_decompressStream])
+])# DPKG_LIB_ZSTD
+
 # DPKG_LIB_LZMA
 # -------------
 # Check for lzma library.
diff --git a/man/deb.man b/man/deb.man
index 2b8da5ff0..8d680dbdd 100644
--- a/man/deb.man
+++ b/man/deb.man
@@ -75,8 +75,9 @@ The second required member is named
 .BR control.tar .
 It is a tar archive containing the package control information, either
 not compressed (supported since dpkg 1.17.6), or compressed with
-gzip (with \fB.gz\fP extension) or
-xz (with \fB.xz\fP extension, supported since 1.17.6),
+gzip (with \fB.gz\fP extension),
+xz (with \fB.xz\fP extension, supported since 1.17.6) or
+zstd (with \fB.zst\fP extension, supported since 1.19.1),
 as a series of plain files, of which the file
 .B control
 is mandatory and contains the core control information, the
@@ -98,6 +99,7 @@ It contains the filesystem as a tar archive, either
 not compressed (supported since dpkg 1.10.24), or compressed with
 gzip (with \fB.gz\fP extension),
 xz (with \fB.xz\fP extension, supported since dpkg 1.15.6),
+zstd (with \fB.zst\fP extension, supported since dpkg 1.19.1),
 bzip2 (with \fB.bz2\fP extension, supported since dpkg 1.10.24) or
 lzma (with \fB.lzma\fP extension, supported since dpkg 1.13.25).
 .PP
diff --git a/man/dpkg-deb.man b/man/dpkg-deb.man
index f843f0a83..f94539f6a 100644
--- a/man/dpkg-deb.man
+++ b/man/dpkg-deb.man
 The default for this field is “${Package}\\t${Version}\\n”.
 .TP
 .BI \-z compress-level
 Specify which compression level to use on the compressor backend, when
-building a package (default is 9 for gzip, 6 for xz).
+building a package (default is 9 for gzip, 6 for xz and 19 for zstd).
 The accepted values are 0-9 with: 0 being mapped to compressor none for gzip.
 Before dpkg 1.16.2 level 0 was equivalent to compressor none for all
 compressors.
diff --git a/man/dpkg-source.man b/man/dpkg-source.man
index 2233d7a8d..991162003 100644
--- a/man/dpkg-source.man
+++ b/man/dpkg-source.man
@@ -176,7 +176,7 @@ Specify the compression to use for created tarballs and diff files
 (\fB\-\-compression\fP since dpkg 1.15.5).
 Note that this option will not cause existing tarballs to be recompressed,
 it only affects new files. Supported values are:
-.IR gzip ", " bzip2 ", " lzma " and " xz .
+.IR gzip ", " bzip2 ", " lzma ", " zstd " and " xz .
 The default is \fIxz\fP for formats 2.0 and newer, and \fIgzip\fP for
 format 1.0. \fIxz\fP is only supported since dpkg 1.15.5.
 .TP
diff --git a/scripts/Dpkg/Compression.pm b/scripts/Dpkg/Compression.pm
index 3dbc4adf0..4ea512fdc 100644
--- a/scripts/Dpkg/Compression.pm
+++ b/scripts/Dpkg/Compression.pm
@@ -72,6 +72,12 @@ my $COMP = {
 	decomp_prog => [ 'unxz', '--format=lzma' ],
 	default_level => 6,
     },
+    zstd => {
+	file_ext => 'zst',
+	comp_prog => [ 'zstd', '-q' ],
+	decomp_prog => [ 'unzstd', '-q' ],
+	default_level => 19,
+    },
     xz => {
 	file_ext => 'xz',
 	comp_prog => [ 'xz' ],
-- 
2.15.1

From 9dec1a3f6be2e3d525a92f5a123300618407cb19 Mon Sep 17 00:00:00 2001
From: Balint Reczey <balint.rec...@canonical.com>
Date: Thu, 8 Mar 2018 10:14:30 +0100
Subject: [PATCH 2/4] Add test for zstd decompression

---
 debian/control       |  1 +
 t-func/deb-format.at | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/debian/control b/debian/control
index 833f6c2b7..b9e3c9cf4 100644
--- a/debian/control
+++ b/debian/control
@@ -19,6 +19,7 @@ Build-Depends:
 # Needed for --porefs.
  po4a (>= 0.43),
  zlib1g-dev,
+ zstd,
  libbz2-dev,
  libzstd-dev,
  liblzma-dev,
diff --git a/t-func/deb-format.at b/t-func/deb-format.at
index cdfc648a8..d9ca8b2c5 100644
--- a/t-func/deb-format.at
+++ b/t-func/deb-format.at
@@ -28,6 +28,7 @@ xz -c control.tar >control.tar.xz
 xz -c data.tar >data.tar.xz
 bzip2 -c data.tar >data.tar.bz2
 lzma -c data.tar >data.tar.lzma
+zstd -q -c data.tar >data.tar.zst
 touch _ignore
 touch unknown
 ])
@@ -290,6 +291,18 @@ drwxr-xr-x root/root         0 1970-01-01 00:00 ./
 -rw-r--r-- root/root         5 1970-01-01 00:00 ./file-templ
 ])
 
+AT_CHECK([
+# Test data.tar.zst member
+ar rc pkg-data-zst.deb debian-binary control.tar.gz data.tar.zst
+ar t pkg-data-zst.deb
+dpkg-deb -c pkg-data-zst.deb
+], [], [debian-binary
+control.tar.gz
+data.tar.zst
+drwxr-xr-x root/root         0 1970-01-01 00:00 ./
+-rw-r--r-- root/root         5 1970-01-01 00:00 ./file-templ
+])
+
 AT_CHECK([
 # Test data.tar.lzma member
 ar rc pkg-data-lzma.deb debian-binary control.tar.gz data.tar.lzma
-- 
2.15.1

From c927d94df0fdc59c25961505a5438b0dfc58710a Mon Sep 17 00:00:00 2001
From: Balint Reczey <balint.rec...@canonical.com>
Date: Fri, 9 Mar 2018 15:19:43 +0100
Subject: [PATCH 3/4] dpkg: Support Zstandard compressed packages with multiple
 frames

---
 lib/dpkg/compress.c  | 10 ++++++++--
 t-func/deb-format.at |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/dpkg/compress.c b/lib/dpkg/compress.c
index e20add3b7..2d804818f 100644
--- a/lib/dpkg/compress.c
+++ b/lib/dpkg/compress.c
@@ -774,8 +774,6 @@ decompress_zstd(int fd_in, int fd_out, const char *desc)
     ohshit(_("ZSTD_createDStream() error "));
   }
 
-  /* TODO: a file may consist of multiple appended frames (ex : pzstd).
-   * The following implementation decompresses only the first frame */
   init_result = ZSTD_initDStream(dstream);
   if (ZSTD_isError(init_result)) {
     ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
@@ -791,6 +789,14 @@ decompress_zstd(int fd_in, int fd_out, const char *desc)
                ZSTD_getErrorName(to_read));
       }
       fd_write(fd_out, output.dst, output.pos);
+      /* possible next frame */
+      if (to_read == 0) {
+        init_result = ZSTD_initDStream(dstream);
+        if (ZSTD_isError(init_result)) {
+          ohshit(_("ZSTD_initDStream() error : %s"), ZSTD_getErrorName(init_result));
+        }
+        to_read = init_result;
+      }
     }
   }
 
diff --git a/t-func/deb-format.at b/t-func/deb-format.at
index d9ca8b2c5..0296c1d04 100644
--- a/t-func/deb-format.at
+++ b/t-func/deb-format.at
@@ -28,7 +28,7 @@ xz -c control.tar >control.tar.xz
 xz -c data.tar >data.tar.xz
 bzip2 -c data.tar >data.tar.bz2
 lzma -c data.tar >data.tar.lzma
-zstd -q -c data.tar >data.tar.zst
+pzstd -q -c data.tar >data.tar.zst
 touch _ignore
 touch unknown
 ])
-- 
2.15.1

From d4b3f22299339f4b54f0013b5f86eff48db1e8c4 Mon Sep 17 00:00:00 2001
From: Balint Reczey <balint.rec...@canonical.com>
Date: Fri, 9 Mar 2018 11:19:24 +0100
Subject: [PATCH 4/4] dpkg: Enable zstd uniform compression

---
 dpkg-deb/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dpkg-deb/main.c b/dpkg-deb/main.c
index 7f898210e..7a40ecb80 100644
--- a/dpkg-deb/main.c
+++ b/dpkg-deb/main.c
@@ -245,6 +245,7 @@ int main(int argc, const char *const *argv) {
   if (opt_uniform_compression &&
       (compress_params.type != COMPRESSOR_TYPE_NONE &&
        compress_params.type != COMPRESSOR_TYPE_GZIP &&
+       compress_params.type != COMPRESSOR_TYPE_ZSTD &&
        compress_params.type != COMPRESSOR_TYPE_XZ))
     badusage(_("unsupported compression type '%s' with uniform compression"),
              compressor_get_name(compress_params.type));
-- 
2.15.1

Reply via email to