Would it be possible to include support for lz4 and zstd into the GNU
tar implementation?
To the best of my knowledge, lz4 seems to be the fastest mature FOSS
compression tool/algorithm available right now (for both compression
and decompression). While compression is only a few percent faster than
LZO/lzop or Snappy (yet still achieving better compression rates when
applied to the Silesia Corpus), decompression is almost twice as fast as
Snappy, and thrice as fast as LZO. The only thing which I know to be
able to achieve even faster decompression rates is lz4 in HC mode - but
LZ4-HC only achieves a compression throughput and compression ratio
comparable to zlib.
https://github.com/Cyan4973/lz4
lz4 should be mostly compatible with gzip, as it supports the parameters
-d, -f, -1 and -9 just the same way as gzip does.
I've written a patch to implement LZ4 support for tar (see attachment),
but as I'm not really accustomed to fiddling with GNU tar code, you
should definitely double check it for any mistakes before merging it.
regards
Pascal Ernster
diff -rup a/config.h.in b/config.h.in
--- a/config.h.in 2014-07-27 22:34:21.000000000 +0200
+++ b/config.h.in 2015-11-29 15:02:11.303095442 +0100
@@ -2227,6 +2227,9 @@
slash. */
#undef LSTAT_FOLLOWS_SLASHED_SYMLINK
+/* Define to the program name of lz4 compressor program */
+#undef LZ4_PROGRAM
+
/* Define to the program name of lzip compressor program */
#undef LZIP_PROGRAM
diff -rup a/configure.ac b/configure.ac
--- a/configure.ac 2014-07-27 22:33:31.000000000 +0200
+++ b/configure.ac 2015-11-29 15:04:34.166596835 +0100
@@ -245,6 +245,7 @@ fi
TAR_COMPR_PROGRAM(compress)
TAR_COMPR_PROGRAM(gzip)
TAR_COMPR_PROGRAM(bzip2)
+TAR_COMPR_PROGRAM(lz4)
TAR_COMPR_PROGRAM(lzip)
TAR_COMPR_PROGRAM(lzma)
TAR_COMPR_PROGRAM(lzop)
diff -rup a/doc/tar.1 b/doc/tar.1
--- a/doc/tar.1 2014-07-27 22:30:40.000000000 +0200
+++ b/doc/tar.1 2015-11-29 15:00:50.939612976 +0100
@@ -716,6 +716,10 @@ Filter the archive through
Filter the archive through
.BR xz (1).
.TP
+\fB\-\-lz4\fR
+Filter the archive through
+.BR lz4 (1).
+.TP
\fB\-\-lzip\fR
Filter the archive through
.BR lzip (1).
diff -rup a/doc/tar.texi b/doc/tar.texi
--- a/doc/tar.texi 2014-07-22 02:37:41.000000000 +0200
+++ b/doc/tar.texi 2015-11-29 15:00:50.942946317 +0100
@@ -2887,6 +2887,12 @@ backup, using @var{snapshot-file} to det
With other operations, informs @command{tar} that the archive is in
incremental format. @xref{Incremental Dumps}.
+@opsummary{lz4}
+@item --lz4
+
+This option tells @command{tar} to read or write archives through
+@command{lz4}. @xref{gzip}.
+
@opsummary{lzip}
@item --lzip
@@ -9014,15 +9020,16 @@ switch to @samp{posix}.
@cindex gzip
@cindex bzip2
+@cindex lz4
@cindex lzip
@cindex lzma
@cindex lzop
@cindex compress
@GNUTAR{} is able to create and read compressed archives. It supports
a wide variety of compression programs, namely: @command{gzip},
-@command{bzip2}, @command{lzip}, @command{lzma}, @command{lzop},
-@command{xz} and traditional @command{compress}. The latter is
-supported mostly for backward compatibility, and we recommend
+@command{bzip2}, @command{lz4}, @command{lzip}, @command{lzma},
+@command{lzop}, @command{xz} and traditional @command{compress}. The
+latter is supported mostly for backward compatibility, and we recommend
against using it, because it is by far less effective than the other
compression programs@footnote{It also had patent problems in the past.}.
@@ -9031,6 +9038,7 @@ Creating a compressed archive is simple:
commands. The compression option is @option{-z} (@option{--gzip}) to
create a @command{gzip} compressed archive, @option{-j}
(@option{--bzip2}) to create a @command{bzip2} compressed archive,
+@option{--lz4} to create an @asis{lz4} compressed archive,
@option{--lzip} to create an @asis{lzip} compressed archive,
@option{-J} (@option{--xz}) to create an @asis{XZ} archive,
@option{--lzma} to create an @asis{LZMA} compressed
@@ -9144,6 +9152,10 @@ Filter the archive through @code{xz}.
@itemx --bzip2
Filter the archive through @code{bzip2}.
+@opindex lz4
+@item --lz4
+Filter the archive through @command{lz4}.
+
@opindex lzip
@item --lzip
Filter the archive through @command{lzip}.
@@ -9230,6 +9242,7 @@ suffix. The following suffixes are reco
@item @samp{.tz2} @tab @command{bzip2}
@item @samp{.tbz2} @tab @command{bzip2}
@item @samp{.tbz} @tab @command{bzip2}
+@item @samp{.lz4} @tab @command{lz4}
@item @samp{.lz} @tab @command{lzip}
@item @samp{.lzma} @tab @command{lzma}
@item @samp{.tlz} @tab @command{lzma}
diff -rup a/po/cs.po b/po/cs.po
--- a/po/cs.po 2014-07-27 22:35:24.000000000 +0200
+++ b/po/cs.po 2015-11-29 15:06:20.553323486 +0100
@@ -3010,6 +3010,9 @@ msgstr "--stat potřebuje název souboru
#~ msgid "filter the archive through compress"
#~ msgstr "archiv protáhne skrze compress"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "archiv protáhne skrze lz4"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "archiv protáhne skrze lzma"
diff -rup a/po/de.po b/po/de.po
--- a/po/de.po 2014-07-27 22:35:24.000000000 +0200
+++ b/po/de.po 2015-11-29 15:06:44.139997167 +0100
@@ -3020,6 +3020,9 @@ msgstr "--stat erfordert Dateinamen"
#~ msgid "filter the archive through compress"
#~ msgstr "Archiv durch compress filtern"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "Archiv durch lz4 filtern"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "Archiv durch lzma filtern"
diff -rup a/po/fi.po b/po/fi.po
--- a/po/fi.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/fi.po 2015-11-29 15:07:54.916672877 +0100
@@ -2945,6 +2945,9 @@ msgstr "--stat vaatii tiedostonimiä"
#~ msgid "filter the archive through compress"
#~ msgstr "ohjaa arkisto compress-ohjelman läpi"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "ohjaa arkisto lz4-ohjelman läpi"
+
#, fuzzy
#~ msgid "filter the archive through lzma"
#~ msgstr "ohjaa arkisto gzip-ohjelman läpi"
diff -rup a/po/fr.po b/po/fr.po
--- a/po/fr.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/fr.po 2015-11-29 15:08:18.710005507 +0100
@@ -3087,6 +3087,9 @@ msgstr "--stat nécessite un nom de fich
#~ msgid "filter the archive through compress"
#~ msgstr "Filtrer l'archive à travers « compress »"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "Filtrer l'archive à travers « lz4 »"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "Filtrer l'archive à travers « lzma »"
diff -rup a/po/ga.po b/po/ga.po
--- a/po/ga.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/ga.po 2015-11-29 15:08:42.666669703 +0100
@@ -3058,6 +3058,9 @@ msgstr "Tá ainmneacha comhaid de dhíth
#~ msgid "filter the archive through compress"
#~ msgstr "scag an chartlann le compress"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "scag an chartlann le lz4"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "scag an chartlann le lzma"
diff -rup a/po/id.po b/po/id.po
--- a/po/id.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/id.po 2015-11-29 15:09:11.463331176 +0100
@@ -3006,6 +3006,9 @@ msgstr "--stat membutuhkan nama berkas"
#~ msgid "filter the archive through compress"
#~ msgstr "saring archive melalui compress"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "saring archive melalui lz4"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "saring archive melalui lzma"
diff -rup a/po/ru.po b/po/ru.po
--- a/po/ru.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/ru.po 2015-11-29 15:09:52.083319962 +0100
@@ -3023,6 +3023,9 @@ msgstr "для опции --stat нуж
#~ msgid "filter the archive through compress"
#~ msgstr "пропустить архив через compress"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "пропустить архив через lz4"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "пропустить архив через lzma"
diff -rup a/po/sl.po b/po/sl.po
--- a/po/sl.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/sl.po 2015-11-29 15:10:24.549974604 +0100
@@ -3017,6 +3017,9 @@ msgstr "--stat zahteva imena datotek"
#~ msgid "filter the archive through compress"
#~ msgstr "filtriraj arhiv skozi compress"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "filtriraj arhiv skozi lz4"
+
#, fuzzy
#~ msgid "filter the archive through lzma"
#~ msgstr "filtriraj arhiv skozi gzip"
diff -rup a/po/zh_CN.po b/po/zh_CN.po
--- a/po/zh_CN.po 2014-07-27 22:35:25.000000000 +0200
+++ b/po/zh_CN.po 2015-11-29 15:11:01.066624723 +0100
@@ -2934,6 +2934,9 @@ msgstr "--stat 选项需要文件名"
#~ msgid "filter the archive through compress"
#~ msgstr "通过 compress 过滤归档"
+#~ msgid "filter the archive through lz4"
+#~ msgstr "通过 lz4 过滤归档"
+
#~ msgid "filter the archive through lzma"
#~ msgstr "通过 lzma 过滤归档"
diff -rup a/src/buffer.c b/src/buffer.c
--- a/src/buffer.c 2014-03-26 22:07:35.000000000 +0100
+++ b/src/buffer.c 2015-11-29 15:00:50.942946317 +0100
@@ -267,6 +267,7 @@ enum compress_type {
ct_compress,
ct_gzip,
ct_bzip2,
+ ct_lz4,
ct_lzip,
ct_lzma,
ct_lzop,
@@ -295,6 +296,7 @@ static struct zip_magic const magic[] =
{ ct_compress, 2, "\037\235" },
{ ct_gzip, 2, "\037\213" },
{ ct_bzip2, 3, "BZh" },
+ { ct_lz4, 4, "\x04\x22\x4D\x18" },
{ ct_lzip, 4, "LZIP" },
{ ct_lzma, 6, "\xFFLZMA" },
{ ct_lzop, 4, "\211LZO" },
@@ -309,6 +311,7 @@ static struct zip_program zip_program[]
{ ct_gzip, GZIP_PROGRAM, "-z" },
{ ct_bzip2, BZIP2_PROGRAM, "-j" },
{ ct_bzip2, "lbzip2", "-j" },
+ { ct_lz4, LZ4_PROGRAM, "--lz4" },
{ ct_lzip, LZIP_PROGRAM, "--lzip" },
{ ct_lzma, LZMA_PROGRAM, "--lzma" },
{ ct_lzma, XZ_PROGRAM, "-J" },
diff -rup a/src/suffix.c b/src/suffix.c
--- a/src/suffix.c 2014-02-10 18:47:27.000000000 +0100
+++ b/src/suffix.c 2015-11-29 15:00:50.942946317 +0100
@@ -39,6 +39,7 @@ static struct compression_suffix compres
{ S(tbz, BZIP2) },
{ S(tbz2, BZIP2) },
{ S(tz2, BZIP2) },
+ { S(lz4, LZ4) },
{ S(lz, LZIP) },
{ S(lzma, LZMA) },
{ S(tlz, LZMA) },
diff -rup a/src/tar.c b/src/tar.c
--- a/src/tar.c 2014-07-27 22:30:40.000000000 +0200
+++ b/src/tar.c 2015-11-29 15:00:50.942946317 +0100
@@ -301,6 +301,7 @@ enum
KEEP_DIRECTORY_SYMLINK_OPTION,
KEEP_NEWER_FILES_OPTION,
LEVEL_OPTION,
+ LZ4_OPTION,
LZIP_OPTION,
LZMA_OPTION,
LZOP_OPTION,
@@ -696,6 +697,7 @@ static struct argp_option options[] = {
{"ungzip", 0, 0, OPTION_ALIAS, NULL, GRID+1 },
{"compress", 'Z', 0, 0, NULL, GRID+1 },
{"uncompress", 0, 0, OPTION_ALIAS, NULL, GRID+1 },
+ {"lz4", LZ4_OPTION, 0, 0, NULL, GRID+1 },
{"lzip", LZIP_OPTION, 0, 0, NULL, GRID+1 },
{"lzma", LZMA_OPTION, 0, 0, NULL, GRID+1 },
{"lzop", LZOP_OPTION, 0, 0, NULL, GRID+1 },
@@ -1191,6 +1193,10 @@ tar_help_filter (int key, const char *te
s = xasprintf (_("filter the archive through %s"), COMPRESS_PROGRAM);
break;
+ case LZ4_OPTION:
+ s = xasprintf (_("filter the archive through %s"), LZ4_PROGRAM);
+ break;
+
case LZIP_OPTION:
s = xasprintf (_("filter the archive through %s"), LZIP_PROGRAM);
break;
@@ -1201,6 +1207,7 @@ tar_help_filter (int key, const char *te
case LZOP_OPTION:
s = xasprintf (_("filter the archive through %s"), LZOP_PROGRAM);
+ break;
case 'J':
s = xasprintf (_("filter the archive through %s"), XZ_PROGRAM);
@@ -1522,6 +1529,10 @@ parse_opt (int key, char *arg, struct ar
}
break;
+ case LZ4_OPTION:
+ set_use_compress_program_option (LZ4_PROGRAM);
+ break;
+
case LZIP_OPTION:
set_use_compress_program_option (LZIP_PROGRAM);
break;