In gzip I want to try HASH_BITS greater than 15, but deflate.c interferes:
#if HASH_BITS > BITS-1
error: cannot overlay head with tab_prefix1
Investigation shows that the data space for LZW compress acts as the master,
with zip deflate being a slave whose arrays never can be larger.
Attached are two patches which make the two sides more equal.
The first patch prepares the way by renaming BITS to LZW_BITS
and creating new header file deflate.h similar to lzw.h.
The second patch uses MAX() when overlaying arrays, which allows
either to be larger. (Beware 'sizeof' any previous master.)
Now I can vary HASH_BITS and LZW_BITS independently.
--
>From 4a3d2c59a280f11202f6b59465fd76119bb3eb75 Mon Sep 17 00:00:00 2001
From: John Reiser <[email protected]>
Date: Mon, 17 Jun 2013 14:24:29 -0700
Subject: [PATCH 1/2] Prepare to overlay arrays in either order (zip deflate
vs LZW compress) by renaming constant BITS ==> LZW_BITS
and creating file deflate.h.
---
deflate.c | 24 +++---------------------
deflate.h | 41 +++++++++++++++++++++++++++++++++++++++++
gzip.c | 14 +++++++-------
lzw.h | 6 +++---
unlzh.c | 2 +-
unlzw.c | 14 +++++++-------
6 files changed, 62 insertions(+), 39 deletions(-)
create mode 100644 deflate.h
diff --git a/deflate.c b/deflate.c
index f0f2394..31c81db 100644
--- a/deflate.c
+++ b/deflate.c
@@ -85,32 +85,14 @@
* Configuration parameters
*/
-/* Compile with MEDIUM_MEM to reduce the memory requirements or
- * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
- * entire input file can be held in memory (not possible on 16 bit systems).
- * Warning: defining these symbols affects HASH_BITS (see below) and thus
- * affects the compression ratio. The compressed output
- * is still correct, and might even be smaller in some cases.
- */
-
-#ifdef SMALL_MEM
-# define HASH_BITS 13 /* Number of bits used to hash strings */
-#endif
-#ifdef MEDIUM_MEM
-# define HASH_BITS 14
-#endif
-#ifndef HASH_BITS
-# define HASH_BITS 15
- /* For portability to 16 bit machines, do not use values above 15. */
-#endif
-
+#include "deflate.h"
/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
* window with tab_suffix. Check that we can do this:
*/
-#if (WSIZE<<1) > (1<<BITS)
+#if (WSIZE<<1) > (1<<LZW_BITS)
error: cannot overlay window with tab_suffix and prev with tab_prefix0
#endif
-#if HASH_BITS > BITS-1
+#if HASH_BITS > LZW_BITS-1
error: cannot overlay head with tab_prefix1
#endif
diff --git a/deflate.h b/deflate.h
new file mode 100644
index 0000000..43b02ea
--- /dev/null
+++ b/deflate.h
@@ -0,0 +1,41 @@
+/* deflate.h -- factored from deflate.c; somewhat parallel to lzw.h
+
+ Copyright (C) 1999, 2006, 2009-2013 Free Software Foundation, Inc.
+ Copyright (C) 1992-1993 Jean-loup Gailly
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Compile with MEDIUM_MEM to reduce the memory requirements or
+ * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
+ * entire input file can be held in memory (not possible on 16 bit systems).
+ * Warning: defining these symbols affects HASH_BITS (see below) and thus
+ * affects the compression ratio. The compressed output
+ * is still correct, and might even be smaller in some cases.
+ */
+
+#ifndef HASH_BITS
+
+#ifdef SMALL_MEM
+# define HASH_BITS 13 /* Number of bits used to hash strings */
+#endif
+#ifdef MEDIUM_MEM
+# define HASH_BITS 14
+#endif
+#ifndef HASH_BITS
+# define HASH_BITS 15
+ /* For portability to 16 bit machines, do not use values above 15. */
+#endif
+
+#endif
diff --git a/gzip.c b/gzip.c
index 93cc738..7da26ec 100644
--- a/gzip.c
+++ b/gzip.c
@@ -151,10 +151,10 @@ DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
DECLARE(ush, d_buf, DIST_BUFSIZE);
DECLARE(uch, window, 2L*WSIZE);
#ifndef MAXSEG_64K
- DECLARE(ush, tab_prefix, 1L<<BITS);
+ DECLARE(ush, tab_prefix, 1L<<LZW_BITS);
#else
- DECLARE(ush, tab_prefix0, 1L<<(BITS-1));
- DECLARE(ush, tab_prefix1, 1L<<(BITS-1));
+ DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1));
+ DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1));
#endif
/* local variables */
@@ -178,7 +178,7 @@ static int do_lzw = 0; /* generate output compatible with old compress (-Z
int test = 0; /* test .gz file integrity */
static int foreground = 0; /* set if program run in foreground */
char *program_name; /* program name */
- int maxbits = BITS; /* max bits per code for LZW */
+ int maxbits = LZW_BITS; /* max bits per code for LZW */
int method = DEFLATED;/* compression method */
int level = 6; /* compression level */
int exit_code = OK; /* program exit code */
@@ -551,10 +551,10 @@ int main (int argc, char **argv)
ALLOC(ush, d_buf, DIST_BUFSIZE);
ALLOC(uch, window, 2L*WSIZE);
#ifndef MAXSEG_64K
- ALLOC(ush, tab_prefix, 1L<<BITS);
+ ALLOC(ush, tab_prefix, 1L<<LZW_BITS);
#else
- ALLOC(ush, tab_prefix0, 1L<<(BITS-1));
- ALLOC(ush, tab_prefix1, 1L<<(BITS-1));
+ ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1));
+ ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1));
#endif
exiting_signal = quiet ? SIGPIPE : 0;
diff --git a/lzw.h b/lzw.h
index 7f78134..59c1a40 100644
--- a/lzw.h
+++ b/lzw.h
@@ -16,10 +16,10 @@
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
-#ifndef BITS
-# define BITS 16
+#ifndef LZW_BITS
+# define LZW_BITS 16
#endif
-#define INIT_BITS 9 /* Initial number of bits per code */
+#define INIT_LZW_BITS 9 /* Initial number of bits per code */
#define LZW_MAGIC "\037\235" /* Magic header for lzw files, 1F 9D */
diff --git a/unlzh.c b/unlzh.c
index 37084fe..bb85e9b 100644
--- a/unlzh.c
+++ b/unlzh.c
@@ -71,7 +71,7 @@ local void make_table (int nchar, uch bitlen[],
/* local ush right[2 * NC - 1]; */
#define left prev
#define right head
-#if NC > (1<<(BITS-2))
+#if NC > (1<<(LZW_BITS-2))
error cannot overlay left+right and prev
#endif
diff --git a/unlzw.c b/unlzw.c
index 676d58c..3390440 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -70,12 +70,12 @@ union bytes {
#endif
#ifndef MAXSEG_64K
- /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
+ /* DECLARE(ush, tab_prefix, (1<<LZW_BITS)); -- prefix code */
# define tab_prefixof(i) tab_prefix[i]
# define clear_tab_prefixof() memzero(tab_prefix, 256);
#else
- /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
- /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */
+ /* DECLARE(ush, tab_prefix0, (1<<(LZW_BITS-1)); -- prefix for even codes */
+ /* DECLARE(ush, tab_prefix1, (1<<(LZW_BITS-1)); -- prefix for odd codes */
ush *tab_prefix[2];
# define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
# define clear_tab_prefixof() \
@@ -128,15 +128,15 @@ int unlzw(in, out)
maxbits &= BIT_MASK;
maxmaxcode = MAXCODE(maxbits);
- if (maxbits > BITS) {
+ if (maxbits > LZW_BITS) {
fprintf(stderr,
"\n%s: %s: compressed with %d bits, can only handle %d bits\n",
- program_name, ifname, maxbits, BITS);
+ program_name, ifname, maxbits, LZW_BITS);
exit_code = ERROR;
return ERROR;
}
rsize = insize;
- maxcode = MAXCODE(n_bits = INIT_BITS)-1;
+ maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1;
bitmask = (1<<n_bits)-1;
oldcode = -1;
finchar = 0;
@@ -203,7 +203,7 @@ int unlzw(in, out)
free_ent = FIRST - 1;
posbits = ((posbits-1) +
((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
- maxcode = MAXCODE(n_bits = INIT_BITS)-1;
+ maxcode = MAXCODE(n_bits = INIT_LZW_BITS)-1;
bitmask = (1<<n_bits)-1;
goto resetbuf;
}
--
1.7.11.7
>From 5ed21000b29bda8ee6adbfdfea008ca9fba6cba8 Mon Sep 17 00:00:00 2001
From: John Reiser <[email protected]>
Date: Tue, 18 Jun 2013 07:59:48 -0700
Subject: [PATCH 2/2] Use MAX() for array overlays. Prefer {prev,head,window}
as base names.
---
deflate.c | 10 ----------
gzip.c | 29 ++++++++++++++++++-----------
gzip.h | 34 ++++++++++++++++++++++------------
trees.c | 3 ---
unlzw.c | 1 +
5 files changed, 41 insertions(+), 36 deletions(-)
diff --git a/deflate.c b/deflate.c
index 31c81db..7fb6ed2 100644
--- a/deflate.c
+++ b/deflate.c
@@ -86,16 +86,6 @@
*/
#include "deflate.h"
-/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
- * window with tab_suffix. Check that we can do this:
- */
-#if (WSIZE<<1) > (1<<LZW_BITS)
- error: cannot overlay window with tab_suffix and prev with tab_prefix0
-#endif
-#if HASH_BITS > LZW_BITS-1
- error: cannot overlay head with tab_prefix1
-#endif
-
#define HASH_SIZE (unsigned)(1<<HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
#define WMASK (WSIZE-1)
diff --git a/gzip.c b/gzip.c
index 7da26ec..7309e84 100644
--- a/gzip.c
+++ b/gzip.c
@@ -149,12 +149,19 @@ static char const *const license_msg[] = {
DECLARE(uch, inbuf, INBUFSIZ +INBUF_EXTRA);
DECLARE(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
DECLARE(ush, d_buf, DIST_BUFSIZE);
-DECLARE(uch, window, 2L*WSIZE);
+#define LEN_window (2L*WSIZE)
+#define LEN_tab_suffix (1L<<LZW_BITS)
+DECLARE(uch, window, MAX(LEN_window, LEN_tab_suffix));
+#define LEN_prev WSIZE
+#define LEN_head (1L<<HASH_BITS)
#ifndef MAXSEG_64K
- DECLARE(ush, tab_prefix, 1L<<LZW_BITS);
+# define LEN_tab_prefix (1L<< LZW_BITS )
+ DECLARE(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix));
#else
- DECLARE(ush, tab_prefix0, 1L<<(LZW_BITS-1));
- DECLARE(ush, tab_prefix1, 1L<<(LZW_BITS-1));
+# define LEN_tab_prefix_0 (1L<<(LZW_BITS-1))
+# define LEN_tab_prefix_1 (1L<<(LZW_BITS-1))
+ DECLARE(ush, prev, MAX(LEN_prev, LEN_tab_prefix0));
+ DECLARE(ush, head, MAX(LEN_head, LEN_tab_prefix1));
#endif
/* local variables */
@@ -549,12 +556,12 @@ int main (int argc, char **argv)
ALLOC(uch, inbuf, INBUFSIZ +INBUF_EXTRA);
ALLOC(uch, outbuf, OUTBUFSIZ+OUTBUF_EXTRA);
ALLOC(ush, d_buf, DIST_BUFSIZE);
- ALLOC(uch, window, 2L*WSIZE);
+ ALLOC(uch, window, MAX(LEN_window, LEN_tab_suffix));
#ifndef MAXSEG_64K
- ALLOC(ush, tab_prefix, 1L<<LZW_BITS);
+ ALLOC(ush, prev, MAX((LEN_prev + LEN_head), LEN_tab_prefix));
#else
- ALLOC(ush, tab_prefix0, 1L<<(LZW_BITS-1));
- ALLOC(ush, tab_prefix1, 1L<<(LZW_BITS-1));
+ ALLOC(ush, prev, MAX(LEN_prev, LEN_tab_prefix0));
+ ALLOC(ush, head, MAX(LEN_head, LEN_tab_prefix1));
#endif
exiting_signal = quiet ? SIGPIPE : 0;
@@ -1880,10 +1887,10 @@ local void do_exit(exitcode)
FREE(d_buf);
FREE(window);
#ifndef MAXSEG_64K
- FREE(tab_prefix);
+ FREE(prev);
#else
- FREE(tab_prefix0);
- FREE(tab_prefix1);
+ FREE(prev);
+ FREE(head);
#endif
exit(exitcode);
}
diff --git a/gzip.h b/gzip.h
index 648073e..acbaf55 100644
--- a/gzip.h
+++ b/gzip.h
@@ -124,17 +124,28 @@ extern int method; /* compression method */
EXTERN(uch, inbuf); /* input buffer */
EXTERN(uch, outbuf); /* output buffer */
EXTERN(ush, d_buf); /* buffer for distances, see trees.c */
-EXTERN(uch, window); /* Sliding window and suffix table (unlzw) */
-#define tab_suffix window
+
+#ifndef WSIZE
+# define WSIZE 0x8000 /* window size--must be at least 32K for zip's deflate method */
+#endif /* Code is slightly faster if WSIZE is a power of 2. */
+
+#include "deflate.h"
+#ifndef LZW_BITS
+# define LZW_BITS 16
+#endif
+
+EXTERN(uch, window); /* Sliding window */
+#define tab_suffix window /* Suffix table (unlzw) */
+
#ifndef MAXSEG_64K
-# define tab_prefix prev /* hash link (see deflate.c) */
+ EXTERN(ush, prev); /* hash link (see deflate.c) */
+# define tab_prefix prev /* prefix code (see unlzw.c) */
# define head (prev+WSIZE) /* hash head (see deflate.c) */
- EXTERN(ush, tab_prefix); /* prefix code (see unlzw.c) */
#else
-# define tab_prefix0 prev
-# define head tab_prefix1
- EXTERN(ush, tab_prefix0); /* prefix for even codes */
- EXTERN(ush, tab_prefix1); /* prefix for odd codes */
+ EXTERN(ush, prev); /* hash link (see deflate.c) */
+# define tab_prefix0 prev /* prefix for even codes */
+ EXTERN(ush, head);
+# define tab_prefix1 head /* prefix for odd codes */
#endif
extern unsigned insize; /* valid bytes in inbuf */
@@ -178,10 +189,6 @@ typedef int file_t; /* Do not use stdio */
#define BINARY 0
#define ASCII 1
-#ifndef WSIZE
-# define WSIZE 0x8000 /* window size--must be a power of two, and */
-#endif /* at least 32K for zip's deflate method */
-
#define MIN_MATCH 3
#define MAX_MATCH 258
/* The minimum and maximum match lengths */
@@ -207,6 +214,9 @@ extern int save_orig_name; /* set if original name must be saved */
#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(0))
#define try_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf(1))
+#define MIN(a,b) ((a) <= (b) ? (a) : (b)) /* XXX bug if arg has side effect */
+#define MAX(a,b) ((a) <= (b) ? (b) : (a)) /* XXX bug if arg has side effect */
+
/* put_byte is used for the compressed output, put_ubyte for the
* uncompressed output. However unlzw() uses window for its
* suffix table instead of its output buffer, so it does not use put_ubyte
diff --git a/trees.c b/trees.c
index f2ad360..d14348a 100644
--- a/trees.c
+++ b/trees.c
@@ -330,9 +330,6 @@ local void set_file_type (void);
* used.
*/
-#define MAX(a,b) (a >= b ? a : b)
-/* the arguments must not have side effects */
-
/* ===========================================================================
* Allocate the match buffer, initialize the various tables and save the
* location of the internal file attribute (ascii/binary) and method
diff --git a/unlzw.c b/unlzw.c
index 3390440..7bdf417 100644
--- a/unlzw.c
+++ b/unlzw.c
@@ -83,6 +83,7 @@ union bytes {
memzero(tab_prefix1, 128);
#endif
#define de_stack ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
+ /* DECLARE(uch, tab_suffix, (1<<LZW_BITS)); -- suffix code */
#define tab_suffixof(i) tab_suffix[i]
int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
--
1.7.11.7