On Mon, 22 May 2006, Stefan Völkel wrote:
> Package: privoxy
> Version: 3.0.3-2-1
> Severity: wishlist
> I made a dpatch to add deflate-filter support to privoxy. Patch is taken
> from
>
>
> http://sourceforge.net/tracker/?group_id=11118&atid=311118&func=detail&aid=895531
>
> Two small changes were made by me, I added JB_ERR_GENERIC to project.h
> and commented out old_buf in filters.c
I don't know whether it was a good idea to comment out the "old_buf"
line; I think renaming it to "old" is what the author intended. In
addition, this block in filters.c was placed at a different position
than in the original patch.
I changed these (and renamed JB_ERR_GENERIC to JB_ERR_COMPRESS which
may make more sense) and now the patched version seems to run without
problems on my system.
Ciao
Roland
#! /bin/sh /usr/share/dpatch/dpatch-run
## 16_gzip.dpatch by Wil Mahan <[EMAIL PROTECTED]>
##
## All lines beginning with `## DP:' are a description of the patch.
## DP: Add deflate-filter support to privoxy, see sf bug id 895531
## DP: Adapted from privoxy 3.1 to 3.0.x by Roland Rosenfeld.
@DPATCH@
diff -urNad privoxy~/actionlist.h privoxy/actionlist.h
--- privoxy~/actionlist.h
+++ privoxy/actionlist.h
@@ -116,6 +116,7 @@
DEFINE_ACTION_STRING ("deanimate-gifs", ACTION_DEANIMATE,
ACTION_STRING_DEANIMATE)
DEFINE_CGI_PARAM_RADIO ("deanimate-gifs", ACTION_DEANIMATE,
ACTION_STRING_DEANIMATE, "first", 0)
DEFINE_CGI_PARAM_RADIO ("deanimate-gifs", ACTION_DEANIMATE,
ACTION_STRING_DEANIMATE, "last", 1)
+DEFINE_ACTION_BOOL ("decompress-from-server", ACTION_DECOMPRESS_IN)
DEFINE_ACTION_BOOL ("downgrade-http-version", ACTION_DOWNGRADE)
DEFINE_ACTION_BOOL ("fast-redirects", ACTION_FAST_REDIRECTS)
DEFINE_ACTION_MULTI ("filter", ACTION_MULTI_FILTER)
diff -urNad privoxy~/configure.in privoxy/configure.in
--- privoxy~/configure.in
+++ privoxy/configure.in
@@ -1234,6 +1234,20 @@
libpcrs is available],
[ if test $enableval = "no"; then have_pcrs=no; fi ])
+AC_ARG_ENABLE(zlib,
+[ --enable-zlib Use the zlib library to allow compressing or
+ decompressing data on the fly.],
+[enableval2=$enableval],
+[enableval2=no])
+if test $enableval2 = yes; then
+ AC_CHECK_LIB(z, zlibVersion, , [
+ AC_MSG_ERROR([Unable to find a copy of zlib. The zlib library
+is necessary to enable compresion support. ])
+ ])
+ AC_DEFINE(FEATURE_ZLIB,1,
+ [ Define to 1 to use compression through the zlib library. ])
+fi
+
# If we have libpcre and either we also have pcreposix or
# we don't need pcreposix, then link pcre dynamically; else
diff -urNad privoxy~/default.action.master privoxy/default.action.master
--- privoxy~/default.action.master
+++ privoxy/default.action.master
@@ -452,7 +452,7 @@
-hide-user-agent \
-kill-popups \
-limit-connect \
-+prevent-compression \
+-prevent-compression \
-send-vanilla-wafer \
-send-wafer \
+session-cookies-only \
diff -urNad privoxy~/filters.c privoxy/filters.c
--- privoxy~/filters.c
+++ privoxy/filters.c
@@ -1325,6 +1325,38 @@
return(NULL);
}
+#ifdef FEATURE_ZLIB
+ /* If the body has a compressed transfer-encoding, uncompress
+ * it first, adjusting size and iob->eod. Note that
+ * decompression occurs after de-chunking.
+ */
+ if (csp->content_type & CT_GZIP || csp->content_type & CT_DEFLATE)
+ {
+ /* Notice that we at least tried to decompress. */
+ if (JB_ERR_OK != decompress_iob(csp))
+ {
+ /* We failed to decompress the data; there's no point
+ * in continuing since we can't filter. This is
+ * slightly tricky because we need to remember not to
+ * modify the Content-Encoding header later; using
+ * CT_TABOO flag is a kludge for this purpose.
+ */
+ csp->content_type |= CT_TABOO;
+ return(NULL);
+ }
+ log_error(LOG_LEVEL_RE_FILTER, "Decompressing successful");
+
+ /* Decompression gives us a completely new iob, so we
+ * need to update.
+ */
+ size = csp->iob->eod - csp->iob->cur;
+ old = csp->iob->cur;
+
+ csp->flags |= CSP_FLAG_MODIFIED;
+ }
+#endif
+
+
/*
* If the body has a "chunked" transfer-encoding,
* get rid of it first, adjusting size and iob->eod
diff -urNad privoxy~/jcc.c privoxy/jcc.c
--- privoxy~/jcc.c
+++ privoxy/jcc.c
@@ -659,6 +659,10 @@
# include <select.h>
# endif
+#ifdef FEATURE_ZLIB
+#include <zlib.h>
+#endif
+
#endif
#include "project.h"
@@ -1609,6 +1613,8 @@
if ((csp->content_type & CT_TEXT) && /* It's a text / * MIME-Type
*/
!http->ssl && /* We talk plaintext */
+ !(csp->content_type & CT_GZIP) &&
+ !(csp->content_type & CT_DEFLATE) &&
block_popups) /* Policy allows */
{
block_popups_now = 1;
diff -urNad privoxy~/parsers.c privoxy/parsers.c
--- privoxy~/parsers.c
+++ privoxy/parsers.c
@@ -433,6 +433,10 @@
#include <assert.h>
#include <string.h>
+#ifdef FEATURE_ZLIB
+#include <zlib.h>
+#endif
+
#if !defined(_WIN32) && !defined(__OS2__)
#include <unistd.h>
#endif
@@ -632,6 +636,347 @@
}
+#ifdef FEATURE_ZLIB
+/*********************************************************************
+ *
+ * Function    :  skip_gzip_header
+ *
+ * Description :  Step over the header of a gzip member (RFC 1952) to
+ *                find the raw deflate stream behind it. Every step is
+ *                bounds-checked against eod, so a truncated or
+ *                malformed header is rejected instead of being read
+ *                past the end of the buffered data.
+ *
+ * Parameters  :
+ *          1  :  cur = First byte of the gzip member.
+ *          2  :  eod = One past the last byte of buffered data.
+ *
+ * Returns     :  Pointer to the first byte after the header, or NULL
+ *                if the header is invalid or incomplete.
+ *
+ *********************************************************************/
+static char *skip_gzip_header(char *cur, const char *eod)
+{
+   int flags;
+   int string_flag;
+
+   /* Fixed part: magic (2), method (1), flags (1), mtime (4),
+    * extra flags (1) and OS (1) make up ten bytes.
+    */
+   if ((eod - cur) < 10)
+   {
+      return NULL;
+   }
+   if ((cur[0] != (char)0x1f)
+    || (cur[1] != (char)0x8b)
+    || (cur[2] != Z_DEFLATED))
+   {
+      return NULL;
+   }
+
+   flags = (unsigned char)cur[3];
+   if (flags & 0xe0)
+   {
+      /* The gzip header has reserved bits set; bail out. */
+      return NULL;
+   }
+   cur += 10;
+
+   /* Skip extra fields if necessary. */
+   if (flags & 0x04)
+   {
+      size_t extra_len;
+
+      if ((eod - cur) < 2)
+      {
+         return NULL;
+      }
+
+      /* The length is a 16-bit little-endian value. Read both bytes
+       * by index: modifying cur several times within one expression
+       * is undefined behaviour.
+       */
+      extra_len = (size_t)(unsigned char)cur[0]
+                | ((size_t)(unsigned char)cur[1] << 8);
+      cur += 2;
+      if ((size_t)(eod - cur) < extra_len)
+      {
+         return NULL;
+      }
+      cur += extra_len;
+   }
+
+   /* Skip the filename (0x08) and the comment (0x10) if necessary;
+    * both are null-terminated strings.
+    */
+   for (string_flag = 0x08; string_flag <= 0x10; string_flag <<= 1)
+   {
+      if (flags & string_flag)
+      {
+         char *terminator = memchr(cur, '\0', (size_t)(eod - cur));
+
+         if (NULL == terminator)
+         {
+            return NULL;
+         }
+         cur = terminator + 1;
+      }
+   }
+
+   /* Skip the CRC if necessary. */
+   if (flags & 0x02)
+   {
+      if ((eod - cur) < 2)
+      {
+         return NULL;
+      }
+      cur += 2;
+   }
+
+   return cur;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  decompress_iob
+ *
+ * Description :  Decompress buffered page, expanding the
+ *                buffer as necessary. csp->iob->cur should point to
+ *                the beginning of the compressed data block. The iob
+ *                is only modified once decompression has succeeded;
+ *                on any error it is left exactly as it was.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  JB_ERR_OK on success, JB_ERR_MEMORY if out-of-memory
+ *                limit reached, JB_ERR_COMPRESS if error decompressing
+ *                buffer.
+ *
+ *********************************************************************/
+jb_err decompress_iob(struct client_state *csp)
+{
+   char *buf;          /* new, uncompressed buffer */
+   char *cur;          /* start of the data handed to zlib */
+   char *eod;          /* end of the data handed to zlib */
+   char *new_eod;      /* end of the uncompressed data */
+   char *preview_end;  /* end of the excerpt written to the log */
+   size_t bufsize;     /* allocated size of the new buffer */
+   size_t skip_size;   /* leading iob bytes that must NOT be decompressed */
+   size_t limit;       /* largest buffer we are allowed to allocate */
+   int status;         /* return status of the inflate() call */
+   z_stream zstr;      /* used by calls to zlib */
+
+   /* Work on local copies so that the iob stays untouched (and thus
+    * still usable by the caller) if we have to bail out.
+    */
+   cur = csp->iob->cur;
+   eod = csp->iob->eod;
+   bufsize = (size_t)csp->iob->size;
+   skip_size = (size_t)(cur - csp->iob->buf);
+   limit = (size_t)csp->config->buffer_limit;
+
+   /* This is to protect the parsing of gzipped data, but it should(?)
+    * be valid for deflated data also.
+    */
+   if (bufsize < 10)
+   {
+      log_error (LOG_LEVEL_ERROR, "Buffer too small decompressing iob");
+      return JB_ERR_COMPRESS;
+   }
+
+   if (csp->content_type & CT_GZIP)
+   {
+      /* Our task is slightly complicated by the facts that data
+       * compressed by gzip does not include a zlib header, and
+       * that there is no easily accessible interface in zlib to
+       * handle a gzip header. We strip off the gzip header by
+       * hand, and later inform zlib not to expect a header.
+       */
+      cur = skip_gzip_header(cur, eod);
+      if (NULL == cur)
+      {
+         log_error (LOG_LEVEL_ERROR,
+            "Invalid gzip header when decompressing");
+         return JB_ERR_COMPRESS;
+      }
+   }
+   else if (csp->content_type & CT_DEFLATE)
+   {
+      log_error (LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *cur);
+      /* In theory (that is, according to RFC 1950), deflate-compressed
+       * data should begin with a two-byte zlib header and have an
+       * adler32 checksum at the end. It seems that in practice
+       * only the raw compressed data is sent. Note that this means that
+       * we are not RFC 1950-compliant here, but the advantage is that
+       * this actually works. :)
+       *
+       * We add a dummy null byte to tell zlib where the data ends,
+       * and later inform it not to expect a header. This relies on
+       * add_to_iob() having null-terminated the buffer, so the byte
+       * is only included when it lies inside the allocation.
+       */
+      if (eod < csp->iob->buf + bufsize)
+      {
+         eod++;
+      }
+   }
+   else
+   {
+      log_error (LOG_LEVEL_ERROR,
+         "Unable to determine compression format for decompression");
+      return JB_ERR_COMPRESS;
+   }
+
+   /* Set up the fields required by zlib. */
+   zstr.next_in = (Bytef *)cur;
+   zstr.avail_in = (uInt)(eod - cur);
+   zstr.zalloc = Z_NULL;
+   zstr.zfree = Z_NULL;
+   zstr.opaque = Z_NULL;
+
+   /* Passing -MAX_WBITS to inflateInit2 tells the library
+    * that there is no zlib header.
+    */
+   if (inflateInit2 (&zstr, -MAX_WBITS) != Z_OK)
+   {
+      log_error (LOG_LEVEL_ERROR, "Error initializing decompression");
+      return JB_ERR_COMPRESS;
+   }
+
+   /* Next, we allocate new storage for the inflated data.
+    * We don't modify the existing iob yet, so in case there
+    * is error in decompression we can recover gracefully.
+    */
+   buf = zalloc (bufsize);
+   if (NULL == buf)
+   {
+      log_error (LOG_LEVEL_ERROR, "Out of memory decompressing iob");
+      inflateEnd(&zstr);
+      return JB_ERR_MEMORY;
+   }
+
+   assert(bufsize >= skip_size);
+   memcpy(buf, csp->iob->buf, skip_size);
+   zstr.avail_out = (uInt)(bufsize - skip_size);
+   zstr.next_out = (Bytef *)(buf + skip_size);
+
+   /* Try to decompress the whole stream in one shot. */
+   while (Z_BUF_ERROR == (status = inflate(&zstr, Z_FINISH)))
+   {
+      /* We need to allocate more memory for the output buffer. */
+      char *tmpbuf;   /* used for realloc'ing the buffer */
+      size_t used;    /* bytes of buf that are already filled */
+
+      /* If zlib wants more data then there's a problem, because
+       * the complete compressed file should have been buffered.
+       */
+      if (0 == zstr.avail_in)
+      {
+         log_error(LOG_LEVEL_ERROR, "Unexpected end of compressed iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_COMPRESS;
+      }
+
+      /* If we tried the limit and still didn't have enough
+       * memory, just give up.
+       */
+      if (bufsize >= limit)
+      {
+         log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_MEMORY;
+      }
+
+      /* Try doubling the buffer size each time, without exceeding
+       * the buffer limit (or overflowing while doubling).
+       */
+      bufsize = (bufsize > limit / 2) ? limit : bufsize * 2;
+
+      /* The buffer may move, so remember the fill level as an
+       * offset rather than as a pointer.
+       */
+      used = (size_t)((char *)zstr.next_out - buf);
+      tmpbuf = realloc(buf, bufsize);
+      if (NULL == tmpbuf)
+      {
+         log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob");
+         inflateEnd(&zstr);
+         freez(buf);
+         return JB_ERR_MEMORY;
+      }
+      buf = tmpbuf;
+      zstr.next_out = (Bytef *)(buf + used);
+      zstr.avail_out = (uInt)(bufsize - used);
+      assert(zstr.avail_out > 0);
+   }
+
+   if (status != Z_STREAM_END)
+   {
+      /* We failed to decompress the stream. */
+      log_error(LOG_LEVEL_ERROR,
+         "Error in decompressing to the buffer (iob): %s",
+         (NULL != zstr.msg) ? zstr.msg : "unknown error");
+      inflateEnd(&zstr);
+      freez(buf);
+      return JB_ERR_COMPRESS;
+   }
+   inflateEnd(&zstr);
+
+   /* Make sure the uncompressed data obeys some minimal
+    * consistency conditions before the iob is replaced.
+    */
+   new_eod = (char *)zstr.next_out;
+   if ((new_eod < buf + skip_size) || (new_eod > buf + bufsize))
+   {
+      /* It seems that zlib did something weird. Offsets relative to
+       * the new buffer are logged, as pointers must not be passed
+       * for %d.
+       */
+      log_error(LOG_LEVEL_ERROR,
+         "Unexpected error decompressing the buffer (iob): %d==%d, %d>%d, %d<%d",
+         (int)skip_size, (int)skip_size, (int)(new_eod - buf), 0,
+         (int)(new_eod - buf), (int)bufsize);
+      freez(buf);
+      return JB_ERR_COMPRESS;
+   }
+
+   /* Finally, we can actually update the iob, since the
+    * decompression was successful. First, free the old
+    * buffer.
+    */
+   freez(csp->iob->buf);
+
+   /* Now, update the iob to use the new buffer. */
+   csp->iob->buf = buf;
+   csp->iob->cur = buf + skip_size;
+   csp->iob->eod = new_eod;
+   csp->iob->size = bufsize;
+
+   /* Log at most the first 100 bytes. The excerpt is terminated
+    * temporarily in place, which is only possible if the
+    * terminator still fits into the buffer.
+    */
+   preview_end = csp->iob->cur;
+   preview_end += ((new_eod - preview_end) > 100) ? 100 : (new_eod - preview_end);
+   if (preview_end < buf + bufsize)
+   {
+      char saved = *preview_end;
+
+      *preview_end = '\0';
+      log_error(LOG_LEVEL_INFO,
+         "Sucessfully decompressed: %s", csp->iob->cur);
+      *preview_end = saved;
+   }
+
+   return JB_ERR_OK;
+}
+#endif /* defined(FEATURE_ZLIB) */
+
+
/*********************************************************************
*
* Function : get_header
@@ -936,13 +1215,59 @@
*********************************************************************/
jb_err server_content_encoding(struct client_state *csp, char **header)
{
+#ifdef FEATURE_ZLIB
+   if (strstr(*header, "gzip"))
+   {
+      /*
+       * If the body was modified, we have tried to decompress it, so
+       * replace the header. strdup() stores into *header: test that.
+       */
+      if ((csp->flags & CSP_FLAG_MODIFIED)   /* we attempted to decompress */
+       && !(csp->content_type & CT_TABOO))   /* decompression was successful */
+      {
+         freez(*header);
+         *header = strdup("Content-Encoding: identity");
+         return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
+      }
+      else
+      {
+         csp->content_type |= CT_GZIP;
+      }
+   }
+   else if (strstr(*header, "deflate"))
+   {
+      /*
+       * If the body was modified, we have tried to decompress it, so
+       * replace the header. strdup() stores into *header: test that.
+       */
+      if ((csp->flags & CSP_FLAG_MODIFIED)   /* we attempted to decompress */
+       && !(csp->content_type & CT_TABOO))   /* decompression was successful */
+      {
+         freez(*header);
+         *header = strdup("Content-Encoding: identity");
+         return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
+      }
+      else
+      {
+         csp->content_type |= CT_DEFLATE;
+      }
+   }
+   else if (strstr(*header, "compress"))
+   {
+      /* We can't decompress this; therefore we can't filter
+       * it either.
+       */
+      csp->content_type |= CT_TABOO;
+   }
+#else /* !defined(FEATURE_ZLIB) */
/*
* Turn off pcrs and gif filtering if body compressed
*/
if (strstr(*header, "gzip") || strstr(*header, "compress") ||
strstr(*header, "deflate"))
{
- csp->content_type = CT_TABOO;
+   csp->content_type |= CT_TABOO;
}
+#endif /* !defined(FEATURE_ZLIB) */
return JB_ERR_OK;
diff -urNad privoxy~/parsers.h privoxy/parsers.h
--- privoxy~/parsers.h
+++ privoxy/parsers.h
@@ -194,6 +194,7 @@
extern int flush_socket(jb_socket fd, struct client_state *csp);
extern jb_err add_to_iob(struct client_state *csp, char *buf, int n);
+extern jb_err decompress_iob(struct client_state *csp);
extern char *get_header(struct client_state *csp);
extern char *get_header_value(const struct list *header_list, const char
*header_name);
extern char *sed(const struct parsers pats[], const add_header_func_ptr
more_headers[], struct client_state *csp);
diff -urNad privoxy~/project.h privoxy/project.h
--- privoxy~/project.h
+++ privoxy/project.h
@@ -563,7 +563,7 @@
#define JB_ERR_PARSE 4 /**< Error parsing file */
#define JB_ERR_MODIFIED 5 /**< File has been modified outside of the
CGI actions editor. */
-
+#define JB_ERR_COMPRESS 6 /**< Error on decompression */
/**
* This macro is used to free a pointer that may be NULL.
@@ -818,6 +818,15 @@
#define CT_TABOO 4 /**< csp->content_type bitmask:
DO NOT filter, irrespective of other flags. */
+/* Although these are not, strictly speaking, content types
+ * (they are content encodings), it is simple to handle
+ * them as such.
+ */
+#define CT_GZIP 8 /**< csp->content_type bitmask:
+ gzip-compressed data. */
+#define CT_DEFLATE 16 /**< csp->content_type bitmask:
+ zlib-compressed data. */
+
/**
* The mask which includes all actions.
*/
@@ -862,6 +871,8 @@
#define ACTION_VANILLA_WAFER 0x00008000UL
/** Action bitmap: Limit CONNECT requests to safe ports. */
#define ACTION_LIMIT_CONNECT 0x00010000UL
+/** Action bitmap: Uncompress incoming text for filtering. */
+#define ACTION_DECOMPRESS_IN 0x00020000UL
/** Action string index: How to deanimate GIFs */
#define ACTION_STRING_DEANIMATE 0