Updated Branches:
  refs/heads/master f45c2c8ae -> 54d134fe0

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/54d134fe/plugins/gzip/gzip.cc
----------------------------------------------------------------------
diff --git a/plugins/gzip/gzip.cc b/plugins/gzip/gzip.cc
new file mode 100644
index 0000000..9260faf
--- /dev/null
+++ b/plugins/gzip/gzip.cc
@@ -0,0 +1,811 @@
+/** @file
+
+  Transforms content using gzip or deflate
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include <string>
+#include <string.h>
+#include <zlib.h>
+#include <ts/ts.h>
+#include "debug_macros.h"
+#include "misc.h"
+#include "configuration.h"
+
+#include "ink_defs.h"
+
+using namespace std;
+using namespace Gzip;
+
+//FIXME: custom dictionaries would be nice. configurable/content-type?
+//FIXME: look into autoscaling the compression level based on connection speed
+// a gprs device might benefit from a higher compression ratio, whereas a 
desktop w. high bandwith
+// might be served better with little or no compression at all
+//FIXME: look into compressing from the task thread pool
+//FIXME: make normalizing accept encoding configurable
+
+// from mod_deflate:
+// ZLIB's compression algorithm uses a
+// 0-9 based scale that GZIP does where '1' is 'Best speed' 
+// and '9' is 'Best compression'. Testing has proved level '6' 
+// to be about the best level to use in an HTTP Server. 
+
+const int ZLIB_COMPRESSION_LEVEL = 6;
+
+int arg_idx_hooked;
+int arg_idx_host_configuration;
+int arg_idx_url_disallowed;
+
+
+const char * global_hidden_header_name;
+Configuration* config = NULL;
+const char *dictionary = NULL;
+
+static GzipData *
+gzip_data_alloc(int compression_type)
+{
+  GzipData *data;
+  int err;
+
+  data = (GzipData *) TSmalloc(sizeof(GzipData));
+  data->downstream_vio = NULL;
+  data->downstream_buffer = NULL;
+  data->downstream_reader = NULL;
+  data->downstream_length = 0;
+  data->state = transform_state_initialized;
+  data->compression_type = compression_type;
+  data->zstrm.next_in = Z_NULL;
+  data->zstrm.avail_in = 0;
+  data->zstrm.total_in = 0;
+  data->zstrm.next_out = Z_NULL;
+  data->zstrm.avail_out = 0;
+  data->zstrm.total_out = 0;
+  data->zstrm.zalloc = gzip_alloc;
+  data->zstrm.zfree = gzip_free;
+  data->zstrm.opaque = (voidpf) 0;
+  data->zstrm.data_type = Z_ASCII;
+
+  int window_bits = (compression_type == COMPRESSION_TYPE_GZIP) ? 
WINDOW_BITS_GZIP : WINDOW_BITS_DEFLATE;
+
+  err = deflateInit2(&data->zstrm, ZLIB_COMPRESSION_LEVEL, Z_DEFLATED, 
window_bits, ZLIB_MEMLEVEL, Z_DEFAULT_STRATEGY);
+
+  if (err != Z_OK) {
+    fatal("gzip-transform: ERROR: deflateInit (%d)!", err);
+  }
+
+  if (dictionary) {
+    err = deflateSetDictionary(&data->zstrm, (const Bytef *) dictionary, 
strlen(dictionary));
+    if (err != Z_OK) {
+      fatal("gzip-transform: ERROR: deflateSetDictionary (%d)!", err);
+    }
+  }
+
+  return data;
+}
+
+
+static void
+gzip_data_destroy(GzipData * data)
+{
+  TSReleaseAssert(data);
+
+  //deflateEnd returnvalue ignore is intentional
+  //it would spew log on every client abort
+  deflateEnd(&data->zstrm);
+
+  if (data->downstream_buffer) {
+    TSIOBufferDestroy(data->downstream_buffer);
+  }
+
+  TSfree(data);
+}
+
+static TSReturnCode
+gzip_content_encoding_header(TSMBuffer bufp, TSMLoc hdr_loc, const int 
compression_type)
+{
+  TSReturnCode ret;
+  TSMLoc ce_loc;
+
+  // Delete Content-Encoding if present???
+
+  if ((ret = TSMimeHdrFieldCreateNamed(bufp, hdr_loc, "Content-Encoding", 
sizeof("Content-Encoding") - 1, &ce_loc)) == TS_SUCCESS) {
+    if (compression_type == COMPRESSION_TYPE_DEFLATE) {
+      ret = TSMimeHdrFieldValueStringInsert(bufp, hdr_loc, ce_loc, -1, 
"deflate", sizeof("deflate") - 1);
+    } else if (compression_type == COMPRESSION_TYPE_GZIP) {
+      ret = TSMimeHdrFieldValueStringInsert(bufp, hdr_loc, ce_loc, -1, "gzip", 
sizeof("gzip") - 1);
+    }
+    if (ret == TS_SUCCESS) {
+      ret = TSMimeHdrFieldAppend(bufp, hdr_loc, ce_loc);
+    }
+    TSHandleMLocRelease(bufp, hdr_loc, ce_loc);
+  }
+
+  if (ret != TS_SUCCESS) {
+    error("cannot add the Content-Encoding header");
+  }
+
+  return ret;
+}
+
+static TSReturnCode
+gzip_vary_header(TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  TSReturnCode ret;
+  TSMLoc ce_loc;
+
+  ce_loc = TSMimeHdrFieldFind(bufp, hdr_loc, "Vary", sizeof("Vary") - 1);
+  if (ce_loc) {
+    int idx, count, len; 
+    const char *value;
+
+    count = TSMimeHdrFieldValuesCount(bufp, hdr_loc, ce_loc);
+    for(idx=0; idx<count; idx++) {
+      value = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, ce_loc, idx, &len);
+      if (len &&
+          strncasecmp("Accept-Encoding", value, len) == 0) {
+        // Bail, Vary: Accept-Encoding already sent from origin
+        TSHandleMLocRelease(bufp, hdr_loc, ce_loc);
+        return TS_SUCCESS;
+      }
+    }
+
+    ret = TSMimeHdrFieldValueStringInsert(bufp, hdr_loc, ce_loc, -1, 
"Accept-Encoding", sizeof("Accept-Encoding") - 1);
+    TSHandleMLocRelease(bufp, hdr_loc, ce_loc);
+  } else {
+    if ((ret = TSMimeHdrFieldCreateNamed(bufp, hdr_loc, "Vary", sizeof("Vary") 
- 1, &ce_loc)) == TS_SUCCESS) {
+      if ((ret = TSMimeHdrFieldValueStringInsert(bufp, hdr_loc, ce_loc, -1, 
"Accept-Encoding", sizeof("Accept-Encoding") - 1)) == TS_SUCCESS) {
+        ret = TSMimeHdrFieldAppend(bufp, hdr_loc, ce_loc);
+      }
+
+      TSHandleMLocRelease(bufp, hdr_loc, ce_loc);
+    }
+  }
+
+  if (ret != TS_SUCCESS) {
+    error("cannot add/update the Vary header");
+  }
+
+  return ret;
+}
+
+//FIXME: the etag alteration isn't proper. it should modify the value inside 
quotes
+//       specify a very header..
+static TSReturnCode
+gzip_etag_header(TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  TSReturnCode ret = TS_SUCCESS;
+  TSMLoc ce_loc;
+
+  ce_loc = TSMimeHdrFieldFind(bufp, hdr_loc, TS_MIME_FIELD_ETAG, 
TS_MIME_LEN_ETAG);
+
+  if (ce_loc) {
+    int changetag = 1;
+    int strl;
+    const char *strv = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, ce_loc, -1, 
&strl);
+    //do not alter weak etags.
+    //FIXME: consider just making the etag weak for compressed content
+    if (strl >= 2) {
+      if ((strv[0] == 'w' || strv[0] == 'W') && strv[1] == '/') {
+        changetag = 0;
+      }
+      if (changetag) {
+        ret = TSMimeHdrFieldValueAppend(bufp, hdr_loc, ce_loc, 0, "-df", 3);
+      }
+    }
+    TSHandleMLocRelease(bufp, hdr_loc, ce_loc);
+  }
+
+  if (ret != TS_SUCCESS) {
+    error("cannot handle the %s header", TS_MIME_FIELD_ETAG);
+  }
+
+  return ret;
+}
+
+//FIXME: some things are potentially compressible. those responses
+static void
+gzip_transform_init(TSCont contp, GzipData * data)
+{
+  //update the vary, content-encoding, and etag response headers
+  //prepare the downstream for transforming
+
+  TSVConn downstream_conn;
+  TSMBuffer bufp;
+  TSMLoc hdr_loc;
+
+  data->state = transform_state_output;
+
+  if (TSHttpTxnTransformRespGet(data->txn, &bufp, &hdr_loc) != TS_SUCCESS) {
+    error("Error TSHttpTxnTransformRespGet");
+    return;
+  }
+
+  if (gzip_content_encoding_header(bufp, hdr_loc, data->compression_type) == 
TS_SUCCESS &&
+      gzip_vary_header(bufp, hdr_loc) == TS_SUCCESS &&
+      gzip_etag_header(bufp, hdr_loc) == TS_SUCCESS) {
+    downstream_conn = TSTransformOutputVConnGet(contp);
+    data->downstream_buffer = TSIOBufferCreate();
+    data->downstream_reader = TSIOBufferReaderAlloc(data->downstream_buffer);
+    data->downstream_vio = TSVConnWrite(downstream_conn, contp, 
data->downstream_reader, INT64_MAX);
+  }
+
+  TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+}
+
+
+
+static void
+gzip_transform_one(GzipData * data, TSIOBufferReader upstream_reader, int 
amount)
+{
+  TSIOBufferBlock downstream_blkp;
+  const char *upstream_buffer;
+  char *downstream_buffer;
+  int64_t upstream_length, downstream_length;
+  int err;
+
+  while (amount > 0) {
+    downstream_blkp = TSIOBufferReaderStart(upstream_reader);
+    if (!downstream_blkp) {
+      error("couldn't get from IOBufferBlock");
+      return;
+    }
+
+    upstream_buffer = TSIOBufferBlockReadStart(downstream_blkp, 
upstream_reader, &upstream_length);
+    if (!upstream_buffer) {
+      error("couldn't get from TSIOBufferBlockReadStart");
+      return;
+    }
+
+    if (upstream_length > amount) {
+      upstream_length = amount;
+    }
+
+    data->zstrm.next_in = (unsigned char *) upstream_buffer;
+    data->zstrm.avail_in = upstream_length;
+
+    while (data->zstrm.avail_in > 0) {
+      downstream_blkp = TSIOBufferStart(data->downstream_buffer);
+      downstream_buffer = TSIOBufferBlockWriteStart(downstream_blkp, 
&downstream_length);
+
+      data->zstrm.next_out = (unsigned char *) downstream_buffer;
+      data->zstrm.avail_out = downstream_length;
+
+      err = deflate(&data->zstrm, Z_NO_FLUSH);
+
+      if (err != Z_OK)
+        warning("deflate() call failed: %d", err);
+
+      if (downstream_length > data->zstrm.avail_out) {
+        TSIOBufferProduce(data->downstream_buffer, downstream_length - 
data->zstrm.avail_out);
+        data->downstream_length += (downstream_length - data->zstrm.avail_out);
+      }
+
+      if (data->zstrm.avail_out > 0) {
+        if (data->zstrm.avail_in != 0) {
+          error("gzip-transform: ERROR: avail_in is (%d): should be 0", 
data->zstrm.avail_in);
+        }
+      }
+    }
+
+    TSIOBufferReaderConsume(upstream_reader, upstream_length);
+    amount -= upstream_length;
+  }
+}
+
+static void
+gzip_transform_finish(GzipData * data)
+{
+  if (data->state == transform_state_output) {
+    TSIOBufferBlock downstream_blkp;
+    char *downstream_buffer;
+    int64_t downstream_length;
+    int err;
+
+    data->state = transform_state_finished;
+
+    for (;;) {
+      downstream_blkp = TSIOBufferStart(data->downstream_buffer);
+
+      downstream_buffer = TSIOBufferBlockWriteStart(downstream_blkp, 
&downstream_length);
+      data->zstrm.next_out = (unsigned char *) downstream_buffer;
+      data->zstrm.avail_out = downstream_length;
+
+      err = deflate(&data->zstrm, Z_FINISH);
+
+      if (downstream_length > (int64_t) data->zstrm.avail_out) {
+        TSIOBufferProduce(data->downstream_buffer, downstream_length - 
data->zstrm.avail_out);
+        data->downstream_length += (downstream_length - data->zstrm.avail_out);
+      }
+
+      if (err == Z_OK) {        /* some more data to encode */
+        continue;
+      }
+
+      if (err != Z_STREAM_END) {
+        warning("deflate should report Z_STREAM_END");
+      }
+      break;
+    }
+
+    if (data->downstream_length != (int64_t) (data->zstrm.total_out)) {
+      error("gzip-transform: ERROR: output lengths don't match (%d, %ld)", 
data->downstream_length,
+            data->zstrm.total_out);
+    }
+
+    gzip_log_ratio(data->zstrm.total_in, data->downstream_length);
+  }
+}
+
+
+static void
+gzip_transform_do(TSCont contp)
+{
+  TSVIO upstream_vio;
+  GzipData *data;
+  int64_t upstream_todo;
+  int64_t upstream_avail;
+  int64_t downstream_bytes_written;
+
+  data = (GzipData*)TSContDataGet(contp);
+  if (data->state == transform_state_initialized) {
+    gzip_transform_init(contp, data);
+  }
+
+  upstream_vio = TSVConnWriteVIOGet(contp);
+  downstream_bytes_written = data->downstream_length;
+
+  if (!TSVIOBufferGet(upstream_vio)) {
+    gzip_transform_finish(data);
+
+    TSVIONBytesSet(data->downstream_vio, data->downstream_length);
+
+    if (data->downstream_length > downstream_bytes_written) {
+      TSVIOReenable(data->downstream_vio);
+    }
+    return;
+  }
+
+  upstream_todo = TSVIONTodoGet(upstream_vio);
+
+  if (upstream_todo > 0) {
+    upstream_avail = TSIOBufferReaderAvail(TSVIOReaderGet(upstream_vio));
+
+    if (upstream_todo > upstream_avail) {
+      upstream_todo = upstream_avail;
+    }
+
+    if (upstream_todo > 0) {
+      gzip_transform_one(data, TSVIOReaderGet(upstream_vio), upstream_todo);
+      TSVIONDoneSet(upstream_vio, TSVIONDoneGet(upstream_vio) + upstream_todo);
+    }
+  }
+
+  if (TSVIONTodoGet(upstream_vio) > 0) {
+    if (upstream_todo > 0) {
+      if (data->downstream_length > downstream_bytes_written) {
+        TSVIOReenable(data->downstream_vio);
+      }
+      TSContCall(TSVIOContGet(upstream_vio), TS_EVENT_VCONN_WRITE_READY, 
upstream_vio);
+    }
+  } else {
+    gzip_transform_finish(data);
+    TSVIONBytesSet(data->downstream_vio, data->downstream_length);
+
+    if (data->downstream_length > downstream_bytes_written) {
+      TSVIOReenable(data->downstream_vio);
+    }
+
+    TSContCall(TSVIOContGet(upstream_vio), TS_EVENT_VCONN_WRITE_COMPLETE, 
upstream_vio);
+  }
+}
+
+
+static int
+gzip_transform(TSCont contp, TSEvent event, void * /* edata ATS_UNUSED */)
+{
+  if (TSVConnClosedGet(contp)) {
+    gzip_data_destroy((GzipData*)TSContDataGet(contp));
+    TSContDestroy(contp);
+    return 0;
+  } else {
+    switch (event) {
+    case TS_EVENT_ERROR:{
+        debug("gzip_transform: TS_EVENT_ERROR starts");
+        TSVIO upstream_vio = TSVConnWriteVIOGet(contp);
+        TSContCall(TSVIOContGet(upstream_vio), TS_EVENT_ERROR, upstream_vio);
+      }
+      break;
+    case TS_EVENT_VCONN_WRITE_COMPLETE:
+      TSVConnShutdown(TSTransformOutputVConnGet(contp), 0, 1);
+      break;
+    case TS_EVENT_VCONN_WRITE_READY:
+      gzip_transform_do(contp);
+      break;
+    case TS_EVENT_IMMEDIATE:
+      gzip_transform_do(contp);
+      break;
+    default:
+      warning("unknown event [%d]", event);
+      gzip_transform_do(contp);
+      break;
+    }
+  }
+
+  return 0;
+}
+
+
+static int
+gzip_transformable(TSHttpTxn txnp, int server, HostConfiguration * 
host_configuration, int *compress_type)
+{
+  /* Server response header */
+  TSMBuffer bufp;
+  TSMLoc hdr_loc;
+  TSMLoc field_loc;
+
+  /* Client request header */
+  TSMBuffer cbuf;
+  TSMLoc chdr;
+  TSMLoc cfield;
+
+  const char *value;
+  int nvalues;
+  int i, compression_acceptable, len;
+
+  TSHttpStatus resp_status;
+  if (server) {
+    TSHttpTxnServerRespGet(txnp, &bufp, &hdr_loc);
+  } else {
+    TSHttpTxnCachedRespGet(txnp, &bufp, &hdr_loc);
+  }
+  resp_status = TSHttpHdrStatusGet(bufp, hdr_loc);
+  TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+
+  //conservatively pick some statusses to compress
+  if (!(resp_status == 200 || resp_status == 404 || resp_status == 500)) {
+    info("http response status [%d] is not compressible", resp_status);
+    return 0;
+  }
+
+  TSHttpTxnClientReqGet(txnp, &cbuf, &chdr);
+
+  //the only compressible method is currently GET.
+  int method_length;
+  const char *method = TSHttpHdrMethodGet(cbuf, chdr, &method_length);
+  if (!(method_length == TS_HTTP_LEN_GET && memcmp(method, TS_HTTP_METHOD_GET, 
TS_HTTP_LEN_GET) == 0)) {
+    debug("method is not GET, not compressible");
+    TSHandleMLocRelease(cbuf, TS_NULL_MLOC, chdr);
+    return 0;
+  }
+
+  cfield = TSMimeHdrFieldFind(cbuf, chdr, TS_MIME_FIELD_ACCEPT_ENCODING, 
TS_MIME_LEN_ACCEPT_ENCODING);
+  if (cfield != TS_NULL_MLOC) {
+    compression_acceptable = 0;
+    nvalues = TSMimeHdrFieldValuesCount(cbuf, chdr, cfield);
+    for (i=0; i<nvalues; i++) {
+      value = TSMimeHdrFieldValueStringGet(cbuf, chdr, cfield, i, &len);
+      if (!value) {
+        continue;
+      }
+
+      if (strncasecmp(value, "deflate", sizeof("deflate") - 1) == 0) {
+        compression_acceptable = 1;
+        *compress_type = COMPRESSION_TYPE_DEFLATE;
+        break;
+      } else if (strncasecmp(value, "gzip", sizeof("gzip") - 1) == 0) {
+        compression_acceptable = 1;
+        *compress_type = COMPRESSION_TYPE_GZIP;
+        break;
+      }
+    }
+
+    TSHandleMLocRelease(cbuf, chdr, cfield);
+    TSHandleMLocRelease(cbuf, TS_NULL_MLOC, chdr);
+
+    if (!compression_acceptable) {
+      info("no acceptable encoding found in request header, not compressible");
+      return 0;
+    }
+  } else {
+    info("no acceptable encoding found in request header, not compressible");
+    TSHandleMLocRelease(cbuf, chdr, cfield);
+    TSHandleMLocRelease(cbuf, TS_NULL_MLOC, chdr);
+    return 0;
+  }
+
+  if (server) {
+    TSHttpTxnServerRespGet(txnp, &bufp, &hdr_loc);
+  } else {
+    TSHttpTxnCachedRespGet(txnp, &bufp, &hdr_loc);
+  }
+
+  /* If there already exists a content encoding then we don't want
+     to do anything. */
+  field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, 
TS_MIME_FIELD_CONTENT_ENCODING, -1);
+  if (field_loc) {
+    info("response is already content encoded, not compressible");
+    TSHandleMLocRelease(bufp, hdr_loc, field_loc);
+    TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+    return 0;
+  }
+
+  /* We only want to do gzip compression on documents that have a
+     content type of "text/" or "application/x-javascript". */
+  field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, TS_MIME_FIELD_CONTENT_TYPE, 
-1);
+  if (!field_loc) {
+    info("no content type header found, not compressible");
+    TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+    return 0;
+  }
+
+  value = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, field_loc, 0, &len);
+
+  int rv = host_configuration->ContentTypeIsCompressible(value, len);
+  if (!rv) { 
+    info("content-type [%.*s] not compressible", len, value);
+  }
+  TSHandleMLocRelease(bufp, hdr_loc, field_loc);
+  TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc);
+  return rv;
+}
+
+
+static void
+gzip_transform_add(TSHttpTxn txnp, int /* server ATS_UNUSED */, 
HostConfiguration * hc, int compress_type)
+{
+  int *tmp = (int *) TSHttpTxnArgGet(txnp, arg_idx_hooked);
+  if (tmp) {
+    //happens on cache_stale_hit
+    debug("transform hook already set, bail");
+    return;
+  } else {
+    TSHttpTxnArgSet(txnp, arg_idx_hooked, (void *) &GZIP_ONE);
+    info("adding compression transform");
+  }
+
+  TSHttpTxnUntransformedRespCache(txnp, 1);
+
+  if (!hc->cache()) {
+    TSHttpTxnTransformedRespCache(txnp, 0);
+  } else { 
+    TSHttpTxnTransformedRespCache(txnp, 1);
+  }
+
+  TSVConn connp;
+  GzipData *data;
+
+  connp = TSTransformCreate(gzip_transform, txnp);
+  data = gzip_data_alloc(compress_type);
+  data->txn = txnp;
+
+  TSContDataSet(connp, data);
+  TSHttpTxnHookAdd(txnp, TS_HTTP_RESPONSE_TRANSFORM_HOOK, connp);
+}
+
+static int
+cache_transformable(TSHttpTxn txnp)
+{
+  int obj_status;
+
+  if (TSHttpTxnCacheLookupStatusGet(txnp, &obj_status) == TS_ERROR) {
+    warning("Couldn't get cache status of object");
+    return 0;
+  }
+  if (obj_status == TS_CACHE_LOOKUP_HIT_STALE) {
+    info("stale cache hit");
+    return 0;
+  }
+  if (obj_status == TS_CACHE_LOOKUP_HIT_FRESH) {
+    info("fresh cache hit");
+    return 1;
+  }
+
+  return 0;
+}
+
+HostConfiguration * 
+find_host_configuration(TSHttpTxn /* txnp ATS_UNUSED */, TSMBuffer bufp, 
TSMLoc locp)
+{
+  TSMLoc fieldp = TSMimeHdrFieldFind(bufp, locp, TS_MIME_FIELD_HOST, 
TS_MIME_LEN_HOST);
+
+  if (fieldp) {
+    int strl;
+    const char *strv = TSMimeHdrFieldValueStringGet(bufp, locp, fieldp, -1, 
&strl);
+    TSHandleMLocRelease(bufp, locp, fieldp);
+
+    HostConfiguration * host_configuration = config->Find(strv, strl);   
+    return host_configuration;
+  }
+
+  return config->GlobalConfiguration();
+}
+
+
+static int
+transform_plugin(TSCont /* contp ATS_UNUSED */, TSEvent event, void *edata)
+{
+  TSHttpTxn txnp = (TSHttpTxn) edata;
+  int compress_type = COMPRESSION_TYPE_DEFLATE;
+
+  switch (event) {
+    case TS_EVENT_HTTP_READ_REQUEST_HDR:
+      {
+        TSMBuffer req_buf;
+        TSMLoc req_loc;
+        if (TSHttpTxnClientReqGet(txnp, &req_buf, &req_loc) == TS_SUCCESS) {
+          int url_len;
+          char * url = TSHttpTxnEffectiveUrlStringGet(txnp, &url_len);
+          HostConfiguration * hc = find_host_configuration(txnp, req_buf, 
req_loc);
+          //we could clone the hosting configuration here, to make it 
deletable on reload?
+          TSHttpTxnArgSet(txnp, arg_idx_host_configuration, (void *) hc);
+
+          if (!hc->enabled() || !hc->IsUrlAllowed(url, url_len)) {
+            //FIXME: no double negatives
+            TSHttpTxnArgSet(txnp, arg_idx_url_disallowed, (void *) &GZIP_ONE);
+            info("url [%.*s] not allowed", url_len, url);
+          } else {
+            normalize_accept_encoding(txnp, req_buf, req_loc); 
+          }
+          TSfree(url);
+          TSHandleMLocRelease(req_buf, TS_NULL_MLOC, req_loc);
+        }
+        TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+      }
+      break;
+
+    case TS_EVENT_HTTP_READ_RESPONSE_HDR:
+      {
+        //os: the accept encoding header needs to be restored..
+        //otherwise the next request won't get a cache hit on this
+        HostConfiguration * hc = (HostConfiguration*)TSHttpTxnArgGet(txnp, 
arg_idx_host_configuration);
+        if (hc != NULL) { 
+          if (hc->remove_accept_encoding()) {
+            TSMBuffer req_buf;
+            TSMLoc req_loc;
+            if (TSHttpTxnServerReqGet(txnp, &req_buf, &req_loc) == TS_SUCCESS) 
{
+              restore_accept_encoding(txnp, req_buf, req_loc, 
global_hidden_header_name);
+              TSHandleMLocRelease(req_buf, TS_NULL_MLOC, req_loc);
+            }
+          }
+
+          int allowed = !TSHttpTxnArgGet(txnp, arg_idx_url_disallowed);
+          if ( allowed && gzip_transformable(txnp, 1, hc, &compress_type)) {
+            gzip_transform_add(txnp, 1, hc, compress_type);
+          }
+        }
+        TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+      }
+      break;
+
+    case TS_EVENT_HTTP_SEND_REQUEST_HDR:
+      {
+        HostConfiguration * hc = (HostConfiguration*)TSHttpTxnArgGet(txnp, 
arg_idx_host_configuration);
+        if (hc!=NULL) {
+          if (hc->remove_accept_encoding()) {
+            TSMBuffer req_buf;
+            TSMLoc req_loc;
+            if (TSHttpTxnServerReqGet(txnp, &req_buf, &req_loc) == TS_SUCCESS) 
{
+              hide_accept_encoding(txnp, req_buf, req_loc, 
global_hidden_header_name);
+              TSHandleMLocRelease(req_buf, TS_NULL_MLOC, req_loc);
+            }
+          }
+        }
+        TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+      }
+      break;
+
+    case TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE:
+      {
+        int allowed = !TSHttpTxnArgGet(txnp, arg_idx_url_disallowed);
+        HostConfiguration * hc = (HostConfiguration*)TSHttpTxnArgGet(txnp, 
arg_idx_host_configuration);
+        if ( hc != NULL ) { 
+          if (allowed && cache_transformable(txnp) && gzip_transformable(txnp, 
0, hc, &compress_type)) {
+            gzip_transform_add(txnp, 0, hc, compress_type);
+          }
+        }
+        TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE);
+      }
+      break;
+
+    default:
+      fatal("gzip transform unknown event");
+  }
+
+  return 0;
+}
+
+
+static void
+read_configuration(TSCont contp) {
+  const char * path = (const char *)TSContDataGet(contp);
+  Configuration * newconfig = Configuration::Parse(path);
+
+  Configuration * oldconfig =__sync_lock_test_and_set(&config, newconfig);
+  debug("config swapped,old config %p", oldconfig);
+
+  //FIXME: we have leaked.
+  //consider cloning or refcounting the configuration passed to the txn
+  //to make deleting the old configuration possible
+  //if (config != NULL )
+  //  delete config;
+}
+
+static int
+management_update(TSCont contp, TSEvent event, void * /* edata ATS_UNUSED */)
+{
+  TSReleaseAssert(event == TS_EVENT_MGMT_UPDATE);
+  info("management update event received");
+  read_configuration(contp);
+  return 0;
+}
+
+
+void
+TSPluginInit(int argc, const char *argv[])
+{
+  string config_path;
+
+  if (argc > 2)  {
+    fatal("the gzip plugin does not accept more than 1 plugin argument");
+  } else if (argc == 2) { 
+    config_path = std::string(argv[1]);
+  }
+
+  info("TSPluginInit %s", argv[0]);
+
+  if (!register_plugin()) {
+    fatal("The gzip plugin failed to register");
+  }
+  if (!check_ts_version()) {
+    fatal("The gzip plugin requires at least traffic server v3");
+  }
+
+  //if (argc == 2) {
+  //  dictionary = load_dictionary(argv[1]);
+  //}
+
+  if (TSHttpArgIndexReserve("gzip", "for remembering if the hook was set", 
&arg_idx_hooked) != TS_SUCCESS) {
+    fatal("failed to reserve an argument index");
+  }
+  if (TSHttpArgIndexReserve("gzip", "for storing if compression is 
applicable", &arg_idx_host_configuration) != TS_SUCCESS) {
+    fatal("failed to reserve an argument index");
+  }
+  if (TSHttpArgIndexReserve("gzip", "for storing if compression is disallowed 
for this txn", &arg_idx_url_disallowed) != TS_SUCCESS) {
+    fatal("failed to reserve an argument index");
+  }
+
+  global_hidden_header_name = init_hidden_header_name();
+
+  TSCont management_contp = TSContCreate(management_update, NULL);
+  //fixme: never freed. there is no shutdown event?
+  char * p = (char*)TSmalloc(config_path.size()+1);
+  strcpy(p,config_path.c_str());
+  TSContDataSet(management_contp,(void*)p);
+  TSMgmtUpdateRegister(management_contp, TAG);
+  read_configuration(management_contp);
+
+  TSCont transform_contp = TSContCreate(transform_plugin, NULL);
+  TSHttpHookAdd(TS_HTTP_READ_REQUEST_HDR_HOOK, transform_contp);
+  TSHttpHookAdd(TS_HTTP_READ_RESPONSE_HDR_HOOK, transform_contp);
+  TSHttpHookAdd(TS_HTTP_SEND_REQUEST_HDR_HOOK, transform_contp);
+  TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, transform_contp);
+
+  info("loaded");
+}

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/54d134fe/plugins/gzip/misc.cc
----------------------------------------------------------------------
diff --git a/plugins/gzip/misc.cc b/plugins/gzip/misc.cc
new file mode 100644
index 0000000..5fb98a5
--- /dev/null
+++ b/plugins/gzip/misc.cc
@@ -0,0 +1,212 @@
+/** @file
+
+  Transforms content using gzip or deflate
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include "ts/ts.h"
+#include "ink_defs.h"
+
+#include "misc.h"
+#include <string.h>
+#include <inttypes.h>
+#include "debug_macros.h"
+
+voidpf
+gzip_alloc(voidpf /* opaque ATS_UNUSED */, uInt items, uInt size)
+{
+  return (voidpf) TSmalloc(items * size);
+}
+
+void
+gzip_free(voidpf /* opaque ATS_UNUSED */, voidpf address)
+{
+  TSfree(address);
+}
+
+void
+normalize_accept_encoding(TSHttpTxn /* txnp ATS_UNUSED */, TSMBuffer reqp, 
TSMLoc hdr_loc)
+{
+  TSMLoc field = TSMimeHdrFieldFind(reqp, hdr_loc, 
TS_MIME_FIELD_ACCEPT_ENCODING, TS_MIME_LEN_ACCEPT_ENCODING);
+  int deflate = 0;
+  int gzip = 0;
+
+  //remove the accept encoding field(s), 
+  //while finding out if gzip or deflate is supported.    
+  while (field) {
+    TSMLoc tmp;
+
+    if (!deflate && !gzip) {
+      int value_count = TSMimeHdrFieldValuesCount(reqp, hdr_loc, field);
+
+      while (value_count > 0) {
+        int val_len = 0;
+        const char *val;
+
+        --value_count;
+        val = TSMimeHdrFieldValueStringGet(reqp, hdr_loc, field, value_count, 
&val_len);
+
+        if (val_len == (int) strlen("gzip"))
+          gzip = !strncmp(val, "gzip", val_len);
+        else if (val_len == (int) strlen("deflate"))
+          deflate = !strncmp(val, "deflate", val_len);
+      }
+    }
+
+    tmp = TSMimeHdrFieldNextDup(reqp, hdr_loc, field);
+    TSMimeHdrFieldDestroy(reqp, hdr_loc, field);        //catch retval?
+    TSHandleMLocRelease(reqp, hdr_loc, field);
+    field = tmp;
+  }
+
+  //append a new accept-encoding field in the header
+  if (deflate || gzip) {
+    TSMimeHdrFieldCreate(reqp, hdr_loc, &field);
+    TSMimeHdrFieldNameSet(reqp, hdr_loc, field, TS_MIME_FIELD_ACCEPT_ENCODING, 
TS_MIME_LEN_ACCEPT_ENCODING);
+
+    if (gzip) {
+      TSMimeHdrFieldValueStringInsert(reqp, hdr_loc, field, -1, "gzip", 
strlen("gzip"));
+      info("normalized accept encoding to gzip");
+    } else if (deflate) {
+      TSMimeHdrFieldValueStringInsert(reqp, hdr_loc, field, -1, "deflate", 
strlen("deflate"));
+      info("normalized accept encoding to deflate");
+    }
+
+    TSMimeHdrFieldAppend(reqp, hdr_loc, field);
+    TSHandleMLocRelease(reqp, hdr_loc, field);
+  }
+}
+
+void
+hide_accept_encoding(TSHttpTxn /* txnp ATS_UNUSED */, TSMBuffer reqp, TSMLoc 
hdr_loc, const char * hidden_header_name)
+{
+  TSMLoc field = TSMimeHdrFieldFind(reqp, hdr_loc, 
TS_MIME_FIELD_ACCEPT_ENCODING, TS_MIME_LEN_ACCEPT_ENCODING);
+  while (field) {
+    TSMLoc tmp;
+    tmp = TSMimeHdrFieldNextDup(reqp, hdr_loc, field);
+    TSMimeHdrFieldNameSet(reqp, hdr_loc, field, hidden_header_name, -1);
+    TSHandleMLocRelease(reqp, hdr_loc, field);
+    field = tmp;
+  }
+}
+
+void
+restore_accept_encoding(TSHttpTxn /* txnp ATS_UNUSED */, TSMBuffer reqp, 
TSMLoc hdr_loc, const char * hidden_header_name)
+{
+  TSMLoc field = TSMimeHdrFieldFind(reqp, hdr_loc, hidden_header_name, -1);
+
+  while (field) {
+    TSMLoc tmp;
+    tmp = TSMimeHdrFieldNextDup(reqp, hdr_loc, field);
+    TSMimeHdrFieldNameSet(reqp, hdr_loc, field, TS_MIME_FIELD_ACCEPT_ENCODING, 
TS_MIME_LEN_ACCEPT_ENCODING);
+    TSHandleMLocRelease(reqp, hdr_loc, field);
+    field = tmp;
+  }
+}
+
+const char *
+init_hidden_header_name()
+{
+  char * hidden_header_name;
+  const char *var_name = "proxy.config.proxy_name";
+  TSMgmtString result;
+
+  if (TSMgmtStringGet(var_name, &result) != TS_SUCCESS) {
+    fatal("failed to get server name");
+  } else {
+    int hidden_header_name_len = strlen("x-accept-encoding-") + strlen(result);
+    hidden_header_name = (char *) TSmalloc(hidden_header_name_len + 1);
+    hidden_header_name[hidden_header_name_len] = 0;
+    sprintf(hidden_header_name, "x-accept-encoding-%s", result);
+  }
+  return hidden_header_name;
+}
+
+int
+check_ts_version()
+{
+  const char *ts_version = TSTrafficServerVersionGet();
+  TSReleaseAssert(ts_version);
+
+  int scan_result;
+  int major_version;
+
+  scan_result = sscanf(ts_version, "%d", &major_version);
+  TSReleaseAssert(scan_result == 1);
+
+  return major_version >= 3;
+}
+
+int
+register_plugin()
+{
+  TSPluginRegistrationInfo info;
+
+  info.plugin_name = (char*)"gzip";
+  info.vendor_name = (char*)"Apache";
+  info.support_email = (char*)"[email protected]";
+
+  if (TSPluginRegister(TS_SDK_VERSION_3_0, &info) != TS_SUCCESS) {
+    return 0;
+  }
+  return 1;
+}
+
+const char *
+load_dictionary(const char *preload_file)
+{
+  char *dict = (char *) malloc(800000);
+  uLong dictId = adler32(0L, Z_NULL, 0);
+  uLong *adler = &dictId;
+
+  FILE *fp;
+  int i = 0;
+
+  fp = fopen(preload_file, "r");
+  if (!fp) {
+    fatal("gzip-transform: ERROR: Unable to open dict file %s", preload_file);
+  }
+
+  /* dict = (char *) calloc(8000, sizeof(char)); */
+
+  i = 0;
+  while (!feof(fp)) {
+    if (fscanf(fp, "%s\n", dict + i) == 1) {
+      i = strlen(dict);
+      strcat(dict + i, " ");
+      ++i;
+    }
+  }
+  dict[i - 1] = '\0';
+
+  /* TODO get the adler compute right */
+  *adler = adler32(*adler, (const Byte *) dict, sizeof(dict));
+  return dict;
+}
+
+void
+gzip_log_ratio(int64_t in, int64_t out)
+{
+  if (in) {
+    info("Compressed size %" PRId64" (bytes), Original size %" PRId64", ratio: 
%f", out, in, ((float) (in - out) / in));
+  } else {
+    debug("Compressed size %" PRId64" (bytes), Original size %" PRId64", 
ratio: %f", out, in, 0.0F);
+  }
+}

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/54d134fe/plugins/gzip/misc.h
----------------------------------------------------------------------
diff --git a/plugins/gzip/misc.h b/plugins/gzip/misc.h
new file mode 100644
index 0000000..aa2ae16
--- /dev/null
+++ b/plugins/gzip/misc.h
@@ -0,0 +1,81 @@
+/** @file
+
+  Transforms content using gzip or deflate
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#ifndef _GZIP_MISC_H_
+#define _GZIP_MISC_H_
+
+#include <zlib.h>
+#include <ts/ts.h>
+#include <stdlib.h>             //exit()
+#include <stdio.h>
+
+//zlib stuff, see [deflateInit2] at http://www.zlib.net/manual.html
+static const int ZLIB_MEMLEVEL = 9;     //min=1 (optimize for memory),max=9 
(optimized for speed)
+static const int WINDOW_BITS_DEFLATE = -15;
+static const int WINDOW_BITS_GZIP = 31;
+
+//misc
+static const int COMPRESSION_TYPE_DEFLATE = 1;
+static const int COMPRESSION_TYPE_GZIP = 2;
+//this one is just for txnargset/get to point to
+static const int GZIP_ONE = 1;
+static const int DICT_PATH_MAX = 512;
+static const int DICT_ENTRY_MAX = 2048;
+
+//this one is used to rename the accept encoding header
+//it will be restored later on
+//to make it work, the name must be different then downstream proxies though
+//otherwise the downstream will restore the accept encoding header
+
+enum transform_state
+{
+  transform_state_initialized,
+  transform_state_output,
+  transform_state_finished
+};
+
+typedef struct
+{
+  TSHttpTxn txn;
+  TSVIO downstream_vio;
+  TSIOBuffer downstream_buffer;
+  TSIOBufferReader downstream_reader;
+  int downstream_length;
+  z_stream zstrm;
+  enum transform_state state;
+  int compression_type;
+} GzipData;
+
+
+voidpf gzip_alloc(voidpf opaque, uInt items, uInt size);
+void gzip_free(voidpf opaque, voidpf address);
+void normalize_accept_encoding(TSHttpTxn txnp, TSMBuffer reqp, TSMLoc hdr_loc);
+void hide_accept_encoding(TSHttpTxn txnp, TSMBuffer reqp, TSMLoc hdr_loc, 
const char * hidden_header_name);
+void restore_accept_encoding(TSHttpTxn txnp, TSMBuffer reqp, TSMLoc hdr_loc, 
const char * hidden_header_name);
+const char * init_hidden_header_name();
+int check_ts_version();
+int register_plugin();
+const char *load_dictionary(const char *preload_file);
+void gzip_log_ratio(int64_t in, int64_t out);
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/54d134fe/plugins/gzip/sample.gzip.config
----------------------------------------------------------------------
diff --git a/plugins/gzip/sample.gzip.config b/plugins/gzip/sample.gzip.config
new file mode 100644
index 0000000..2dd34ae
--- /dev/null
+++ b/plugins/gzip/sample.gzip.config
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+######################################################################
+#flags and options are:
+#
+# enable-gzip: default true, set true/false to enable/disable plugin for 
specific host
+#
+# remove-accept-encoding: this sets if the plugin should hide the accept 
encoding from origin servers
+# - to ease the load on the origins
+# - for when the proxy parses responses, and the resulting 
compression/decompression
+#   is wastefull
+#
+# cache: when set, the plugin stores the uncompressed and compressed response 
as alternates
+#
+# compressible-content-type: wildcard pattern for matching compressible 
content types
+#
+# disallow: wildcard pattern for disablign compression on urls
+######################################################################
+
+#first, we configure the default/global plugin behaviour
+enabled true
+remove-accept-encoding true
+cache false
+
+compressible-content-type text/*
+compressible-content-type *javascript*
+disallow /notthis/*.js
+disallow /notthat*
+disallow */bla*
+
+#override the global configuration for a host. 
+#www.foo.nl does NOT inherit anything
+[www.foo.nl]
+enabled true
+remove-accept-encoding true
+compressible-content-type text/*
+#disable a content type
+compressible-content-type !text/javascript
+
+cache false
+disallow /notthis/*.js
+disallow /notthat*

Reply via email to