Author: rhuijben
Date: Mon Oct 19 10:51:21 2015
New Revision: 1709385
URL: http://svn.apache.org/viewvc?rev=1709385&view=rev
Log:
Following up on r1709335, cleanup the implementation a bit and implement
the encoding step. Reorder and rename arguments to more closely follow
other serf functions.
* serf-dev/dev/buckets/hpack_buckets.c
(EOS_CHAR): New define.
(serf__hpack_huffman_decode): Update documentation and arguments. Explicitly
handle corner cases exactly as specified by the RFC.
(serf__hpack_huffman_encode): New function.
* serf-dev/dev/serf_private.h
(serf__hpack_huffman_decode): Tweak arguments.
(serf__hpack_huffman_encode): New function.
* serf-dev/dev/test/test_buckets.c
(test_hpack_huffman_decode): Update caller. Extend with more corner cases.
(test_hpack_huffman_encode): New function.
(test_buckets): Add test_hpack_huffman_encode.
Modified:
serf/trunk/buckets/hpack_buckets.c
serf/trunk/serf_private.h
serf/trunk/test/test_buckets.c
Modified: serf/trunk/buckets/hpack_buckets.c
URL:
http://svn.apache.org/viewvc/serf/trunk/buckets/hpack_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/buckets/hpack_buckets.c (original)
+++ serf/trunk/buckets/hpack_buckets.c Mon Oct 19 10:51:21 2015
@@ -27,6 +27,7 @@
#include "serf_private.h"
#include "hpack_huffman.inc"
+#define EOS_CHAR (256)
/* Callback for bsearch() */
static int
@@ -47,34 +48,37 @@ hpack_hm_compare(const void *k,
return 0;
}
-/* Convert raw data in RAW of size RAW_LEN. If RESULT is not NULL,
- put the result in the buffer pointed to by RESULT which is of size
- BUF_LEN. Sets *RESULT_LEN to the resulting length.
-
- If RESULT is not large enough return APR_EINVAL.
- */
+/* Convert encoded data in ENCODED of size ENCODED_LEN. If TEXT is not
+ NULL, put the result in the buffer pointed to by TEXT which is of size
+ TEXT_AVAIL. Sets *TEXT_LEN to the resulting length.
+
+ If TEXT_AVAIL allows it a final '\0' is added at TEXT[*TEXT_LEN].
+
+ If TEXT is not large enough return APR_ENOMEM. If ENCODED isn't properly
+ encoded return APR_EINVAL.
+ */
apr_status_t
-serf__hpack_huffman_decode(const unsigned char *raw,
- apr_size_t raw_len,
- char *result,
- apr_size_t buf_len,
- apr_size_t *result_len)
+serf__hpack_huffman_decode(const unsigned char *encoded,
+ apr_size_t encoded_len,
+ apr_size_t text_avail,
+ char *text,
+ apr_size_t *text_len)
{
apr_uint64_t stash = 0;
apr_int16_t bits_left = 0;
- *result_len = 0;
+ apr_size_t result_len = 0;
- while (raw_len || bits_left)
+ while (encoded_len || bits_left)
{
apr_uint32_t match;
struct serf_hpack_huffman_item_t *r;
- while (bits_left < 30 && raw_len)
+ while (bits_left < 30 && encoded_len)
{
- stash |= (apr_uint64_t)*raw << (64 - 8 - bits_left);
+ stash |= (apr_uint64_t)*encoded << (64 - 8 - bits_left);
bits_left += 8;
- raw_len--;
- raw++;
+ encoded_len--;
+ encoded++;
}
match = stash >> 32;
@@ -82,33 +86,156 @@ serf__hpack_huffman_decode(const unsigne
sizeof(serf_hpack_hm_map) / sizeof(serf_hpack_hm_map[0]),
sizeof(serf_hpack_hm_map[0]), hpack_hm_compare);
- if (!r)
+ if (!r || (r->bits > bits_left))
{
- if (!raw_len)
- break;
- else
- return SERF_ERROR_HTTP2_PROTOCOL_ERROR;
+ /* With a canonical huffman code we can only reach this state
+ at the end of the string */
+ break;
}
- else if (r->bits > bits_left)
- break;
- if (result)
+ if (r->cval == EOS_CHAR)
+ return APR_EINVAL;
+
+ if (text)
{
- if (*result_len < buf_len)
- result[*result_len] = (char)r->cval;
+ if (result_len < text_avail)
+ text[result_len] = (char)r->cval;
else
- return APR_EINVAL;
+ return APR_ENOMEM;
}
- (*result_len)++;
+ result_len++;
stash <<= r->bits;
- if (bits_left )
bits_left -= r->bits;
}
- if (result && *result_len < buf_len)
- result[*result_len] = 0;
+ if (bits_left)
+ {
+ /* https://tools.ietf.org/html/rfc7541#section-5.2
+
+ Upon decoding, an incomplete code at the end of the encoded data is
+ to be considered as padding and discarded. A padding strictly longer
+ than 7 bits MUST be treated as a decoding error. A padding not
+ corresponding to the most significant bits of the code for the EOS
+ symbol MUST be treated as a decoding error. A Huffman-encoded string
+ literal containing the EOS symbol MUST be treated as a decoding
+ error. */
+ const struct serf_hpack_huffman_item_t *eos;
+ apr_uint64_t exp_stash;
+
+ eos = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]];
+
+ /* Trim EOS value to the bits we need */
+ exp_stash = ((apr_uint32_t)eos->hval >> (32 - bits_left));
+ /* And move it to the right position */
+ exp_stash <<= 64 - bits_left;
+
+ if (exp_stash != stash)
+ return APR_EINVAL;
+ }
+
+ *text_len = result_len;
+ if (text && result_len < text_avail)
+ text[result_len] = 0;
return APR_SUCCESS;
}
+/* Encodes data in TEXT of size TEXT_LEN.
+
+ Sets ENCODED_LEN to the required length.
+
+ If ENCODED is not NULL, it specifies an output buffer of size
+ ENCODED_AVAIL into which the data will be encoded.
+
+ If ENCODE is not NULL and the data doesn't fit returns APR_ENOMEM.
+ */
+apr_status_t
+serf__hpack_huffman_encode(const char *text,
+ apr_size_t text_len,
+ apr_size_t encoded_avail,
+ unsigned char *encoded,
+ apr_size_t *encoded_len)
+{
+ apr_uint64_t stash = 0;
+ apr_int16_t bits_left = 0;
+ apr_size_t result_len = 0;
+
+ if (! encoded)
+ {
+ /* Just calculating size needed. Avoid bit handling */
+ apr_int64_t result_bits = 0;
+ while (text_len)
+ {
+ const struct serf_hpack_huffman_item_t *r;
+
+ r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]];
+
+ result_bits += r->bits;
+ text_len--;
+ text++;
+ }
+
+ *encoded_len = (apr_size_t)((result_bits+7) / 8);
+ return APR_SUCCESS;
+ }
+
+ while (text_len)
+ {
+ if (text_len)
+ {
+ const struct serf_hpack_huffman_item_t *r;
+
+ r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]];
+
+ stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left);
+ bits_left += r->bits;
+ text_len--;
+ text++;
+ }
+
+ while (bits_left > 8)
+ {
+ if (! encoded_avail)
+ return APR_ENOMEM;
+
+ *encoded = (unsigned char)(stash >> (64 - 8));
+ encoded++;
+ stash <<= 8;
+ bits_left -= 8;
+
+ encoded_avail--;
+ result_len++;
+ }
+ }
+
+ if (bits_left)
+ {
+ /* https://tools.ietf.org/html/rfc7541#section-5.2
+
+ As the Huffman-encoded data doesn't always end at an octet boundary,
+ some padding is inserted after it, up to the next octet boundary. To
+ prevent this padding from being misinterpreted as part of the string
+ literal, the most significant bits of the code corresponding to the
+ EOS (end-of-string) symbol are used. */
+ const struct serf_hpack_huffman_item_t *r;
+
+ if (! encoded_avail)
+ return APR_ENOMEM;
+
+ r = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]];
+ stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left);
+ /* bits_left += r->bits; */
+
+ *encoded = (unsigned char)(stash >> (64 - 8));
+ /* encoded++ */
+ /* stash <<= 8; */
+ /* bits_left -= 8; */
+ /* encoded_avail--; */
+ result_len++;
+ }
+
+ *encoded_len = result_len;
+
+ return APR_SUCCESS;
+}
Modified: serf/trunk/serf_private.h
URL:
http://svn.apache.org/viewvc/serf/trunk/serf_private.h?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/serf_private.h (original)
+++ serf/trunk/serf_private.h Mon Oct 19 10:51:21 2015
@@ -524,11 +524,17 @@ serf_bucket_t *serf__bucket_log_wrapper_
void serf__http2_protocol_init(serf_connection_t *conn);
/* From http2_hpack_buckets.c */
-apr_status_t serf__hpack_huffman_decode(const unsigned char *raw,
- apr_size_t raw_len,
- char *result,
- apr_size_t buf_len,
- apr_size_t *result_len);
+apr_status_t serf__hpack_huffman_decode(const unsigned char *encoded,
+ apr_size_t encoded_len,
+ apr_size_t text_avail,
+ char *text,
+ apr_size_t *text_len);
+
+apr_status_t serf__hpack_huffman_encode(const char *text,
+ apr_size_t text_len,
+ apr_size_t encoded_avail,
+ unsigned char *encoded,
+ apr_size_t *encoded_len);
/** Logging functions. **/
Modified: serf/trunk/test/test_buckets.c
URL:
http://svn.apache.org/viewvc/serf/trunk/test/test_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/test/test_buckets.c (original)
+++ serf/trunk/test/test_buckets.c Mon Oct 19 10:51:21 2015
@@ -1939,68 +1939,191 @@ static void test_hpack_huffman_decode(Cu
"\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31"
"\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50"
"\x07";
+ const unsigned char preD[] = "\0\0\0\0\0";
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre1, sizeof(pre1) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "www.example.com", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre2, sizeof(pre2) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "no-cache", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre3, sizeof(pre3) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "custom-key", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre4, sizeof(pre4) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "custom-value", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre5, sizeof(pre5) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "302", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre6, sizeof(pre6) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "private", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre7, sizeof(pre7) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:21 GMT", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre8, sizeof(pre8) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "https://www.example.com", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre9, sizeof(pre9) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "307", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preA, sizeof(preA) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:22 GMT", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preB, sizeof(preB) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "gzip", result);
+ CuAssertIntEquals(tc, strlen(result), len);
CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preC, sizeof(preC) - 1,
- result, sizeof(result),
+ sizeof(result), result,
&len));
CuAssertStrEquals(tc, "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; "
"version=1", result);
+ CuAssertIntEquals(tc, strlen(result), len);
+
+ CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preD, sizeof(preD) - 1,
+ sizeof(result), result,
+ &len));
+ CuAssertStrEquals(tc, "00000000", result);
+ CuAssertIntEquals(tc, strlen(result), len);
+
+ /* And now check some corner cases as specified in the RFC:
+
+ The remaining bits must be filled out with the prefix of EOS */
+ CuAssertIntEquals(tc, 0,
+ serf__hpack_huffman_decode((const unsigned char*)"\x07", 1,
+ sizeof(result), result,
+ &len));
+ CuAssertStrEquals(tc, "0", result);
+ CuAssertIntEquals(tc, 1, len);
+
+ CuAssertIntEquals(tc, APR_EINVAL,
+ serf__hpack_huffman_decode((const unsigned char*)"\x06", 1,
+ sizeof(result),
+ result, &len));
+ CuAssertIntEquals(tc, APR_EINVAL,
+ serf__hpack_huffman_decode((const unsigned char*)"\x01", 1,
+ sizeof(result),
+ result, &len));
+
+ /* EOS may not appear itself */
+ CuAssertIntEquals(tc, APR_EINVAL,
+ serf__hpack_huffman_decode((const unsigned char*)
+ "\xFF\xFF\xFF\xFF", 4,
+ sizeof(result), result, &len));
+}
+
+#define VERIFY_REVERSE(x) \
+ do \
+ { \
+ const char *v = (x); \
+ apr_size_t sz2; \
+ CuAssertIntEquals(tc, 0, \
+ serf__hpack_huffman_encode(v, strlen(v), \
+ sizeof(encoded), \
+ encoded, &encoded_len)); \
+ CuAssertIntEquals(tc, 0, \
+ serf__hpack_huffman_encode(v, strlen(v), \
+ 0, NULL, &sz2)); \
+ CuAssertIntEquals(tc, encoded_len, sz2); \
+ CuAssertIntEquals(tc, 0, \
+ serf__hpack_huffman_decode(encoded, encoded_len, \
+ sizeof(text), text, \
+ &text_len)); \
+ CuAssertStrEquals(tc, v, text); \
+ CuAssertIntEquals(tc, strlen(v), text_len); \
+ } \
+ while(0)
+
+static void test_hpack_huffman_encode(CuTest *tc)
+{
+ unsigned char encoded[1024];
+ char text[1024];
+ apr_size_t encoded_len;
+ apr_size_t text_len;
+
+ VERIFY_REVERSE("1234567890");
+ VERIFY_REVERSE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ {
+ char from[256];
+ int i;
+
+ for (i = 0; i < sizeof(from); i++)
+ from[i] = (char)i;
+
+ CuAssertIntEquals(tc, 0,
+ serf__hpack_huffman_encode(from, sizeof(from),
+ sizeof(encoded),
+ encoded, &encoded_len));
+ /* Nice.. need 583 bytes to encode these 256 bytes :-) */
+ CuAssertIntEquals(tc, 583, encoded_len);
+ text[256] = 0xFE;
+ CuAssertIntEquals(tc, 0,
+ serf__hpack_huffman_decode(encoded, encoded_len,
+ sizeof(text), text,
+ &text_len));
+ CuAssertIntEquals(tc, 256, text_len);
+ CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from)));
+ /* If there is space in the buffer serf__hpack_huffman_decode will add
+ a final '\0' after the buffer */
+ CuAssertIntEquals(tc, 0, text[256]);
+
+ for (i = 0; i < sizeof(from); i++)
+ from[i] = '0';
+
+ CuAssertIntEquals(tc, 0,
+ serf__hpack_huffman_encode(from, sizeof(from),
+ sizeof(encoded),
+ encoded, &encoded_len));
+ /* Ok, 160 to encode 256. Maybe there is some use case */
+ CuAssertIntEquals(tc, 160, encoded_len);
+ text[256] = 0xEF;
+ CuAssertIntEquals(tc, 0,
+ serf__hpack_huffman_decode(encoded, encoded_len,
+ sizeof(text), text,
+ &text_len));
+ CuAssertIntEquals(tc, 256, text_len);
+ CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from)));
+ /* If there is space in the buffer serf__hpack_huffman_decode will add
+ a final '\0' after the buffer */
+ CuAssertIntEquals(tc, 0, text[256]);
+ }
}
+#undef VERIFY_REVERSE
CuSuite *test_buckets(void)
{
@@ -2034,6 +2157,7 @@ CuSuite *test_buckets(void)
SUITE_ADD_TEST(suite, test_http2_unframe_buckets);
SUITE_ADD_TEST(suite, test_http2_unpad_buckets);
SUITE_ADD_TEST(suite, test_hpack_huffman_decode);
+ SUITE_ADD_TEST(suite, test_hpack_huffman_encode);
#if 0
/* This test for issue #152 takes a lot of time generating 4GB+ of random
data so it's disabled by default. */