Author: stefan2
Date: Sat Jun 25 08:12:50 2011
New Revision: 1139502
URL: http://svn.apache.org/viewvc?rev=1139502&view=rev
Log:
Base64 encoding / decoding accounts for a significant part of the protocol
handling with NEON. Make that 3 .. 4 times faster.
* subversion/libsvn_subr/svn_base64.c
(BASE64_LINELEN, base64tab): add docstrings
(BYTES_PER_LINE): new, derived value
(encode_group): fine-tune
(decode_group_directly): new function variant
(encode_line, decode_line): new functions to process line-sized chunks at once
(encode_bytes, decode_bytes): use above functions when possible
Modified:
subversion/trunk/subversion/libsvn_subr/svn_base64.c
Modified: subversion/trunk/subversion/libsvn_subr/svn_base64.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/svn_base64.c?rev=1139502&r1=1139501&r2=1139502&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/svn_base64.c (original)
+++ subversion/trunk/subversion/libsvn_subr/svn_base64.c Sat Jun 25 08:12:50
2011
@@ -34,11 +34,19 @@
#include "svn_error.h"
#include "svn_base64.h"
-
+/* When asked to format the the base64-encoded output as multiple lines,
+ we put this many chars in each line (plus one new line char) unless
+ we run out of data.
+ It is vital for some of the optimizations below that this value is
+ a multiple of 4. */
#define BASE64_LINELEN 76
-static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz0123456789+/";
+/* This number of bytes is encoded in a line of base64 chars. */
+#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)
+
+/* Value -> base64 char mapping table (2^6 entries) */
+static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+ "abcdefghijklmnopqrstuvwxyz0123456789+/";
/* Binary input --> base64-encoded output */
@@ -59,13 +67,47 @@ struct encode_baton {
static APR_INLINE void
encode_group(const unsigned char *in, char *out)
{
- out[0] = base64tab[in[0] >> 2];
- out[1] = base64tab[((in[0] & 0x3) << 4) | (in[1] >> 4)];
- out[2] = base64tab[((in[1] & 0xf) << 2) | (in[2] >> 6)];
- out[3] = base64tab[in[2] & 0x3f];
+ /* Expand input bytes to machine word length (with zero extra cost
+ on x86/x64) ... */
+ apr_size_t part0 = in[0];
+ apr_size_t part1 = in[1];
+ apr_size_t part2 = in[2];
+
+ /* ... to prevent these arithmetic operations from being limited to
+ byte size. This saves non-zero cost conversions of the result when
+ calculating the addresses within base64tab. */
+ out[0] = base64tab[part0 >> 2];
+ out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
+ out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
+ out[3] = base64tab[part2 & 0x3f];
+}
+
+/* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
+ BASE64_LINELEN chars and append it to STR. It does not assume that
+ a new line char will be appended, though.
+ The code in this function will simply transform the data without
+ performing any boundary checks. Therefore, DATA must have at least
+ BYTES_PER_LINE left and space for at least another BASE64_LINELEN
+ chars must have been pre-allocated in STR before calling this
+ function. */
+static void
+encode_line(svn_stringbuf_t *str, const char *data)
+{
+ /* Translate directly from DATA to STR->DATA. */
+ const unsigned char *in = data;
+ char *out = str->data + str->len;
+ char *end = out + BASE64_LINELEN;
+
+ /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
+ a multiple of 4. */
+ for ( ; out != end; in += 3, out += 4)
+ encode_group(in, out);
+
+ /* Expand and terminate the string. */
+ *out = '\0';
+ str->len += BASE64_LINELEN;
}
-
/* Base64-encode a byte string which may or may not be the totality of
the data being encoded. INBUF and *INBUFLEN carry the leftover
data from call to call, and *LINELEN carries the length of the
@@ -82,7 +124,9 @@ encode_bytes(svn_stringbuf_t *str, const
apr_size_t buflen;
/* Resize the stringbuf to make room for the (approximate) size of
- output, to avoid repeated resizes later. */
+ output, to avoid repeated resizes later.
+ Please note that our optimized code relies on the fact that STR
+ never needs to be resized until we leave this function. */
buflen = len * 4 / 3 + 4;
if (break_lines)
{
@@ -94,12 +138,31 @@ encode_bytes(svn_stringbuf_t *str, const
/* Keep encoding three-byte groups until we run out. */
while (*inbuflen + (end - p) >= 3)
{
- memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
- p += (3 - *inbuflen);
- encode_group(inbuf, group);
- svn_stringbuf_appendbytes(str, group, 4);
- *inbuflen = 0;
- *linelen += 4;
+ /* May we encode BYTES_PER_LINE bytes without caring about
+ line breaks, data in the temporary INBUF or running out
+ of data? */
+ if ( *inbuflen == 0
+ && (*linelen == 0 || !break_lines)
+ && (end - p >= BYTES_PER_LINE))
+ {
+ /* Yes, we can encode a whole chunk of data at once. */
+ encode_line(str, p);
+ p += BYTES_PER_LINE;
+ *linelen += BASE64_LINELEN;
+ }
+ else
+ {
+ /* No, this is one of a number of special cases.
+ Encode the data byte by byte. */
+ memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
+ p += (3 - *inbuflen);
+ encode_group(inbuf, group);
+ svn_stringbuf_appendbytes(str, group, 4);
+ *inbuflen = 0;
+ *linelen += 4;
+ }
+
+ /* Adc line breaks as necessary. */
if (break_lines && *linelen == BASE64_LINELEN)
{
svn_stringbuf_appendbyte(str, '\n');
@@ -238,7 +301,7 @@ struct decode_baton {
/* Base64-decode a group. IN needs to have four bytes and OUT needs
to have room for three bytes. The input bytes must already have
been decoded from base64tab into the range 0..63. The four
- six-byte values are pasted together to form three eight-bit bytes. */
+ six-bit values are pasted together to form three eight-bit bytes. */
static APR_INLINE void
decode_group(const unsigned char *in, char *out)
{
@@ -269,6 +332,64 @@ static const signed char reverse_base64[
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
+/* Similar to decode_group but this function also translates the
+ 6-bit values from the IN buffer before translating them.
+ Return FALSE if a non-base64 char (e.g. '=' or new line)
+ has been encountered. */
+static APR_INLINE svn_boolean_t
+decode_group_directly(const unsigned char *in, char *out)
+{
+ /* Translate the base64 chars in values [0..63, 0xff] */
+ apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
+ apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
+ apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
+ apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
+
+ /* Pack 4x6 bits into 3x8.*/
+ out[0] = (char)((part0 << 2) | (part1 >> 4));
+ out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
+ out[2] = (char)(((part2 & 0x3) << 6) | part3);
+
+ /* FALSE, iff any part is 0xff. */
+ return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
+}
+
+/* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
+ STR. After the function returns, *DATA will point to the first char
+ that has not been translated, yet. Returns TRUE if all BASE64_LINELEN
+ chars could be translated, i.e. no special char has been encountered
+ in between.
+ The code in this function will simply transform the data without
+ performing any boundary checks. Therefore, DATA must have at least
+ BASE64_LINELEN left and space for at least another BYTES_PER_LINE
+ chars must have been pre-allocated in STR before calling this
+ function. */
+static svn_boolean_t
+decode_line(svn_stringbuf_t *str, const char **data)
+{
+ /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
+ const char *p = *data;
+ char *out = str->data + str->len;
+ char *end = out + BYTES_PER_LINE;
+
+ /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
+ a multiple of 4. Stop translation as soon as we encounter a special
+ char. Leave the entire group untouched in that case. */
+ for (; out < end; p += 4, out += 3)
+ if (!decode_group_directly(p, out))
+ break;
+
+ /* Update string sizes and positions. */
+ str->len = out - str->data;
+ *out = '\0';
+ *data = p;
+
+ /* Return FALSE, if the caller should continue the decoding process
+ using the slow standard method. */
+ return out == end;
+}
+
+
/* Decode a byte string which may or may not be the total amount of
data being decoded. INBUF and *INBUFLEN carry the leftover bytes
from call to call, and *DONE keeps track of whether we've seen an
@@ -279,16 +400,26 @@ static void
decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
{
- const char *p;
+ const char *p = data;
char group[3];
signed char find;
+ const char *end = data + len;
/* Resize the stringbuf to make room for the (approximate) size of
- output, to avoid repeated resizes later. */
+ output, to avoid repeated resizes later.
+ The optimizations in decode_line rely on no resizes being necessary! */
svn_stringbuf_ensure(str, (len / 4) * 3 + 3);
- for (p = data; !*done && p < data + len; p++)
+ while ( !*done && p < end )
{
+ /* If no data is left in temporary INBUF and there is at least
+ one line-sized chunk left to decode, we may use the optimized
+ code path. */
+ if ((*inbuflen == 0) && (p + BASE64_LINELEN <= end))
+ if (decode_line(str, &p))
+ continue;
+
+ /* A special case or decode_line encountered a special char. */
if (*p == '=')
{
/* We are at the end and have to decode a partial group. */
@@ -303,6 +434,8 @@ decode_bytes(svn_stringbuf_t *str, const
else
{
find = reverse_base64[(unsigned char)*p];
+ ++p;
+
if (find >= 0)
inbuf[(*inbuflen)++] = find;
if (*inbuflen == 4)