On Sat, Jan 1, 2011 at 9:33 PM, <[email protected]> wrote: > Author: stefan2 > Date: Sat Jan 1 20:33:22 2011 > New Revision: 1054286 > > URL: http://svn.apache.org/viewvc?rev=1054286&view=rev > Log: > XDelta calculation is major part of svn_txdelta_send_txstream. > Therefore, speed up string matching code and the relatively > expensive hash-code (adler32) calculations. > > * subversion/libsvn_delta/xdelta.c > (init_adler32): init adler checksum with 0 instead of 1; > use svn_adler32 for performance > (adler32_out): "-1" can / must be ommitted now that we > start at 0 instead of 1 for s1. > (adler32_replace): special, faster implementation replacing > a adler32_out(), adler32_in() sequence > > (match_length): new utility function > (find_match): speed up the main loop by calling match_length() > (compute_delta): optimize adler32 calculations > > Modified: > subversion/trunk/subversion/libsvn_delta/xdelta.c
Hi Stefan, This makes my (Windows 32-bit VCE 2008) build fail: svn_delta-1.lib(xdelta.obj) : error LNK2019: unresolved external symbol _svn__adler32 referenced in function _init_adler32 ..\..\..\Release\subversion\libsvn_delta\libsvn_delta-1.dll : fatal error LNK1120: 1 unresolved externals Johan > Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff > ============================================================================== > --- subversion/trunk/subversion/libsvn_delta/xdelta.c (original) > +++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan 1 20:33:22 2011 > @@ -29,6 +29,8 @@ > > #include "svn_delta.h" > #include "delta.h" > + > +#include "private/svn_adler32.h" > > /* This is pseudo-adler32. It is adler32 without the prime modulus. > The idea is borrowed from monotone, and is a translation of the C++ > @@ -39,6 +41,10 @@ > #define ADLER32_MASK 0x0000ffff > #define ADLER32_CHAR_MASK 0x000000ff > > +/* Size of the blocks we compute checksums for. This was chosen out of > + thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */ > +#define MATCH_BLOCKSIZE 64 > + > /* Structure to store the state of our adler32 checksum. */ > struct adler32 > { > @@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch > { > ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK; > ad->s1 &= ADLER32_MASK; > - ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1; > + ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)); > ad->s2 &= ADLER32_MASK; > --ad->len; > } > > +/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. > + * This function may (and will) only be called for > + * ad->len == MATCH_BLOCKSIZE. > + */ > +static APR_INLINE void > +adler32_replace(struct adler32 *ad, const char c_out, const char c_in) > +{ > + apr_uint32_t s1 = ad->s1; > + apr_uint32_t s2 = ad->s2; > + > + s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK)); > + > + s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK; > + s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK; > + > + s2 += s1; > + > + ad->s1 = s1 & ADLER32_MASK; > + ad->s2 = s2 & ADLER32_MASK; > +} > + > /* Return the current adler32 checksum in the adler32 structure. */ > > static APR_INLINE apr_uint32_t > @@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad) > static APR_INLINE struct adler32 * > init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen) > { > - ad->s1 = 1; > - ad->s2 = 0; > - ad->len = 0; > - while (datalen--) > - adler32_in(ad, *(data++)); > + apr_uint32_t adler32 = svn__adler32(0, data, datalen); > + > + ad->s1 = adler32 & ADLER32_MASK; > + ad->s2 = (adler32 >> 16) & ADLER32_MASK; > + ad->len = datalen; > + > return ad; > } > > -/* Size of the blocks we compute checksums for. This was chosen out of > - thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */ > -#define MATCH_BLOCKSIZE 64 > - > /* Information for a block of the delta source. The length of the > block is the smaller of MATCH_BLOCKSIZE and the difference between > the size of the source data and the position of this block. */ > @@ -201,6 +225,35 @@ init_blocks_table(const char *data, > } > } > > +/* Return the lowest position at which A and B differ. If no difference > + * can be found in the first MAX_LEN characters, MAX_LEN will be returned. > + */ > +static apr_size_t > +match_length(const char *a, const char *b, apr_size_t max_len) > +{ > + apr_size_t pos = 0; > + > +#if SVN_UNALIGNED_ACCESS_IS_OK > + > + /* Chunky operation is so much faster ... > + * > + * We can't make this work on architectures that require aligned access > + * because A and B will probably have different alignment. So, skipping > + * the first few chars until alignment is reached is not an option. > + */ > + for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) > + if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) > + break; > + > +#endif > + > + for (; pos < max_len; ++pos) > + if (a[pos] != b[pos]) > + break; > + > + return pos; > +} > + > /* Try to find a match for the target data B in BLOCKS, and then > extend the match as long as data in A and B at the match position > continues to match. We set the position in a we ended up in (in > @@ -229,6 +282,8 @@ find_match(const struct blocks *blocks, > apr_uint32_t sum = adler32_sum(rolling); > apr_size_t alen, badvance, apos; > apr_size_t tpos, tlen; > + apr_size_t delta, max_delta; > + const char *aptr, *bptr; > > tpos = find_block(blocks, sum); > > @@ -246,14 +301,15 @@ find_match(const struct blocks *blocks, > apos = tpos; > alen = tlen; > badvance = tlen; > + > /* Extend the match forward as far as possible */ > - while ((apos + alen < asize) > - && (bpos + badvance < bsize) > - && (a[apos + alen] == b[bpos + badvance])) > - { > - ++alen; > - ++badvance; > - } > + max_delta = asize - apos - alen < bsize - bpos - badvance > + ? asize - apos - alen > + : bsize - bpos - badvance; > + delta = match_length(a + apos + alen, b + bpos + badvance, max_delta); > + > + alen += delta; > + badvance += delta; > > /* See if we can extend backwards into a previous insert hunk. */ > while (apos > 0 > @@ -329,7 +385,6 @@ compute_delta(svn_txdelta__ops_baton_t * > apr_size_t apos = 0; > apr_size_t alen = 1; > apr_size_t badvance = 1; > - apr_size_t next; > svn_boolean_t match; > > match = find_match(&blocks, &rolling, a, asize, b, bsize, lo, &apos, > @@ -354,14 +409,49 @@ compute_delta(svn_txdelta__ops_baton_t * > svn_txdelta__insert_op(build_baton, svn_txdelta_source, > apos, alen, NULL, pool); > } > - next = lo; > - for (; next < lo + badvance; ++next) > + > + if (badvance == 1) > + { > + /* This seems to be the _vast_ majority case -- even if > + * you sum BADVANCE up, this case still accounts for 2/3 > + * of all bytes being processed. > + */ > + if (lo + MATCH_BLOCKSIZE < bsize) > + adler32_replace(&rolling, b[lo], b[lo + MATCH_BLOCKSIZE]); > + else > + adler32_out(&rolling, b[lo]); > + > + lo++; > + } > + else if (badvance >= MATCH_BLOCKSIZE) > { > - adler32_out(&rolling, b[next]); > - if (next + MATCH_BLOCKSIZE < bsize) > - adler32_in(&rolling, b[next + MATCH_BLOCKSIZE]); > + /* BADVANCE is often large enough that we can calculate the > + * Adler32 sum directly instead of expensively updating the > + * existing values. > + */ > + apr_size_t remaining_block = lo + MATCH_BLOCKSIZE < bsize > + ? MATCH_BLOCKSIZE > + : bsize - (lo + MATCH_BLOCKSIZE); > + init_adler32(&rolling, > + b + lo + badvance - remaining_block, > + remaining_block); > + lo += badvance; > + } > + else > + { > + /* The very rare 3rd case > + * (can only possibly happen close to end of the file). > + */ > + apr_size_t next = lo; > + > + for (; next < lo + badvance; ++next) > + if (next + MATCH_BLOCKSIZE < bsize) > + adler32_replace(&rolling, b[next], b[next + MATCH_BLOCKSIZE]); > + else > + adler32_out(&rolling, b[next]); > + > + lo = next; > } > - lo = next; > } > > /* If we still have an insert pending at the end, throw it in. */ > > > -- Johan

