Author: stefan2
Date: Sat Jan 1 20:33:22 2011
New Revision: 1054286
URL: http://svn.apache.org/viewvc?rev=1054286&view=rev
Log:
XDelta calculation is a major part of svn_txdelta_send_txstream.
Therefore, speed up the string matching code and the relatively
expensive hash code (adler32) calculations.
* subversion/libsvn_delta/xdelta.c
(init_adler32): init the adler checksum with 0 instead of 1;
use svn_adler32 for performance
(adler32_out): the "-1" must now be omitted since we
start at 0 instead of 1 for s1.
(adler32_replace): special, faster implementation replacing
an adler32_out(), adler32_in() sequence
(match_length): new utility function
(find_match): speed up the main loop by calling match_length()
(compute_delta): optimize adler32 calculations
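
For illustration only: below is a minimal, standalone sketch of the rolling-checksum
idea described above (plain C, not the Subversion sources; the names block_checksum(),
roll(), MASK and BLOCKSIZE are made up for this demo). It seeds a 64-byte window from
scratch, then slides it one byte at a time using the combined remove/append step that
adler32_replace() performs, and verifies that the rolled value always matches a full
recompute. Because s1 starts at 0, no "+1"/"-1" correction term is needed when the
oldest byte is removed.

#include <stdint.h>
#include <stdio.h>

#define MASK 0x0000ffffu
#define BLOCKSIZE 64            /* window length, like MATCH_BLOCKSIZE */

/* Checksum of DATA[0..LEN-1], computed from scratch with s1 starting at 0.
   With that starting value, s2 is simply the weighted sum of the window
   bytes, so no "+1" terms show up in the rolling updates below. */
static uint32_t
block_checksum(const unsigned char *data, size_t len)
{
  uint32_t s1 = 0, s2 = 0;
  size_t i;

  for (i = 0; i < len; i++)
    {
      s1 = (s1 + data[i]) & MASK;
      s2 = (s2 + s1) & MASK;
    }
  return (s2 << 16) | s1;
}

/* Slide a BLOCKSIZE-byte window one byte to the right: remove C_OUT and
   append C_IN in a single step, i.e. the combined update used instead of
   an adler32_out() / adler32_in() pair. */
static void
roll(uint32_t *s1, uint32_t *s2, unsigned char c_out, unsigned char c_in)
{
  uint32_t t1 = *s1;
  uint32_t t2 = *s2;

  t2 -= BLOCKSIZE * (uint32_t)c_out;  /* drop c_out's contribution to s2 */
  t1 -= c_out;                        /* drop c_out from s1 ...          */
  t1 += c_in;                         /* ... and add c_in                */
  t2 += t1;                           /* c_in enters s2 via the new s1   */

  *s1 = t1 & MASK;
  *s2 = t2 & MASK;
}

int main(void)
{
  unsigned char buf[1024];
  uint32_t s1, s2, seed;
  size_t i;

  for (i = 0; i < sizeof(buf); i++)
    buf[i] = (unsigned char)(i * 31 + 7);        /* arbitrary test data */

  /* Seed the first window from scratch ... */
  seed = block_checksum(buf, BLOCKSIZE);
  s1 = seed & MASK;
  s2 = (seed >> 16) & MASK;

  /* ... then roll it across the buffer and compare with a recompute. */
  for (i = 0; i + BLOCKSIZE < sizeof(buf); i++)
    {
      roll(&s1, &s2, buf[i], buf[i + BLOCKSIZE]);
      if (((s2 << 16) | s1) != block_checksum(buf + i + 1, BLOCKSIZE))
        {
          printf("mismatch at offset %u\n", (unsigned)(i + 1));
          return 1;
        }
    }
  printf("rolling update matches a full recompute at every offset\n");
  return 0;
}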
Modified:
subversion/trunk/subversion/libsvn_delta/xdelta.c
Modified: subversion/trunk/subversion/libsvn_delta/xdelta.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_delta/xdelta.c?rev=1054286&r1=1054285&r2=1054286&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_delta/xdelta.c (original)
+++ subversion/trunk/subversion/libsvn_delta/xdelta.c Sat Jan 1 20:33:22 2011
@@ -29,6 +29,8 @@
#include "svn_delta.h"
#include "delta.h"
+
+#include "private/svn_adler32.h"
/* This is pseudo-adler32. It is adler32 without the prime modulus.
The idea is borrowed from monotone, and is a translation of the C++
@@ -39,6 +41,10 @@
#define ADLER32_MASK 0x0000ffff
#define ADLER32_CHAR_MASK 0x000000ff
+/* Size of the blocks we compute checksums for. This was chosen out of
+ thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
+#define MATCH_BLOCKSIZE 64
+
/* Structure to store the state of our adler32 checksum. */
struct adler32
{
@@ -66,11 +72,32 @@ adler32_out(struct adler32 *ad, const ch
{
ad->s1 -= ((apr_uint32_t) (c)) & ADLER32_CHAR_MASK;
ad->s1 &= ADLER32_MASK;
- ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK)) + 1;
+ ad->s2 -= (ad->len * (((apr_uint32_t) c) & ADLER32_CHAR_MASK));
ad->s2 &= ADLER32_MASK;
--ad->len;
}
+/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
+ * This function may (and will) only be called for
+ * ad->len == MATCH_BLOCKSIZE.
+ */
+static APR_INLINE void
+adler32_replace(struct adler32 *ad, const char c_out, const char c_in)
+{
+ apr_uint32_t s1 = ad->s1;
+ apr_uint32_t s2 = ad->s2;
+
+ s2 -= (MATCH_BLOCKSIZE * (((apr_uint32_t) c_out) & ADLER32_CHAR_MASK));
+
+ s1 -= ((apr_uint32_t) (c_out)) & ADLER32_CHAR_MASK;
+ s1 += ((apr_uint32_t) (c_in)) & ADLER32_CHAR_MASK;
+
+ s2 += s1;
+
+ ad->s1 = s1 & ADLER32_MASK;
+ ad->s2 = s2 & ADLER32_MASK;
+}
+
/* Return the current adler32 checksum in the adler32 structure. */
static APR_INLINE apr_uint32_t
@@ -85,18 +112,15 @@ adler32_sum(const struct adler32 *ad)
static APR_INLINE struct adler32 *
init_adler32(struct adler32 *ad, const char *data, apr_uint32_t datalen)
{
- ad->s1 = 1;
- ad->s2 = 0;
- ad->len = 0;
- while (datalen--)
- adler32_in(ad, *(data++));
+ apr_uint32_t adler32 = svn__adler32(0, data, datalen);
+
+ ad->s1 = adler32 & ADLER32_MASK;
+ ad->s2 = (adler32 >> 16) & ADLER32_MASK;
+ ad->len = datalen;
+
return ad;
}
-/* Size of the blocks we compute checksums for. This was chosen out of
- thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. */
-#define MATCH_BLOCKSIZE 64
-
/* Information for a block of the delta source. The length of the
block is the smaller of MATCH_BLOCKSIZE and the difference between
the size of the source data and the position of this block. */
@@ -201,6 +225,35 @@ init_blocks_table(const char *data,
}
}
+/* Return the lowest position at which A and B differ. If no difference
+ * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
+ */
+static apr_size_t
+match_length(const char *a, const char *b, apr_size_t max_len)
+{
+ apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+ /* Chunky operation is so much faster ...
+ *
+ * We can't make this work on architectures that require aligned access
+ * because A and B will probably have different alignment. So, skipping
+ * the first few chars until alignment is reached is not an option.
+ */
+ for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
+ if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
+ break;
+
+#endif
+
+ for (; pos < max_len; ++pos)
+ if (a[pos] != b[pos])
+ break;
+
+ return pos;
+}
+
/* Try to find a match for the target data B in BLOCKS, and then
extend the match as long as data in A and B at the match position
continues to match. We set the position in a we ended up in (in
@@ -229,6 +282,8 @@ find_match(const struct blocks *blocks,
apr_uint32_t sum = adler32_sum(rolling);
apr_size_t alen, badvance, apos;
apr_size_t tpos, tlen;
+ apr_size_t delta, max_delta;
+ const char *aptr, *bptr;