The branch, master has been updated
       via  de94193 Remove bypassed checksums in --inplace to improve speed.
      from  05fce65 Preparing for release of 3.1.0pre1

;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit de94193353864221280be9fbb6193d92eb133000
Author: Wayne Davison <way...@samba.org>
Date:   Sat Aug 3 09:44:13 2013 -0700

    Remove bypassed checksums in --inplace to improve speed.
    
    When checking a checksum that refers to a part of an --inplace file that
    has been overwritten w/o getting SUMFLG_SAME_OFFSET set, we remove the
    checksum from the list.  This will speed up files that have a lot of
    identical checksum blocks (e.g. sequences of zeros) that we can't use
    due to them not getting marked as being the same.  Patch provided by
    Michael Chapman.

-----------------------------------------------------------------------

Summary of changes:
 NEWS    |    3 +++
 match.c |   26 +++++++++++++++++---------
 2 files changed, 20 insertions(+), 9 deletions(-)


Changeset truncated at 500 lines:

diff --git a/NEWS b/NEWS
index 040ac2d..eec631d 100644
--- a/NEWS
+++ b/NEWS
@@ -154,6 +154,9 @@ Changes since 3.0.9:
       file for one way to package the resulting files.  (Suggestions for
       how to make this even easier to install & use are welcomed.)
 
+    - Improved the speed of some --inplace updates when there are lots of
+      identical checksum blocks that end up being unusable.
+
     - Added the --outbuf=N|L|B option for chosing the output buffering.
 
     - Repating the --fuzzy option now causes the code to look for fuzzy matches
diff --git a/match.c b/match.c
index bafab9f..a8bd1f3 100644
--- a/match.c
+++ b/match.c
@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
 
        do {
                int done_csum2 = 0;
-               int32 i;
+               uint32 hash_entry;
+               int32 i, *prev;
 
                if (DEBUG_GTE(DELTASUM, 4)) {
                        rprintf(FINFO, "offset=%s sum=%04x%04x\n",
@@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s,
                }
 
                if (tablesize == TRADITIONAL_TABLESIZE) {
-                       if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
+                       hash_entry = SUM2HASH2(s1,s2);
+                       if ((i = hash_table[hash_entry]) < 0)
                                goto null_hash;
                        sum = (s1 & 0xffff) | (s2 << 16);
                } else {
                        sum = (s1 & 0xffff) | (s2 << 16);
-                       if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
+                       hash_entry = BIG_SUM2HASH(sum);
+                       if ((i = hash_table[hash_entry]) < 0)
                                goto null_hash;
                }
+               prev = &hash_table[hash_entry];
 
                hash_hits++;
                do {
                        int32 l;
 
+                       /* When updating in-place, the chunk's offset must be
+                        * either >= our offset or identical data at that offset.
+                        * Remove any bypassed entries that we can never use. */
+                       if (updating_basis_file && s->sums[i].offset < offset
+                           && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
+                               *prev = s->sums[i].chain;
+                               continue;
+                       }
+                       prev = &s->sums[i].chain;
+
                        if (sum != s->sums[i].sum1)
                                continue;
 
@@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s,
                        if (l != s->sums[i].len)
                                continue;
 
-                       /* in-place: ensure chunk's offset is either >= our
-                        * offset or that the data didn't move. */
-                       if (updating_basis_file && s->sums[i].offset < offset
-                           && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
-                               continue;
-
                        if (DEBUG_GTE(DELTASUM, 3)) {
                                rprintf(FINFO,
                                        "potential match at %s i=%ld sum=%08x\n",


-- 
The rsync repository.
_______________________________________________
rsync-cvs mailing list
rsync-cvs@lists.samba.org
https://lists.samba.org/mailman/listinfo/rsync-cvs

Reply via email to