My take on the diff-optimizations-bytes branch

Stefan Fuhrmann Sat, 01 Jan 2011 14:39:57 -0800

Hi Johan,

Thursday night I did something stupid and had a look at  how
svn blame could be made faster based on the HEAD code in
your branch.


One night and most of the following day later, I think I made it
a few percent faster now. Some of the code I committed directly
to /trunk and you need to pull these changes into your branch
to compile the attached patch.

Feel free to test it, take it apart and morph it any way you like --
I know the patch isn't very pretty in the first place. I tested the
patch on x64 LINUX and would like to hear whether it at least
somewhat improved performance on your system for your
svn blame config.xml use-case.

-- Stefan^2.

[[[
Speed-up of datasource_open() and its sub-routines
by a series of optimizations:

* allocate the file[] array on stack instead of heap
  (all members become directly addressible through
  the stack pointer because all static sub-functions
  will actually be in-lined)
* minor re-arragements in arithmetic expressions to
  maximize reuse of results (e.g. in INCREMENT_POINTERS)
* move hot spots to seperate functions and provide a
  specialized version for file_len == 2
  (many loops collapse to a single execution, other
  values can be hard-coded, etc.)
* use seperate loops to process data within a chunk
  so we don't need to call INCREMENT_POINTERS & friends
  that frequently
* scan / compare strings in machine-word granularity
  instead of bytes
]]]

Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c  (revision 1054044)
+++ subversion/libsvn_diff/diff_file.c  (working copy)
@@ -46,6 +46,7 @@
 
 #include "private/svn_utf_private.h"
 #include "private/svn_eol_private.h"
+#include "private/svn_adler32.h"
 
 /* A token, i.e. a line read from a file. */
 typedef struct svn_diff__file_token_t
@@ -258,10 +259,10 @@
                                                                              \
     for (i = 0; i < files_len; i++)                                          \
     {                                                                        \
-      if (all_files[i]->curp < all_files[i]->endp - 1)                       \
-        all_files[i]->curp++;                                                \
+      if (all_files[i].curp + 1 < all_files[i].endp)                         \
+        all_files[i].curp++;                                                 \
       else                                                                   \
-        SVN_ERR(increment_chunk(all_files[i], pool));                        \
+        SVN_ERR(increment_chunk(&all_files[i], pool));                       \
     }                                                                        \
   } while (0)
 
@@ -276,10 +277,10 @@
                                                                              \
     for (i = 0; i < files_len; i++)                                          \
     {                                                                        \
-      if (all_files[i]->curp > all_files[i]->buffer)                         \
-        all_files[i]->curp--;                                                \
+      if (all_files[i].curp > all_files[i].buffer)                           \
+        all_files[i].curp--;                                                 \
       else                                                                   \
-        SVN_ERR(decrement_chunk(all_files[i], pool));                        \
+        SVN_ERR(decrement_chunk(&all_files[i], pool));                       \
     }                                                                        \
   } while (0)
 
@@ -349,12 +350,12 @@
  * the file (this can happen while scanning backwards). This is the case if
  * one of them has chunk == -1. */
 static svn_boolean_t
-is_one_at_bof(struct file_info *file[], apr_size_t file_len)
+is_one_at_bof(struct file_info file[], apr_size_t file_len)
 {
   apr_size_t i;
 
   for (i = 0; i < file_len; i++)
-    if (file[i]->chunk == -1)
+    if (file[i].chunk == -1)
       return TRUE;
 
   return FALSE;
@@ -363,53 +364,152 @@
 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
  * one of them has curp == endp (this can only happen at the last chunk)) */
 static svn_boolean_t
-is_one_at_eof(struct file_info *file[], apr_size_t file_len)
+is_one_at_eof(struct file_info file[], apr_size_t file_len)
 {
   apr_size_t i;
 
   for (i = 0; i < file_len; i++)
-    if (file[i]->curp == file[i]->endp)
+    if (file[i].curp == file[i].endp)
       return TRUE;
 
   return FALSE;
 }
 
-/* Find the prefix which is identical between all elements of the FILE array.
- * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
- * set to TRUE if one of the FILEs reached its end while scanning prefix,
- * i.e. at least one file consisted entirely of prefix.  Otherwise, 
- * REACHED_ONE_EOF is set to FALSE.
- *
- * After this function is finished, the buffers, chunks, curp's and endp's 
- * of the FILEs are set to point at the first byte after the prefix. */
+/* Quickly determine whether there is a new-line char in CHUNK.
+ * (mainly copy-n-paste from find_eol_start).
+ */
+#if APR_SIZEOF_VOIDP == 8
+#  define LOWER_7BITS_SET 0x7f7f7f7f7f7f7f7f
+#  define BIT_7_SET       0x8080808080808080
+#  define R_MASK          0x0a0a0a0a0a0a0a0a
+#  define N_MASK          0x0d0d0d0d0d0d0d0d
+#else
+#  define LOWER_7BITS_SET 0x7f7f7f7f
+#  define BIT_7_SET       0x80808080
+#  define R_MASK          0x0a0a0a0a
+#  define N_MASK          0x0d0d0d0d
+#endif
+
+static svn_boolean_t contains_nl(apr_size_t chunk)
+{
+  apr_size_t r_test = chunk ^ R_MASK;
+  apr_size_t n_test = chunk ^ N_MASK;
+
+  r_test |= (r_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+  n_test |= (n_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+
+  return (r_test & n_test & BIT_7_SET) != BIT_7_SET;
+}
+
+/* Optimized variant of the guts of find_identical_prefix for
+ * file_len == 2.
+ */
 static svn_error_t *
-find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
-                      struct file_info *file[], apr_size_t file_len,
-                      apr_pool_t *pool)
+find_identical_prefix_fast(svn_boolean_t *reached_one_eof,
+                           apr_off_t *prefix_lines,
+                           struct file_info file[],
+                           apr_pool_t *pool)
 {
   svn_boolean_t had_cr = FALSE;
+  apr_off_t lines = 0;
+  apr_size_t i = 0;
+
+  *reached_one_eof = FALSE;
+
+  while (*file[0].curp == *file[1].curp)
+    {
+      if (*file[0].curp == '\r')
+        {
+          lines++;
+          had_cr = TRUE;
+        }
+      else if (*file[0].curp == '\n' && !had_cr)
+        {
+          lines++;
+        }
+      else
+        {
+          had_cr = FALSE;
+        }
+
+      INCREMENT_POINTERS(file, 2, pool);
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      /* Skip quickly over the stuff between EOLs. */
+
+      while (   (file[0].curp + sizeof(apr_size_t) < file[0].endp)
+             && (file[1].curp + sizeof(apr_size_t) < file[1].endp)
+             && (   *(const apr_size_t *)file[0].curp
+                 == *(const apr_size_t *)file[1].curp)
+             && !contains_nl(*(const apr_size_t *)file[0].curp))
+        {
+          file[0].curp += sizeof(apr_size_t);
+          file[1].curp += sizeof(apr_size_t);
+        }
+
+#endif
+
+      /* curp == endp indicates EOF (this can only happen with last chunk) */
+      if (is_one_at_eof(file, 2))
+        {
+          *reached_one_eof = TRUE;
+          break;
+        }
+    }
+
+  *prefix_lines = lines;
+
+  /* If all files reached their end (i.e. are fully identical), we're done */
+  if (file[0].curp == file[0].endp && file[1].curp == file[1].endp)
+    return SVN_NO_ERROR;
+
+  if (had_cr)
+    {
+      /* Check if we ended in the middle of a \r\n for one file, but \r for 
+         another. If so, back up one byte, so the next loop will back up
+         the entire line. Also decrement *prefix_lines, since we counted one
+         too many for the \r. */
+      if ((*file[0].curp == '\n') || (*file[1].curp == '\n'))
+        DECREMENT_POINTERS(file, 2, pool);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Non-optimized variant of the guts of find_identical_prefix for
+ * file_len != 2.
+ */
+static svn_error_t *
+find_identical_prefix_slow(svn_boolean_t *reached_one_eof,
+                           apr_off_t *prefix_lines,
+                           struct file_info file[], apr_size_t file_len,
+                           apr_pool_t *pool)
+{
+  svn_boolean_t had_cr = FALSE;
   svn_boolean_t is_match, reached_all_eof;
   apr_size_t i;
+  apr_off_t lines = 0;
 
   *prefix_lines = 0;
   for (i = 1, is_match = TRUE; i < file_len; i++)
-    is_match = is_match && *file[0]->curp == *file[i]->curp;
+    is_match = is_match && *file[0].curp == *file[i].curp;
+
   while (is_match)
     {
       /* ### TODO: see if we can take advantage of 
          diff options like ignore_eol_style or ignore_space. */
       /* check for eol, and count */
-      if (*file[0]->curp == '\r')
+      if (*file[0].curp == '\r')
         {
-          (*prefix_lines)++;
+          lines++;
           had_cr = TRUE;
         }
-      else if (*file[0]->curp == '\n' && !had_cr)
+      else if (*file[0].curp == '\n' && !had_cr)
         {
-          (*prefix_lines)++;
-          had_cr = FALSE;
+          lines++;
         }
-      else 
+      else
         {
           had_cr = FALSE;
         }
@@ -422,12 +522,14 @@
         break;
       else
         for (i = 1, is_match = TRUE; i < file_len; i++)
-          is_match = is_match && *file[0]->curp == *file[i]->curp;
+          is_match = is_match && *file[0].curp == *file[i].curp;
     }
 
+  *prefix_lines = lines;
+
   /* If all files reached their end (i.e. are fully identical), we're done */
   for (i = 0, reached_all_eof = TRUE; i < file_len; i++)
-    reached_all_eof = reached_all_eof && file[i]->curp == file[i]->endp;
+    reached_all_eof = reached_all_eof && file[i].curp == file[i].endp;
   if (reached_all_eof)
     return SVN_NO_ERROR;
 
@@ -440,20 +542,40 @@
       svn_boolean_t ended_at_nonmatching_newline = FALSE;
       for (i = 0; i < file_len; i++)
         ended_at_nonmatching_newline = ended_at_nonmatching_newline 
-                                       || *file[i]->curp == '\n';
+                                       || *file[i].curp == '\n';
       if (ended_at_nonmatching_newline)
-        {
-          (*prefix_lines)--;
-          DECREMENT_POINTERS(file, file_len, pool);
-        }
+        DECREMENT_POINTERS(file, file_len, pool);
     }
 
+  return SVN_NO_ERROR;
+}
+
+/* Find the prefix which is identical between all elements of the FILE array.
+ * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
+ * set to TRUE if one of the FILEs reached its end while scanning prefix,
+ * i.e. at least one file consisted entirely of prefix.  Otherwise, 
+ * REACHED_ONE_EOF is set to FALSE.
+ *
+ * After this function is finished, the buffers, chunks, curp's and endp's 
+ * of the FILEs are set to point at the first byte after the prefix. */
+static svn_error_t *
+find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
+                      struct file_info file[], apr_size_t file_len,
+                      apr_pool_t *pool)
+{
+  if (file_len == 2)
+    SVN_ERR(find_identical_prefix_fast(reached_one_eof, prefix_lines,
+                                       file, pool));
+  else
+    SVN_ERR(find_identical_prefix_slow(reached_one_eof, prefix_lines,
+                                       file, file_len, pool));
+
   /* Back up one byte, so we point at the last identical byte */
   DECREMENT_POINTERS(file, file_len, pool);
 
   /* Back up to the last eol sequence (\n, \r\n or \r) */
   while (!is_one_at_bof(file, file_len) && 
-         *file[0]->curp != '\n' && *file[0]->curp != '\r')
+         *file[0].curp != '\n' && *file[0].curp != '\r')
     DECREMENT_POINTERS(file, file_len, pool);
 
   /* Slide one byte forward, to point past the eol sequence */
@@ -462,9 +584,154 @@
   return SVN_NO_ERROR;
 }
 
+/* Scan backwards until mismatch or until we reach the prefix.
+ * This is the optimized implementation for file_len==2.
+ */
+static svn_error_t*
+scan_backward_fast(struct file_info file[],
+                   apr_off_t min_chunk0, apr_off_t min_offset0,
+                   apr_pool_t *pool)
+{
+  svn_boolean_t reached_prefix;
 
+  while (1)
+    {
+      const char* min_curp0 = file[0].chunk == min_chunk0
+                            ? file[0].buffer + min_offset0 + 1
+                            : file[0].buffer + 1;
+      const char* min_curp1 = file[1].buffer + 1;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      if (   (file[0].curp - sizeof(apr_size_t) < file[0].curp)
+          && (file[1].curp - sizeof(apr_size_t) < file[1].curp))
+        {
+          do
+            {
+              file[0].curp -= sizeof(apr_size_t);
+              file[1].curp -= sizeof(apr_size_t);
+            }
+          while (   (file[0].curp >= min_curp0)
+                 && (file[1].curp >= min_curp1)
+                 && (   *(const apr_size_t*)(file[0].curp + 1)
+                     == *(const apr_size_t*)(file[1].curp + 1)));
+
+          file[0].curp += sizeof(apr_size_t);
+          file[1].curp += sizeof(apr_size_t);
+        }
+
+#endif
+
+      while (   (*file[0].curp == *file[1].curp)
+             && (file[0].curp != min_curp0)
+             && (file[1].curp != min_curp1))
+      {
+        file[0].curp--;
+        file[1].curp--;
+      }
+
+      if (*file[0].curp != *file[1].curp)
+        return SVN_NO_ERROR;
+
+      DECREMENT_POINTERS(file, 2, pool);
+      reached_prefix = file[0].chunk == min_chunk0
+                    && (file[0].curp - file[0].buffer) == min_offset0;
+
+      if (reached_prefix || is_one_at_bof(file, 2))
+        return SVN_NO_ERROR;
+    }
+}
+
+/* Scan backwards until mismatch or until we reach the prefix.
+ * This is general implementation for file_len!=2.
+ */
+static svn_error_t*
+scan_backward_slow(struct file_info file[], apr_size_t file_len,
+                   apr_off_t min_chunk0, apr_off_t min_offset0,
+                   apr_pool_t *pool)
+{
+  apr_size_t i;
+  svn_boolean_t is_match, reached_prefix;
+
+  for (i = 1; i < file_len; i++)
+    if (*file[0].curp != *file[i].curp)
+      return SVN_NO_ERROR;
+
+  while (1)
+    {
+      DECREMENT_POINTERS(file, file_len, pool);
+
+      reached_prefix = file[0].chunk == min_chunk0 
+                    && (file[0].curp - file[0].buffer) == min_offset0;
+
+      if (reached_prefix || is_one_at_bof(file, file_len))
+        return SVN_NO_ERROR;
+
+      for (i = 1; i < file_len; i++)
+        if (*file[0].curp != *file[i].curp)
+          return SVN_NO_ERROR;
+    }
+}
+
 #define SUFFIX_LINES_TO_KEEP 50
 
+/* Slide forward until we find an eol sequence to add the rest of the line
+ * we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
+ * one file reaches its end.
+ * This is the optimized implementation for file_len == 2.
+ */
+static svn_error_t*
+scan_forward_fast(struct file_info file[], apr_pool_t *pool)
+{
+  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
+
+  do
+    {
+      while (!is_one_at_eof(file, 2)
+              && *file[0].curp != '\n'
+              && *file[0].curp != '\r')
+        INCREMENT_POINTERS(file, 2, pool);
+
+      /* Slide one or two more bytes, to point past the eol. */
+      if (!is_one_at_eof(file, 2) && *file[0].curp == '\r')
+        INCREMENT_POINTERS(file, 2, pool);
+      if (!is_one_at_eof(file, 2) && *file[0].curp == '\n')
+        INCREMENT_POINTERS(file, 2, pool);
+    }
+  while (!is_one_at_eof(file, 2) && suffix_lines_to_keep--);
+
+  return SVN_NO_ERROR;
+}
+
+/* Slide forward until we find an eol sequence to add the rest of the line
+ * we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
+ * one file reaches its end.
+ * This is the general implementation for file_len != 2.
+ */
+static svn_error_t*
+scan_forward_slow(struct file_info file[], apr_size_t file_len,
+                  apr_pool_t *pool)
+{
+  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
+
+  do
+    {
+      while (!is_one_at_eof(file, file_len)
+              && *file[0].curp != '\n'
+              && *file[0].curp != '\r')
+        INCREMENT_POINTERS(file, file_len, pool);
+
+      /* Slide one or two more bytes, to point past the eol. */
+      if (!is_one_at_eof(file, file_len) && *file[0].curp == '\r')
+        INCREMENT_POINTERS(file, file_len, pool);
+      if (!is_one_at_eof(file, file_len) && *file[0].curp == '\n')
+        INCREMENT_POINTERS(file, file_len, pool);
+    }
+  while (!is_one_at_eof(file, file_len) && suffix_lines_to_keep--);
+
+  return SVN_NO_ERROR;
+}
+
 /* Find the suffix which is identical between all elements of the FILE array.
  *
  * Before this function is called the FILEs' pointers and chunks should be 
@@ -472,39 +739,32 @@
  * find_identical_prefix), so we can determine where suffix scanning should 
  * ultimately stop. */
 static svn_error_t *
-find_identical_suffix(struct file_info *file[], apr_size_t file_len,
+find_identical_suffix(struct file_info file[], apr_size_t file_len,
                       apr_pool_t *pool)
 {
   struct file_info file_for_suffix[4];
-  struct file_info *file_for_suffix_ptr[4];
   apr_off_t length[4];
   apr_off_t suffix_min_chunk0;
   apr_off_t suffix_min_offset0;
   apr_off_t min_file_size;
-  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
-  svn_boolean_t is_match, reached_prefix;
   apr_size_t i;
 
-  for (i = 0; i < file_len; i++)
-    {
-      memset(&file_for_suffix[i], 0, sizeof(file_for_suffix[i]));
-      file_for_suffix_ptr[i] = &file_for_suffix[i];
-    }
+  memset(file_for_suffix, 0, sizeof(file_for_suffix));
 
   /* Initialize file_for_suffix[].
      Read last chunk, position curp at last byte. */
   for (i = 0; i < file_len; i++)
     {
-      file_for_suffix[i].path = file[i]->path;
-      file_for_suffix[i].file = file[i]->file;
-      file_for_suffix[i].size = file[i]->size;
+      file_for_suffix[i].path = file[i].path;
+      file_for_suffix[i].file = file[i].file;
+      file_for_suffix[i].size = file[i].size;
       file_for_suffix[i].chunk =
         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
       length[i] = offset_in_chunk(file_for_suffix[i].size);
-      if (file_for_suffix[i].chunk == file[i]->chunk)
+      if (file_for_suffix[i].chunk == file[i].chunk)
         {
           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
-          file_for_suffix[i].buffer = file[i]->buffer;
+          file_for_suffix[i].buffer = file[i].buffer;
         }
       else
         {
@@ -522,68 +782,45 @@
 
   /* Get the chunk and pointer offset (for file[0]) at which we should stop
      scanning backward for the identical suffix, i.e. when we reach prefix. */
-  suffix_min_chunk0 = file[0]->chunk;
-  suffix_min_offset0 = file[0]->curp - file[0]->buffer;
+  suffix_min_chunk0 = file[0].chunk;
+  suffix_min_offset0 = file[0].curp - file[0].buffer;
 
   /* Compensate if other files are smaller than file[0] */
-  for (i = 1, min_file_size = file[0]->size; i < file_len; i++)
-    if (file[i]->size < min_file_size)
-      min_file_size = file[i]->size;
-  if (file[0]->size > min_file_size)
+  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
+    if (file[i].size < min_file_size)
+      min_file_size = file[i].size;
+  if (file[0].size > min_file_size)
     {
-      suffix_min_chunk0 += (file[0]->size - min_file_size) / CHUNK_SIZE;
-      suffix_min_offset0 += (file[0]->size - min_file_size) % CHUNK_SIZE;
+      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
+      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
     }
 
   /* Scan backwards until mismatch or until we reach the prefix. */
-  for (i = 1, is_match = TRUE; i < file_len; i++)
-    is_match =
-      is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
-  while (is_match)
-    {
-      DECREMENT_POINTERS(file_for_suffix_ptr, file_len, pool);
-      
-      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 
-                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
-                          == suffix_min_offset0;
+  if (file_len == 2)
+    SVN_ERR(scan_backward_fast(file_for_suffix,
+                               suffix_min_chunk0, suffix_min_offset0,
+                               pool));
+  else
+    SVN_ERR(scan_backward_slow(file_for_suffix, file_len,
+                               suffix_min_chunk0, suffix_min_offset0,
+                               pool));
 
-      if (reached_prefix || is_one_at_bof(file_for_suffix_ptr, file_len))
-        break;
-      else
-        for (i = 1, is_match = TRUE; i < file_len; i++)
-          is_match =
-            is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
-    }
-
   /* Slide one byte forward, to point at the first byte of identical suffix */
-  INCREMENT_POINTERS(file_for_suffix_ptr, file_len, pool);
+  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 
   /* Slide forward until we find an eol sequence to add the rest of the line
      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 
      one file reaches its end. */
-  do
-    {
-      while (!is_one_at_eof(file_for_suffix_ptr, file_len)
-             && *file_for_suffix[0].curp != '\n'
-             && *file_for_suffix[0].curp != '\r')
-        INCREMENT_POINTERS(file_for_suffix_ptr, file_len, pool);
+  if (file_len == 2)
+    SVN_ERR(scan_forward_fast(file_for_suffix, pool));
+  else
+    SVN_ERR(scan_forward_slow(file_for_suffix, file_len, pool));
 
-      /* Slide one or two more bytes, to point past the eol. */
-      if (!is_one_at_eof(file_for_suffix_ptr, file_len)
-          && *file_for_suffix[0].curp == '\r')
-        INCREMENT_POINTERS(file_for_suffix_ptr, file_len, pool);
-      if (!is_one_at_eof(file_for_suffix_ptr, file_len)
-          && *file_for_suffix[0].curp == '\n')
-        INCREMENT_POINTERS(file_for_suffix_ptr, file_len, pool);
-    }
-  while (!is_one_at_eof(file_for_suffix_ptr, file_len) 
-         && suffix_lines_to_keep--);
-
   /* Save the final suffix information in the original file_info */
   for (i = 0; i < file_len; i++)
     {
-      file[i]->suffix_start_chunk = file_for_suffix[i].chunk;
-      file[i]->suffix_offset_in_chunk = 
+      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
+      file[i].suffix_offset_in_chunk = 
         file_for_suffix[i].curp - file_for_suffix[i].buffer;
     }
 
@@ -612,7 +849,7 @@
                  apr_size_t datasource_len)
 {
   svn_diff__file_baton_t *file_baton = baton;
-  struct file_info *file[4];
+  struct file_info files[4];
   apr_finfo_t finfo[4];
   apr_off_t length[4];
   svn_boolean_t reached_one_eof;
@@ -621,18 +858,21 @@
   /* Open datasources and read first chunk */
   for (i = 0; i < datasource_len; i++)
     {
-      file[i] = &file_baton->files[datasource_to_index(datasource[i])];
-      SVN_ERR(svn_io_file_open(&file[i]->file, file[i]->path,
+      struct file_info *file
+          = &file_baton->files[datasource_to_index(datasource[i])];
+      SVN_ERR(svn_io_file_open(&file->file, file->path,
                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
       SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
-                                   file[i]->file, file_baton->pool));
-      file[i]->size = finfo[i].size;
+                                   file->file, file_baton->pool));
+      file->size = finfo[i].size;
       length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
-      file[i]->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
-      SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer,
+      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
                          length[i], 0, file_baton->pool));
-      file[i]->endp = file[i]->buffer + length[i];
-      file[i]->curp = file[i]->buffer;
+      file->endp = file->buffer + length[i];
+      file->curp = file->buffer;
+
+      files[i] = *file;
     }
 
   for (i = 0; i < datasource_len; i++)
@@ -641,15 +881,18 @@
       return SVN_NO_ERROR;
 
   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
-                                file, datasource_len, file_baton->pool));
+                                files, datasource_len, file_baton->pool));
 
-  if (reached_one_eof)
-    /* At least one file consisted totally of identical prefix, 
-     * so there will be no identical suffix. We're done. */
-    return SVN_NO_ERROR;
+  if (!reached_one_eof)
+    /* No file consisted totally of identical prefix,
+     * so there may be some identical suffix.  */
 
-  SVN_ERR(find_identical_suffix(file, datasource_len, file_baton->pool));
+    SVN_ERR(find_identical_suffix(files, datasource_len, file_baton->pool));
 
+  /* Copy local results back to baton. */
+  for (i = 0; i < datasource_len; i++)
+    file_baton->files[datasource_to_index(datasource[i])] = files[i];
+
   return SVN_NO_ERROR;
 }
 
@@ -747,7 +990,7 @@
                                  &file->normalize_state,
                                  curp, file_baton->options);
       file_token->length += length;
-      h = svn_diff__adler32(h, curp, length);
+      h = svn__adler32(h, curp, length);
 
       curp = endp = file->buffer;
       file->chunk++;
@@ -794,7 +1037,7 @@
 
       file_token->length += length;
 
-      *hash = svn_diff__adler32(h, c, length);
+      *hash = svn__adler32(h, c, length);
       *token = file_token;
     }

My take on the diff-optimizations-bytes branch

Reply via email to