Hi Stefan2 and others.

This patch simplifies code in RA-svn/marshal.c by using a single code
path to read both short and long strings efficiently.  Using a single
code path is beneficial for test coverage.

It is an alternative to r1028352 which merged r985606 and r1028092 from
the performance branch.

I have run the test suite over RA-svn (with BDB) but haven't done any
more testing than that.  It's just something I noticed while looking at
that code, it's not solving a problem I've noticed or anything like
that.  By inspection I believe it is more efficient than
read_long_string() was, and unchanged for short strings, but if you're
able to do any performance analysis on it, that would be great.

- Julian

Simplify and optimize RA-svn's string marshalling.

This largely reverts r1028352 which merged r985606 and r1028092 from the
performance branch.  That change used separate functions to handle short and
long strings.  This change instead improves the original code to handle both
short and long strings efficiently.

Compared with the pre-r1028352 code, this version uses a 1-MB chunk size
instead of 4-KB and eliminates one memcpy of the data.

* subversion/libsvn_ra_svn/marshal.c
  (read_long_string): Remove.
  (read_string): Handle long strings with the same code as short strings, by
    reading directly into pre-allocated space, but pre-allocate no more than
    a reasonable chunk size at a time.
--This line, and those below, will be ignored--

Index: subversion/libsvn_ra_svn/marshal.c
===================================================================
--- subversion/libsvn_ra_svn/marshal.c	(revision 1130931)
+++ subversion/libsvn_ra_svn/marshal.c	(working copy)
@@ -563,36 +563,6 @@ svn_error_t *svn_ra_svn_write_tuple(svn_
 
 /* --- READING DATA ITEMS --- */
 
-/* Read LEN bytes from CONN into a supposedly empty STRINGBUF.
- * POOL will be used for temporary allocations. */
-static svn_error_t *
-read_long_string(svn_ra_svn_conn_t *conn, apr_pool_t *pool,
-                 svn_stringbuf_t *stringbuf, apr_uint64_t len)
-{
-  char readbuf[4096];
-  apr_size_t readbuf_len;
-
-  /* We can't store strings longer than the maximum size of apr_size_t,
-   * so check for wrapping */
-  if (((apr_size_t) len) < len)
-    return svn_error_create(SVN_ERR_RA_SVN_MALFORMED_DATA, NULL,
-                            _("String length larger than maximum"));
-
-  while (len)
-    {
-      readbuf_len = len > sizeof(readbuf) ? sizeof(readbuf) : (apr_size_t)len;
-
-      SVN_ERR(readbuf_read(conn, pool, readbuf, readbuf_len));
-      /* Read into a stringbuf_t to so we don't allow the sender to allocate
-       * an arbitrary amount of memory without actually sending us that much
-       * data */
-      svn_stringbuf_appendbytes(stringbuf, readbuf, readbuf_len);
-      len -= readbuf_len;
-    }
-
-  return SVN_NO_ERROR;
-}
-
 /* Read LEN bytes from CONN into already-allocated structure ITEM.
  * Afterwards, *ITEM is of type 'SVN_RA_SVN_STRING', and its string
  * data is allocated in POOL. */
@@ -601,42 +571,34 @@ static svn_error_t *read_string(svn_ra_s
 {
   svn_stringbuf_t *stringbuf;
 
-  /* We should not use large strings in our protocol. However, we may
-   * receive a claim that a very long string is going to follow. In that
-   * case, we start small and wait for all that data to actually show up.
-   * This does not fully prevent DOS attacs but makes them harder (you
-   * have to actually send gigabytes of data).
-   */
-  if (len > SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD)
-    {
-      /* This string might take a large amount of memory. Don't allocate
-       * the whole buffer at once, so to prevent OOM issues by corrupted
-       * network data.
-       */
-      stringbuf = svn_stringbuf_create("", pool);
-      SVN_ERR(read_long_string(conn, pool, stringbuf, len));
-    }
-  else
+  /* Read the string in chunks.  The chunk size is large enough to avoid
+   * re-allocation in typical cases, and small enough to ensure we do not
+   * pre-allocate an unreasonable amount of memory if (perhaps due to
+   * network data corruption or a DOS attack), we receive a bogus claim that
+   * a very long string is going to follow.  In that case, we start small
+   * and wait for all that data to actually show up.  This does not fully
+   * prevent DOS attacks but makes them harder (you have to actually send
+   * gigabytes of data). */
+  stringbuf = svn_stringbuf_create_ensure(
+                (apr_size_t)(len < SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD
+                             ? len : SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD),
+                pool);
+
+  /* Read the string data directly into the string structure.
+   * Do it iteratively, if necessary.  */
+  while (len)
     {
-      /* This is a reasonably sized string. So, provide a buffer large
-       * enough to prevent re-allocation as long as the data transmission
-       * is not flawed.
-       */
-      stringbuf = svn_stringbuf_create_ensure((apr_size_t)len, pool);
-
-      /* Read the string data directly into the string structure.
-       * Do it iteratively, if necessary.
-       */
-      while (len)
-        {
-          apr_size_t readbuf_len = (apr_size_t)len;
-          char *dest = stringbuf->data + stringbuf->len;
-          SVN_ERR(readbuf_read(conn, pool, dest, readbuf_len));
-
-          stringbuf->len += readbuf_len;
-          stringbuf->data[stringbuf->len] = '\0';
-          len -= readbuf_len;
-        }
+      apr_size_t readbuf_len
+        = (apr_size_t)(len < SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD
+                       ? len : SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD);
+      char *dest = stringbuf->data + stringbuf->len;
+
+      svn_stringbuf_ensure(stringbuf, stringbuf->len + readbuf_len);
+      SVN_ERR(readbuf_read(conn, pool, dest, readbuf_len));
+
+      stringbuf->len += readbuf_len;
+      stringbuf->data[stringbuf->len] = '\0';
+      len -= readbuf_len;
     }
 
   /* Return the string properly wrapped into an RA_SVN item.

Reply via email to