What are you trying to optimize here?

When I perform a commit like this (the 16k file test in client tests) I
see 20 MINUTES spend in the fsfs handling of the commit and then 20
seconds in this part of the code (including all libsvn_wc work for the
commit).

So maybe this helps 0.001%, while there is much more to gain somewhere
else?

Bert Huijben (Cell phone) From: [email protected]
Sent: zaterdag 7 mei 2011 16:18
To: [email protected]
Subject: svn commit: r1100546 -
in /subversion/trunk/subversion/libsvn_client: client.h commit.c
commit_util.c copy.c
Author: stefan2
Date: Sat May  7 14:17:53 2011
New Revision: 1100546

URL: http://svn.apache.org/viewvc?rev=1100546&view=rev
Log:
Speed up "harvest" phase for very large numbers of changed items.
Previously, look_up_committable caused O(n^2) runtime with a
very small factor.

Introduces a second hash that makes this function a simple hash
lookup. For that, we need to replace the "hash of arrays" parameter
on the harvester functions with a structure containing that hash
as well as the by_path hash (to be used in look_up_committable).

* subversion/libsvn_client/client.h
  (svn_client__committables_t): new structure
  (svn_client__harvest_committables, svn_client__get_copy_committables):
   use that new structure instead of plain apr_hash_t
* subversion/libsvn_client/commit_util.c
  (add_committable): use and update that new structure instead of
plain apr_hash_t
  (look_up_committable): major optimization
  (harvest_committables): adapt pass-through parameter type
  (create_committables): new utility function
  (svn_client__harvest_committables, svn_client__get_copy_committables):
   adapt implementation to signature change
  (copy_committables_baton): sync with API change
  (harvest_copy_committables): adapt to baton change

* subversion/libsvn_client/commit.c
  (svn_client_commit5): adapt caller to API change
* subversion/libsvn_client/copy.c
  (wc_to_repos_copy): dito

Modified:
    subversion/trunk/subversion/libsvn_client/client.h
    subversion/trunk/subversion/libsvn_client/commit.c
    subversion/trunk/subversion/libsvn_client/commit_util.c
    subversion/trunk/subversion/libsvn_client/copy.c

Modified: subversion/trunk/subversion/libsvn_client/client.h
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/client.h?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/client.h (original)
+++ subversion/trunk/subversion/libsvn_client/client.h Sat May  7 14:17:53 2011
@@ -777,6 +777,28 @@ typedef struct svn_client__copy_pair_t

 */

+/* Structure that contains an apr_hash_t * hash of apr_array_header_t *
+   arrays of svn_client_commit_item3_t * structures; keyed by the
+   canonical repository URLs. For faster lookup, it also provides
+   an hash index keyed by the local absolute path. */
+typedef struct svn_client__committables_t
+{
+  /* apr_array_header_t array of svn_client_commit_item3_t structures
+     keyed by canonical repository URL */
+  apr_hash_t *by_repository;
+
+  /* svn_client_commit_item3_t structures keyed by local absolute path
+     (path member in the respective structures).
+
+     This member is for fast lookup only, i.e. whether there is an
+     entry for the given path or not, but it will only allow for one
+     entry per absolute path (in case of duplicate entries in the
+     above arrays). The "canonical" data storage containing all item
+     is by_repository. */
+  apr_hash_t *by_path;
+
+} svn_client__committables_t;
+
 /* Callback for the commit harvester to check if a node exists at the specified
    url */
 typedef svn_error_t *(*svn_client__check_url_kind_t)(void *baton,
@@ -796,10 +818,10 @@ typedef svn_error_t *(*svn_client__check
        items that are in the same repository, creating a new array if
        necessary.

-     - add (or update) a reference to this array to the COMMITTABLES
-       hash, keyed on the canonical repository name.  ### todo, until
-       multi-repository support actually exists, the single key here
-       will actually be some arbitrary thing to be ignored.
+     - add (or update) a reference to this array to the by_repository
+       hash within COMMITTABLES and update the by_path member as well-
+       ### todo, until multi-repository support actually exists, the
+       single key here will actually be some arbitrary thing to be ignored.

      - if the candidate has a lock token, add it to the LOCK_TOKENS hash.

@@ -807,11 +829,9 @@ typedef svn_error_t *(*svn_client__check
        the directories children recursively for any lock tokens and
        add them to the LOCK_TOKENS array.

-   At the successful return of this function, COMMITTABLES will be an
-   apr_hash_t * hash of apr_array_header_t * arrays (of
-   svn_client_commit_item3_t * structures), keyed on const char *
-   canonical repository URLs.  LOCK_TOKENS will point to a hash table
-   with const char * lock tokens, keyed on const char * URLs.
+   At the successful return of this function, COMMITTABLES will point
+   a new svn_client__committables_t*.  LOCK_TOKENS will point to a hash
+   table with const char * lock tokens, keyed on const char * URLs.

    If DEPTH is specified, descend (or not) into each target in TARGETS
    as specified by DEPTH; the behavior is the same as that described
@@ -829,7 +849,7 @@ typedef svn_error_t *(*svn_client__check
    CTX->CANCEL_BATON while harvesting to determine if the client has
    cancelled the operation. */
 svn_error_t *
-svn_client__harvest_committables(apr_hash_t **committables,
+svn_client__harvest_committables(svn_client__committables_t **committables,
                                  apr_hash_t **lock_tokens,
                                  const char *base_dir_abspath,
                                  const apr_array_header_t *targets,
@@ -844,16 +864,15 @@ svn_client__harvest_committables(apr_has


 /* Recursively crawl each absolute working copy path SRC in COPY_PAIRS,
-   harvesting commit_items into a COMMITABLES hash (see the docstring for
-   svn_client__harvest_committables for what that really means) as if
-   every entry at or below the SRC was to be committed as a set of adds
-   (mostly with history) to a new repository URL (DST in COPY_PAIRS).
+   harvesting commit_items into a COMMITABLES structure as if every entry
+   at or below the SRC was to be committed as a set of adds (mostly with
+   history) to a new repository URL (DST in COPY_PAIRS).

    If CTX->CANCEL_FUNC is non-null, it will be called with
    CTX->CANCEL_BATON while harvesting to determine if the client has
    cancelled the operation.  */
 svn_error_t *
-svn_client__get_copy_committables(apr_hash_t **committables,
+svn_client__get_copy_committables(svn_client__committables_t **committables,
                                   const apr_array_header_t *copy_pairs,
                                   svn_client__check_url_kind_t check_url_func,
                                   void *check_url_baton,

Modified: subversion/trunk/subversion/libsvn_client/commit.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/commit.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/commit.c (original)
+++ subversion/trunk/subversion/libsvn_client/commit.c Sat May  7 14:17:53 2011
@@ -1186,7 +1186,7 @@ svn_client_commit5(const apr_array_heade
   apr_array_header_t *rel_targets;
   apr_array_header_t *lock_targets;
   apr_array_header_t *locks_obtained;
-  apr_hash_t *committables;
+  svn_client__committables_t *committables;
   apr_hash_t *lock_tokens;
   apr_hash_t *sha1_checksums;
   apr_array_header_t *commit_items;
@@ -1307,11 +1307,11 @@ svn_client_commit5(const apr_array_heade
   if (cmt_err)
     goto cleanup;

-  if (apr_hash_count(committables) == 0)
+  if (apr_hash_count(committables->by_repository) == 0)
     {
       goto cleanup; /* Nothing to do */
     }
-  else if (apr_hash_count(committables) > 1)
+  else if (apr_hash_count(committables->by_repository) > 1)
     {
       cmt_err = svn_error_create(
              SVN_ERR_UNSUPPORTED_FEATURE, NULL,
@@ -1321,7 +1321,8 @@ svn_client_commit5(const apr_array_heade
     }

   {
-    apr_hash_index_t *hi = apr_hash_first(iterpool, committables);
+    apr_hash_index_t *hi = apr_hash_first(iterpool,
+                                          committables->by_repository);

     commit_items = svn__apr_hash_index_val(hi);
   }

Modified: subversion/trunk/subversion/libsvn_client/commit_util.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/commit_util.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/commit_util.c (original)
+++ subversion/trunk/subversion/libsvn_client/commit_util.c Sat May  7
14:17:53 2011
@@ -75,7 +75,7 @@ fixup_out_of_date_error(const char *path
    `COMMITTABLES') to the COMMITTABLES hash.  All of the commit item's
    members are allocated out of RESULT_POOL. */
 static svn_error_t *
-add_committable(apr_hash_t *committables,
+add_committable(svn_client__committables_t *committables,
                 const char *local_abspath,
                 svn_node_kind_t kind,
                 const char *repos_root_url,
@@ -97,14 +97,17 @@ add_committable(apr_hash_t *committables
   /* ### todo: Get the canonical repository for this item, which will
      be the real key for the COMMITTABLES hash, instead of the above
      bogosity. */
-  array = apr_hash_get(committables, repos_root_url, APR_HASH_KEY_STRING);
+  array = apr_hash_get(committables->by_repository,
+                       repos_root_url,
+                       APR_HASH_KEY_STRING);

   /* E-gads!  There is no array for this repository yet!  Oh, no
      problem, we'll just create (and add to the hash) one. */
   if (array == NULL)
     {
       array = apr_array_make(result_pool, 1, sizeof(new_item));
-      apr_hash_set(committables, apr_pstrdup(result_pool, repos_root_url),
+      apr_hash_set(committables->by_repository,
+                   apr_pstrdup(result_pool, repos_root_url),
                    APR_HASH_KEY_STRING, array);
     }

@@ -130,6 +133,12 @@ add_committable(apr_hash_t *committables
   /* Now, add the commit item to the array. */
   APR_ARRAY_PUSH(array, svn_client_commit_item3_t *) = new_item;

+  /* ... and to the hash. */
+  apr_hash_set(committables->by_path,
+               new_item->path,
+               APR_HASH_KEY_STRING,
+               new_item);
+
   return SVN_NO_ERROR;
 }

@@ -165,29 +174,12 @@ check_prop_mods(svn_boolean_t *props_cha
 /* If there is a commit item for PATH in COMMITTABLES, return it, else
    return NULL.  Use POOL for temporary allocation only. */
 static svn_client_commit_item3_t *
-look_up_committable(apr_hash_t *committables,
+look_up_committable(svn_client__committables_t *committables,
                     const char *path,
                     apr_pool_t *pool)
 {
-  apr_hash_index_t *hi;
-
-  for (hi = apr_hash_first(pool, committables); hi; hi = apr_hash_next(hi))
-    {
-      apr_array_header_t *these_committables = svn__apr_hash_index_val(hi);
-      int i;
-
-      for (i = 0; i < these_committables->nelts; i++)
-        {
-          svn_client_commit_item3_t *this_committable
-            = APR_ARRAY_IDX(these_committables, i,
-                            svn_client_commit_item3_t *);
-
-          if (strcmp(this_committable->path, path) == 0)
-            return this_committable;
-        }
-    }
-
-  return NULL;
+  return (svn_client_commit_item3_t *)
+      apr_hash_get(committables->by_path, path, APR_HASH_KEY_STRING);
 }

 /* Helper for harvest_committables().
@@ -324,7 +316,7 @@ bail_on_tree_conflicted_ancestor(svn_wc_
 static svn_error_t *
 harvest_committables(svn_wc_context_t *wc_ctx,
                      const char *local_abspath,
-                     apr_hash_t *committables,
+                     svn_client__committables_t *committables,
                      apr_hash_t *lock_tokens,
                      const char *repos_root_url,
                      const char *commit_relpath,
@@ -907,9 +899,20 @@ validate_dangler(void *baton,
   return SVN_NO_ERROR;
 }

+/* Allocate and initialize the COMMITTABLES structure from POOL.
+ */
+void
+create_committables(svn_client__committables_t **committables,
+                    apr_pool_t *pool)
+{
+  *committables = apr_palloc(pool, sizeof(**committables));
+
+  (*committables)->by_repository = apr_hash_make(pool);
+  (*committables)->by_path = apr_hash_make(pool);
+}

 svn_error_t *
-svn_client__harvest_committables(apr_hash_t **committables,
+svn_client__harvest_committables(svn_client__committables_t **committables,
                                  apr_hash_t **lock_tokens,
                                  const char *base_dir_abspath,
                                  const apr_array_header_t *targets,
@@ -954,8 +957,8 @@ svn_client__harvest_committables(apr_has

   SVN_ERR_ASSERT(svn_dirent_is_absolute(base_dir_abspath));

-  /* Create the COMMITTABLES hash. */
-  *committables = apr_hash_make(result_pool);
+  /* Create the COMMITTABLES structure. */
+  create_committables(committables, result_pool);

   /* And the LOCK_TOKENS dito. */
   *lock_tokens = apr_hash_make(result_pool);
@@ -1071,7 +1074,7 @@ svn_client__harvest_committables(apr_has
   hdb.check_url_func = check_url_func;
   hdb.check_url_baton = check_url_baton;

-  SVN_ERR(svn_iter_apr_hash(NULL, *committables,
+  SVN_ERR(svn_iter_apr_hash(NULL, (*committables)->by_repository,
                             handle_descendants, &hdb, iterpool));

   /* Make sure that every path in danglers is part of the commit. */
@@ -1087,7 +1090,7 @@ svn_client__harvest_committables(apr_has
 struct copy_committables_baton
 {
   svn_client_ctx_t *ctx;
-  apr_hash_t *committables;
+  svn_client__committables_t *committables;
   apr_pool_t *result_pool;
   svn_client__check_url_kind_t check_url_func;
   void *check_url_baton;
@@ -1137,7 +1140,7 @@ harvest_copy_committables(void *baton, v
   hdb.check_url_func = btn->check_url_func;
   hdb.check_url_baton = btn->check_url_baton;

-  SVN_ERR(svn_iter_apr_hash(NULL, btn->committables,
+  SVN_ERR(svn_iter_apr_hash(NULL, btn->committables->by_repository,
                             handle_descendants, &hdb, pool));

   return SVN_NO_ERROR;
@@ -1146,7 +1149,7 @@ harvest_copy_committables(void *baton, v


 svn_error_t *
-svn_client__get_copy_committables(apr_hash_t **committables,
+svn_client__get_copy_committables(svn_client__committables_t **committables,
                                   const apr_array_header_t *copy_pairs,
                                   svn_client__check_url_kind_t check_url_func,
                                   void *check_url_baton,
@@ -1156,7 +1159,8 @@ svn_client__get_copy_committables(apr_ha
 {
   struct copy_committables_baton btn;

-  *committables = apr_hash_make(result_pool);
+  /* Create the COMMITTABLES structure. */
+  create_committables(committables, result_pool);

   btn.ctx = ctx;
   btn.committables = *committables;

Modified: subversion/trunk/subversion/libsvn_client/copy.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/copy.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/copy.c (original)
+++ subversion/trunk/subversion/libsvn_client/copy.c Sat May  7 14:17:53 2011
@@ -1190,7 +1190,7 @@ wc_to_repos_copy(const apr_array_header_
   svn_ra_session_t *ra_session;
   const svn_delta_editor_t *editor;
   void *edit_baton;
-  apr_hash_t *committables;
+  svn_client__committables_t *committables;
   apr_array_header_t *commit_items;
   apr_pool_t *iterpool;
   apr_array_header_t *new_dirs = NULL;
@@ -1335,7 +1335,8 @@ wc_to_repos_copy(const apr_array_header_
                                             ctx, pool, pool));

   /* The committables are keyed by the repository root */
-  commit_items = apr_hash_get(committables, cukb.repos_root_url,
+  commit_items = apr_hash_get(committables->by_repository,
+                              cukb.repos_root_url,
                               APR_HASH_KEY_STRING);
   SVN_ERR_ASSERT(commit_items != NULL);

Reply via email to