Author: stefan2
Date: Sat May 7 14:17:53 2011
New Revision: 1100546
URL: http://svn.apache.org/viewvc?rev=1100546&view=rev
Log:
Speed up "harvest" phase for very large numbers of changed items.
Previously, look_up_committable caused O(n^2) runtime with a
very small factor.
Introduces a second hash that makes this function a simple hash
lookup. For that, we need to replace the "hash of arrays" parameter
on the harvester functions with a structure containing that hash
as well as the by_path hash (to be used in look_up_committable).
* subversion/libsvn_client/client.h
(svn_client__committables_t): new structure
(svn_client__harvest_committables, svn_client__get_copy_committables):
use that new structure instead of plain apr_hash_t
* subversion/libsvn_client/commit_util.c
(add_committable): use and update that new structure instead of plain
apr_hash_t
(look_up_committable): major optimization
(harvest_committables): adapt pass-through parameter type
(create_committables): new utility function
(svn_client__harvest_committables, svn_client__get_copy_committables):
adapt implementation to signature change
(copy_committables_baton): sync with API change
(harvest_copy_committables): adapt to baton change
* subversion/libsvn_client/commit.c
(svn_client_commit5): adapt caller to API change
* subversion/libsvn_client/copy.c
(wc_to_repos_copy): dito
Modified:
subversion/trunk/subversion/libsvn_client/client.h
subversion/trunk/subversion/libsvn_client/commit.c
subversion/trunk/subversion/libsvn_client/commit_util.c
subversion/trunk/subversion/libsvn_client/copy.c
Modified: subversion/trunk/subversion/libsvn_client/client.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/client.h?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/client.h (original)
+++ subversion/trunk/subversion/libsvn_client/client.h Sat May 7 14:17:53 2011
@@ -777,6 +777,28 @@ typedef struct svn_client__copy_pair_t
*/
+/* Structure that contains an apr_hash_t * hash of apr_array_header_t *
+ arrays of svn_client_commit_item3_t * structures; keyed by the
+ canonical repository URLs. For faster lookup, it also provides
+ an hash index keyed by the local absolute path. */
+typedef struct svn_client__committables_t
+{
+ /* apr_array_header_t array of svn_client_commit_item3_t structures
+ keyed by canonical repository URL */
+ apr_hash_t *by_repository;
+
+ /* svn_client_commit_item3_t structures keyed by local absolute path
+ (path member in the respective structures).
+
+ This member is for fast lookup only, i.e. whether there is an
+ entry for the given path or not, but it will only allow for one
+ entry per absolute path (in case of duplicate entries in the
+ above arrays). The "canonical" data storage containing all item
+ is by_repository. */
+ apr_hash_t *by_path;
+
+} svn_client__committables_t;
+
/* Callback for the commit harvester to check if a node exists at the specified
url */
typedef svn_error_t *(*svn_client__check_url_kind_t)(void *baton,
@@ -796,10 +818,10 @@ typedef svn_error_t *(*svn_client__check
items that are in the same repository, creating a new array if
necessary.
- - add (or update) a reference to this array to the COMMITTABLES
- hash, keyed on the canonical repository name. ### todo, until
- multi-repository support actually exists, the single key here
- will actually be some arbitrary thing to be ignored.
+ - add (or update) a reference to this array to the by_repository
+ hash within COMMITTABLES and update the by_path member as well-
+ ### todo, until multi-repository support actually exists, the
+ single key here will actually be some arbitrary thing to be ignored.
- if the candidate has a lock token, add it to the LOCK_TOKENS hash.
@@ -807,11 +829,9 @@ typedef svn_error_t *(*svn_client__check
the directories children recursively for any lock tokens and
add them to the LOCK_TOKENS array.
- At the successful return of this function, COMMITTABLES will be an
- apr_hash_t * hash of apr_array_header_t * arrays (of
- svn_client_commit_item3_t * structures), keyed on const char *
- canonical repository URLs. LOCK_TOKENS will point to a hash table
- with const char * lock tokens, keyed on const char * URLs.
+ At the successful return of this function, COMMITTABLES will point
+ a new svn_client__committables_t*. LOCK_TOKENS will point to a hash
+ table with const char * lock tokens, keyed on const char * URLs.
If DEPTH is specified, descend (or not) into each target in TARGETS
as specified by DEPTH; the behavior is the same as that described
@@ -829,7 +849,7 @@ typedef svn_error_t *(*svn_client__check
CTX->CANCEL_BATON while harvesting to determine if the client has
cancelled the operation. */
svn_error_t *
-svn_client__harvest_committables(apr_hash_t **committables,
+svn_client__harvest_committables(svn_client__committables_t **committables,
apr_hash_t **lock_tokens,
const char *base_dir_abspath,
const apr_array_header_t *targets,
@@ -844,16 +864,15 @@ svn_client__harvest_committables(apr_has
/* Recursively crawl each absolute working copy path SRC in COPY_PAIRS,
- harvesting commit_items into a COMMITABLES hash (see the docstring for
- svn_client__harvest_committables for what that really means) as if
- every entry at or below the SRC was to be committed as a set of adds
- (mostly with history) to a new repository URL (DST in COPY_PAIRS).
+ harvesting commit_items into a COMMITABLES structure as if every entry
+ at or below the SRC was to be committed as a set of adds (mostly with
+ history) to a new repository URL (DST in COPY_PAIRS).
If CTX->CANCEL_FUNC is non-null, it will be called with
CTX->CANCEL_BATON while harvesting to determine if the client has
cancelled the operation. */
svn_error_t *
-svn_client__get_copy_committables(apr_hash_t **committables,
+svn_client__get_copy_committables(svn_client__committables_t **committables,
const apr_array_header_t *copy_pairs,
svn_client__check_url_kind_t check_url_func,
void *check_url_baton,
Modified: subversion/trunk/subversion/libsvn_client/commit.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/commit.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/commit.c (original)
+++ subversion/trunk/subversion/libsvn_client/commit.c Sat May 7 14:17:53 2011
@@ -1186,7 +1186,7 @@ svn_client_commit5(const apr_array_heade
apr_array_header_t *rel_targets;
apr_array_header_t *lock_targets;
apr_array_header_t *locks_obtained;
- apr_hash_t *committables;
+ svn_client__committables_t *committables;
apr_hash_t *lock_tokens;
apr_hash_t *sha1_checksums;
apr_array_header_t *commit_items;
@@ -1307,11 +1307,11 @@ svn_client_commit5(const apr_array_heade
if (cmt_err)
goto cleanup;
- if (apr_hash_count(committables) == 0)
+ if (apr_hash_count(committables->by_repository) == 0)
{
goto cleanup; /* Nothing to do */
}
- else if (apr_hash_count(committables) > 1)
+ else if (apr_hash_count(committables->by_repository) > 1)
{
cmt_err = svn_error_create(
SVN_ERR_UNSUPPORTED_FEATURE, NULL,
@@ -1321,7 +1321,8 @@ svn_client_commit5(const apr_array_heade
}
{
- apr_hash_index_t *hi = apr_hash_first(iterpool, committables);
+ apr_hash_index_t *hi = apr_hash_first(iterpool,
+ committables->by_repository);
commit_items = svn__apr_hash_index_val(hi);
}
Modified: subversion/trunk/subversion/libsvn_client/commit_util.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/commit_util.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/commit_util.c (original)
+++ subversion/trunk/subversion/libsvn_client/commit_util.c Sat May 7 14:17:53
2011
@@ -75,7 +75,7 @@ fixup_out_of_date_error(const char *path
`COMMITTABLES') to the COMMITTABLES hash. All of the commit item's
members are allocated out of RESULT_POOL. */
static svn_error_t *
-add_committable(apr_hash_t *committables,
+add_committable(svn_client__committables_t *committables,
const char *local_abspath,
svn_node_kind_t kind,
const char *repos_root_url,
@@ -97,14 +97,17 @@ add_committable(apr_hash_t *committables
/* ### todo: Get the canonical repository for this item, which will
be the real key for the COMMITTABLES hash, instead of the above
bogosity. */
- array = apr_hash_get(committables, repos_root_url, APR_HASH_KEY_STRING);
+ array = apr_hash_get(committables->by_repository,
+ repos_root_url,
+ APR_HASH_KEY_STRING);
/* E-gads! There is no array for this repository yet! Oh, no
problem, we'll just create (and add to the hash) one. */
if (array == NULL)
{
array = apr_array_make(result_pool, 1, sizeof(new_item));
- apr_hash_set(committables, apr_pstrdup(result_pool, repos_root_url),
+ apr_hash_set(committables->by_repository,
+ apr_pstrdup(result_pool, repos_root_url),
APR_HASH_KEY_STRING, array);
}
@@ -130,6 +133,12 @@ add_committable(apr_hash_t *committables
/* Now, add the commit item to the array. */
APR_ARRAY_PUSH(array, svn_client_commit_item3_t *) = new_item;
+ /* ... and to the hash. */
+ apr_hash_set(committables->by_path,
+ new_item->path,
+ APR_HASH_KEY_STRING,
+ new_item);
+
return SVN_NO_ERROR;
}
@@ -165,29 +174,12 @@ check_prop_mods(svn_boolean_t *props_cha
/* If there is a commit item for PATH in COMMITTABLES, return it, else
return NULL. Use POOL for temporary allocation only. */
static svn_client_commit_item3_t *
-look_up_committable(apr_hash_t *committables,
+look_up_committable(svn_client__committables_t *committables,
const char *path,
apr_pool_t *pool)
{
- apr_hash_index_t *hi;
-
- for (hi = apr_hash_first(pool, committables); hi; hi = apr_hash_next(hi))
- {
- apr_array_header_t *these_committables = svn__apr_hash_index_val(hi);
- int i;
-
- for (i = 0; i < these_committables->nelts; i++)
- {
- svn_client_commit_item3_t *this_committable
- = APR_ARRAY_IDX(these_committables, i,
- svn_client_commit_item3_t *);
-
- if (strcmp(this_committable->path, path) == 0)
- return this_committable;
- }
- }
-
- return NULL;
+ return (svn_client_commit_item3_t *)
+ apr_hash_get(committables->by_path, path, APR_HASH_KEY_STRING);
}
/* Helper for harvest_committables().
@@ -324,7 +316,7 @@ bail_on_tree_conflicted_ancestor(svn_wc_
static svn_error_t *
harvest_committables(svn_wc_context_t *wc_ctx,
const char *local_abspath,
- apr_hash_t *committables,
+ svn_client__committables_t *committables,
apr_hash_t *lock_tokens,
const char *repos_root_url,
const char *commit_relpath,
@@ -907,9 +899,20 @@ validate_dangler(void *baton,
return SVN_NO_ERROR;
}
+/* Allocate and initialize the COMMITTABLES structure from POOL.
+ */
+void
+create_committables(svn_client__committables_t **committables,
+ apr_pool_t *pool)
+{
+ *committables = apr_palloc(pool, sizeof(**committables));
+
+ (*committables)->by_repository = apr_hash_make(pool);
+ (*committables)->by_path = apr_hash_make(pool);
+}
svn_error_t *
-svn_client__harvest_committables(apr_hash_t **committables,
+svn_client__harvest_committables(svn_client__committables_t **committables,
apr_hash_t **lock_tokens,
const char *base_dir_abspath,
const apr_array_header_t *targets,
@@ -954,8 +957,8 @@ svn_client__harvest_committables(apr_has
SVN_ERR_ASSERT(svn_dirent_is_absolute(base_dir_abspath));
- /* Create the COMMITTABLES hash. */
- *committables = apr_hash_make(result_pool);
+ /* Create the COMMITTABLES structure. */
+ create_committables(committables, result_pool);
/* And the LOCK_TOKENS dito. */
*lock_tokens = apr_hash_make(result_pool);
@@ -1071,7 +1074,7 @@ svn_client__harvest_committables(apr_has
hdb.check_url_func = check_url_func;
hdb.check_url_baton = check_url_baton;
- SVN_ERR(svn_iter_apr_hash(NULL, *committables,
+ SVN_ERR(svn_iter_apr_hash(NULL, (*committables)->by_repository,
handle_descendants, &hdb, iterpool));
/* Make sure that every path in danglers is part of the commit. */
@@ -1087,7 +1090,7 @@ svn_client__harvest_committables(apr_has
struct copy_committables_baton
{
svn_client_ctx_t *ctx;
- apr_hash_t *committables;
+ svn_client__committables_t *committables;
apr_pool_t *result_pool;
svn_client__check_url_kind_t check_url_func;
void *check_url_baton;
@@ -1137,7 +1140,7 @@ harvest_copy_committables(void *baton, v
hdb.check_url_func = btn->check_url_func;
hdb.check_url_baton = btn->check_url_baton;
- SVN_ERR(svn_iter_apr_hash(NULL, btn->committables,
+ SVN_ERR(svn_iter_apr_hash(NULL, btn->committables->by_repository,
handle_descendants, &hdb, pool));
return SVN_NO_ERROR;
@@ -1146,7 +1149,7 @@ harvest_copy_committables(void *baton, v
svn_error_t *
-svn_client__get_copy_committables(apr_hash_t **committables,
+svn_client__get_copy_committables(svn_client__committables_t **committables,
const apr_array_header_t *copy_pairs,
svn_client__check_url_kind_t check_url_func,
void *check_url_baton,
@@ -1156,7 +1159,8 @@ svn_client__get_copy_committables(apr_ha
{
struct copy_committables_baton btn;
- *committables = apr_hash_make(result_pool);
+ /* Create the COMMITTABLES structure. */
+ create_committables(committables, result_pool);
btn.ctx = ctx;
btn.committables = *committables;
Modified: subversion/trunk/subversion/libsvn_client/copy.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/copy.c?rev=1100546&r1=1100545&r2=1100546&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/copy.c (original)
+++ subversion/trunk/subversion/libsvn_client/copy.c Sat May 7 14:17:53 2011
@@ -1190,7 +1190,7 @@ wc_to_repos_copy(const apr_array_header_
svn_ra_session_t *ra_session;
const svn_delta_editor_t *editor;
void *edit_baton;
- apr_hash_t *committables;
+ svn_client__committables_t *committables;
apr_array_header_t *commit_items;
apr_pool_t *iterpool;
apr_array_header_t *new_dirs = NULL;
@@ -1335,7 +1335,8 @@ wc_to_repos_copy(const apr_array_header_
ctx, pool, pool));
/* The committables are keyed by the repository root */
- commit_items = apr_hash_get(committables, cukb.repos_root_url,
+ commit_items = apr_hash_get(committables->by_repository,
+ cukb.repos_root_url,
APR_HASH_KEY_STRING);
SVN_ERR_ASSERT(commit_items != NULL);