> -----Original Message----- > From: s...@apache.org [mailto:s...@apache.org] > Sent: vrijdag 20 juni 2014 17:24 > To: comm...@subversion.apache.org > Subject: svn commit: r1604188 - in > /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h > transaction.c > > Author: stsp > Date: Fri Jun 20 15:24:02 2014 > New Revision: 1604188 > > URL: http://svn.apache.org/r1604188 > Log: > Improve memory usage while fetching changed paths data from a > transaction > by eliminating a full in-memory copy of changed path data. > > * subversion/libsvn_fs_fs/low_level.c > (svn_fs_fs__read_changes_incrementally): New FSFS-internal API which > invokes a callback for each changed path in a transaction. > > * subversion/libsvn_fs_fs/low_level.h > (svn_fs_fs__read_changes_incrementally): Declare. > > * subversion/libsvn_fs_fs/transaction.c > (svn_fs_fs__txn_changes_fetch): Use > svn_fs_fs__read_changes_incrementally(), > instead of reading a changes_t list into memory and looping over that list. > > Modified: > subversion/trunk/subversion/libsvn_fs_fs/low_level.c > subversion/trunk/subversion/libsvn_fs_fs/low_level.h > subversion/trunk/subversion/libsvn_fs_fs/transaction.c > > Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo > w_level.c?rev=1604188&r1=1604187&r2=1604188&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/libsvn_fs_fs/low_level.c (original) > +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.c Fri Jun 20 > 15:24:02 2014 > @@ -365,6 +365,31 @@ svn_fs_fs__read_changes(apr_array_header > return SVN_NO_ERROR; > } > > +svn_error_t * > +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream, > + svn_fs_fs__change_receiver_t > + change_receiver, > + void *change_receiver_baton, > + apr_pool_t *pool) > +{ > + change_t *change; > + apr_pool_t *iterpool; > + > + iterpool = svn_pool_create(pool); > + do > + { > + svn_pool_clear(iterpool); > + > + SVN_ERR(read_change(&change, stream, iterpool)); > + if (change) > + SVN_ERR(change_receiver(change_receiver_baton, change, iterpool)); > + } > + while (change); > + svn_pool_destroy(iterpool); > + > + return SVN_NO_ERROR; > +} > + > /* Write a single change entry, path PATH, change CHANGE, and copyfrom > string COPYFROM, into the file specified by FILE. Only include the > node kind field if INCLUDE_NODE_KIND is true. Only include the > > Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.h > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo > w_level.h?rev=1604188&r1=1604187&r2=1604188&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/libsvn_fs_fs/low_level.h (original) > +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.h Fri Jun 20 > 15:24:02 2014 > @@ -68,6 +68,20 @@ svn_fs_fs__read_changes(apr_array_header > svn_stream_t *stream, > apr_pool_t *pool); > > +typedef svn_error_t *(*svn_fs_fs__change_receiver_t)( > + void *baton, > + change_t *change, > + apr_pool_t *scratch_pool); > + > +/* Read all the changes from STREAM and invoke CHANGE_RECEIVER on > each change. > + Do all allocations in POOL. */ > +svn_error_t * > +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream, > + svn_fs_fs__change_receiver_t > + change_receiver, > + void *change_receiver_baton, > + apr_pool_t *scratch_pool); > + > /* Write the changed path info from CHANGES in filesystem FS to the > output stream STREAM. You may call this function multiple time on > the same stream but the last call should set TERMINATE_LIST to write > > Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/tra > nsaction.c?rev=1604188&r1=1604187&r2=1604188&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original) > +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Fri Jun 20 > 15:24:02 2014 > @@ -777,84 +777,72 @@ fold_change(apr_hash_t *changed_paths, > return SVN_NO_ERROR; > } > > -/* Examine all the changed path entries in CHANGES and store them in > +/* An implementation of svn_fs_fs__change_receiver_t. > + Examine all the changed path entries in CHANGES and store them in > *CHANGED_PATHS. Folding is done to remove redundant or unnecessary > data. Do all allocations in POOL. */ > static svn_error_t * > -process_changes(apr_hash_t *changed_paths, > - apr_array_header_t *changes, > - apr_pool_t *pool) > -{ > - apr_pool_t *iterpool = svn_pool_create(pool); > - int i; > - > - /* Read in the changes one by one, folding them into our local hash > - as necessary. */ > - > - for (i = 0; i < changes->nelts; ++i) > - { > - /* The ITERPOOL will be cleared at the end of this function > - * since it is only used rarely and for a single hash iterator. > - */ > - change_t *change = APR_ARRAY_IDX(changes, i, change_t *); > - > - SVN_ERR(fold_change(changed_paths, change)); > - > - /* Now, if our change was a deletion or replacement, we have to > - blow away any changes thus far on paths that are (or, were) > - children of this path. > - ### i won't bother with another iteration pool here -- at > - most we talking about a few extra dups of paths into what > - is already a temporary subpool. > +process_changes(void *baton, > + change_t *change, > + apr_pool_t *scratch_pool) > +{ > + apr_hash_t *changed_paths = baton; > + > + SVN_ERR(fold_change(changed_paths, change)); > + > + /* Now, if our change was a deletion or replacement, we have to > + blow away any changes thus far on paths that are (or, were) > + children of this path. > + ### i won't bother with another iteration pool here -- at > + most we talking about a few extra dups of paths into what > + is already a temporary subpool. > + */ > + > + if ((change->info.change_kind == svn_fs_path_change_delete) > + || (change->info.change_kind == svn_fs_path_change_replace)) > + { > + apr_hash_index_t *hi; > + apr_pool_t *iterpool; > + > + /* a potential child path must contain at least 2 more chars > + (the path separator plus at least one char for the name). > + Also, we should not assume that all paths have been normalized > + i.e. some might have trailing path separators. > */ > + apr_ssize_t path_len = change->path.len; > + apr_ssize_t min_child_len = path_len == 0 > + ? 1 > + : change->path.data[path_len-1] == '/' > + ? path_len + 1 > + : path_len + 2; > + > + /* CAUTION: This is the inner loop of an O(n^2) algorithm. > + The number of changes to process may be >> 1000. > + Therefore, keep the inner loop as tight as possible. > + */ > + iterpool = svn_pool_create(scratch_pool); > + for (hi = apr_hash_first(scratch_pool, changed_paths); > + hi; > + hi = apr_hash_next(hi)) > + { > + /* KEY is the path. */ > + const void *path; > + apr_ssize_t klen; > + apr_hash_this(hi, &path, &klen, NULL); > > - if ((change->info.change_kind == svn_fs_path_change_delete) > - || (change->info.change_kind == svn_fs_path_change_replace)) > - { > - apr_hash_index_t *hi; > - > - /* a potential child path must contain at least 2 more chars > - (the path separator plus at least one char for the name). > - Also, we should not assume that all paths have been normalized > - i.e. some might have trailing path separators. > - */ > - apr_ssize_t path_len = change->path.len; > - apr_ssize_t min_child_len = path_len == 0 > - ? 1 > - : change->path.data[path_len-1] == '/' > - ? path_len + 1 > - : path_len + 2; > - > - /* CAUTION: This is the inner loop of an O(n^2) algorithm. > - The number of changes to process may be >> 1000. > - Therefore, keep the inner loop as tight as possible. > - */ > - for (hi = apr_hash_first(iterpool, changed_paths); > - hi; > - hi = apr_hash_next(hi)) > - { > - /* KEY is the path. */ > - const void *path; > - apr_ssize_t klen; > - apr_hash_this(hi, &path, &klen, NULL); > - > - /* If we come across a child of our path, remove it. > - Call svn_dirent_is_child only if there is a chance that > - this is actually a sub-path. > - */ > - if ( klen >= min_child_len > - && svn_dirent_is_child(change->path.data, path, iterpool)) > - apr_hash_set(changed_paths, path, klen, NULL); > - } > - > - /* Clear the per-iteration subpool. */ > svn_pool_clear(iterpool); > + > + /* If we come across a child of our path, remove it. > + Call svn_dirent_is_child only if there is a chance that > + this is actually a sub-path. > + */ > + if ( klen >= min_child_len > + && svn_dirent_is_child(change->path.data, path, iterpool)) > + apr_hash_set(changed_paths, path, klen, NULL);
This doesn't look like an on-disk path, so it should never use the dirent apis. It should use either a relpath or an fspath api. And you can just pass a NULL pool to the is_child functions (except for the URI variant), so that might even completely avoid using an iterpool. > } > + svn_pool_destroy(iterpool); > } > > - /* Destroy the per-iteration subpool. */ > - svn_pool_destroy(iterpool); > - > return SVN_NO_ERROR; > } > > @@ -866,7 +854,6 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t > { > apr_file_t *file; > apr_hash_t *changed_paths = apr_hash_make(pool); > - apr_array_header_t *changes; > apr_pool_t *scratch_pool = svn_pool_create(pool); > > SVN_ERR(svn_io_file_open(&file, > @@ -874,11 +861,11 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t > APR_READ | APR_BUFFERED, APR_OS_DEFAULT, > scratch_pool)); > > - SVN_ERR(svn_fs_fs__read_changes(&changes, > + SVN_ERR(svn_fs_fs__read_changes_incrementally( > svn_stream_from_aprfile2(file, TRUE, > scratch_pool), > + process_changes, changed_paths, > scratch_pool)); > - SVN_ERR(process_changes(changed_paths, changes, pool)); > svn_pool_destroy(scratch_pool); If the incrementally / streamy function uses a proper iterpool, the subpool called scratch_pool might not be necessary in this block. (Might still be necessary for usages outside the diff hunks) Bert > > *changed_paths_p = changed_paths; >