Modified: subversion/branches/javahl-ra/tools/server-side/fsfs-stats.c URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/fsfs-stats.c?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/fsfs-stats.c (original) +++ subversion/branches/javahl-ra/tools/server-side/fsfs-stats.c Fri Dec 28 23:03:51 2012 @@ -1,4 +1,4 @@ -/* diff.c -- test driver for text diffs +/* fsfs-stats.c -- gather size statistics on FSFS repositories * * ==================================================================== * Licensed to the Apache Software Foundation (ASF) under one @@ -22,7 +22,6 @@ #include <assert.h> -#include <sys/stat.h> #include <apr.h> #include <apr_general.h> @@ -37,29 +36,40 @@ #include "svn_sorts.h" #include "svn_delta.h" #include "svn_hash.h" +#include "svn_cache_config.h" #include "private/svn_string_private.h" #include "private/svn_subr_private.h" #include "private/svn_dep_compat.h" +#include "private/svn_cache.h" #ifndef _ #define _(x) x #endif -#define ERROR_TAG "diff: " +#define ERROR_TAG "fsfs-stats: " +/* We group representations into 2x2 different kinds plus one default: + * [dir / file] x [text / prop]. The assignment is done by the first node + * that references the respective representation. + */ typedef enum rep_kind_t { + /* The representation is _directly_ unused, i.e. not referenced by any + * noderev. However, some other representation may use it as delta base. + * null value. Should not occur in real-word repositories. 
*/ unused_rep, + /* a properties on directory rep */ dir_property_rep, + /* a properties on file rep */ file_property_rep, - /* a directory rep (including PLAIN / DELTA header) */ + /* a directory rep */ dir_rep, - /* a file rep (including PLAIN / DELTA header) */ + /* a file rep */ file_rep } rep_kind_t; @@ -73,6 +83,7 @@ typedef struct representation_t /* item length in bytes */ apr_size_t size; + /* item length after de-deltification */ apr_size_t expanded_size; /* deltification base, or NULL if there is none */ @@ -80,13 +91,15 @@ typedef struct representation_t /* revision that contains this representation * (may be referenced by other revisions, though) */ - svn_revnum_t revision; + + /* number of nodes that reference this representation */ apr_uint32_t ref_count; /* length of the PLAIN / DELTA line in the source file in bytes */ apr_uint16_t header_size; + /* classification of the representation. values of rep_kind_t */ char kind; /* the source content has a PLAIN header, so we may simply copy the @@ -118,9 +131,16 @@ typedef struct revision_info_t * for non-packed revs) */ apr_size_t end; + /* number of directory noderevs in this revision */ apr_size_t dir_noderev_count; + + /* number of file noderevs in this revision */ apr_size_t file_noderev_count; + + /* total size of directory noderevs (i.e. the structs - not the rep) */ apr_size_t dir_noderev_size; + + /* total size of file noderevs (i.e. the structs - not the rep) */ apr_size_t file_noderev_size; /* all representation_t of this revision (in no particular order), @@ -128,43 +148,17 @@ typedef struct revision_info_t apr_array_header_t *representations; } revision_info_t; -/* A cached, undeltified txdelta window. +/* Data type to identify a representation. It will be used to address + * cached combined (un-deltified) windows. 
*/ -typedef struct window_cache_entry_t +typedef struct window_cache_key_t { - /* revision containing the window */ + /* revision of the representation */ svn_revnum_t revision; - /* offset of the deltified window within that revision */ + /* its offset */ apr_size_t offset; - - /* window content */ - svn_stringbuf_t *window; -} window_cache_entry_t; - -/* Cache for undeltified txdelta windows. (revision, offset) will be mapped - * directly into the ENTRIES array of INSERT_COUNT buckets (most entries - * will be NULL). - * - * The cache will be cleared when USED exceeds CAPACITY. - */ -typedef struct window_cache_t -{ - /* fixed-size array of ENTRY_COUNT elements */ - window_cache_entry_t *entries; - - /* used to allocate windows */ - apr_pool_t *pool; - - /* size of ENTRIES in elements */ - apr_size_t entry_count; - - /* maximum combined size of all cached windows */ - apr_size_t capacity; - - /* current combined size of all cached windows */ - apr_size_t used; -} window_cache_t; +} window_cache_key_t; /* Root data structure containing all information about a given repository. */ @@ -196,7 +190,7 @@ typedef struct fs_fs_t representation_t *null_base; /* undeltified txdelta window cache */ - window_cache_t *window_cache; + svn_cache__t *window_cache; } fs_fs_t; /* Return the rev pack folder for revision REV in FS. @@ -238,45 +232,55 @@ open_rev_or_pack_file(apr_file_t **file, pool); } -/* Read the whole content of the file containing REV in FS and return that - * in *CONTENT. - */ +/* Return the length of FILE in *FILE_SIZE. Use POOL for allocations. 
+*/ static svn_error_t * -rev_or_pack_file_size(apr_off_t *file_size, - fs_fs_t *fs, - svn_revnum_t rev, - apr_pool_t *pool) +get_file_size(apr_off_t *file_size, + apr_file_t *file, + apr_pool_t *pool) { - apr_file_t *file; apr_finfo_t finfo; - SVN_ERR(open_rev_or_pack_file(&file, fs, rev, pool)); SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, file, pool)); - SVN_ERR(svn_io_file_close(file, pool)); *file_size = finfo.size; return SVN_NO_ERROR; } -/* Get the file content of revision REVISION in FS and return it in *DATA. - * Use SCRATCH_POOL for temporary allocations. +/* Get the file content of revision REVISION in FS and return it in *CONTENT. + * Read the LEN bytes starting at file OFFSET. When provided, use FILE as + * packed or plain rev file. + * Use POOL for temporary allocations. */ static svn_error_t * get_content(svn_stringbuf_t **content, + apr_file_t *file, fs_fs_t *fs, svn_revnum_t revision, apr_off_t offset, apr_size_t len, apr_pool_t *pool) { - apr_file_t *file; apr_pool_t * file_pool = svn_pool_create(pool); + apr_size_t large_buffer_size = 0x10000; - SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool)); - SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); + if (file == NULL) + SVN_ERR(open_rev_or_pack_file(&file, fs, revision, file_pool)); *content = svn_stringbuf_create_ensure(len, pool); (*content)->len = len; + +#if APR_VERSION_AT_LEAST(1,3,0) + /* for better efficiency use larger buffers on large reads */ + if ( (len >= large_buffer_size) + && (apr_file_buffer_size_get(file) < large_buffer_size)) + apr_file_buffer_set(file, + apr_palloc(apr_file_pool_get(file), + large_buffer_size), + large_buffer_size); +#endif + + SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); SVN_ERR(svn_io_file_read_full2(file, (*content)->data, len, NULL, NULL, pool)); svn_pool_destroy(file_pool); @@ -284,89 +288,48 @@ get_content(svn_stringbuf_t **content, return SVN_NO_ERROR; } -/* Return a new txdelta window cache with ENTRY_COUNT buckets 
in its index - * and a the total CAPACITY given in bytes. - * Use POOL for all cache-related allocations. +/* In *RESULT, return the cached txdelta window stored in REPRESENTATION + * within FS. If that has not been found in cache, return NULL. + * Allocate the result in POOL. */ -static window_cache_t * -create_window_cache(apr_pool_t *pool, - apr_size_t entry_count, - apr_size_t capacity) -{ - window_cache_t *result = apr_pcalloc(pool, sizeof(*result)); - - result->pool = svn_pool_create(pool); - result->entry_count = entry_count; - result->capacity = capacity; - result->used = 0; - result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count); - - return result; -} - -/* Return the position within FS' window cache ENTRIES index for the given - * (REVISION, OFFSET) pair. This is a cache-internal function. - */ -static apr_size_t -get_window_cache_index(fs_fs_t *fs, - svn_revnum_t revision, - apr_size_t offset) -{ - return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count; -} - -/* Return the cached txdelta window stored in REPRESENTAION within FS. - * If that has not been found in cache, return NULL. - */ -static svn_stringbuf_t * -get_cached_window(fs_fs_t *fs, +static svn_error_t * +get_cached_window(svn_stringbuf_t **result, + fs_fs_t *fs, representation_t *representation, apr_pool_t *pool) { - svn_revnum_t revision = representation->revision; - apr_size_t offset = representation->offset; - - apr_size_t i = get_window_cache_index(fs, revision, offset); - window_cache_entry_t *entry = &fs->window_cache->entries[i]; - - return entry->offset == offset && entry->revision == revision - ? 
svn_stringbuf_dup(entry->window, pool) - : NULL; + svn_boolean_t found = FALSE; + window_cache_key_t key; + key.revision = representation->revision; + key.offset = representation->offset; + + *result = NULL; + return svn_error_trace(svn_cache__get((void**)result, &found, + fs->window_cache, + &key, pool)); } -/* Cache the undeltified txdelta WINDOW for REPRESENTAION within FS. +/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS. + * Use POOL for temporaries. */ -static void +static svn_error_t * set_cached_window(fs_fs_t *fs, representation_t *representation, - svn_stringbuf_t *window) + svn_stringbuf_t *window, + apr_pool_t *pool) { /* select entry */ - svn_revnum_t revision = representation->revision; - apr_size_t offset = representation->offset; + window_cache_key_t key; + key.revision = representation->revision; + key.offset = representation->offset; - apr_size_t i = get_window_cache_index(fs, revision, offset); - window_cache_entry_t *entry = &fs->window_cache->entries[i]; - - /* if the capacity is exceeded, clear the cache */ - fs->window_cache->used += window->len; - if (fs->window_cache->used >= fs->window_cache->capacity) - { - svn_pool_clear(fs->window_cache->pool); - memset(fs->window_cache->entries, - 0, - sizeof(*fs->window_cache->entries) * fs->window_cache->entry_count); - fs->window_cache->used = window->len; - } - - /* set the entry to a copy of the window data */ - entry->window = svn_stringbuf_dup(window, fs->window_cache->pool); - entry->offset = offset; - entry->revision = revision; + return svn_error_trace(svn_cache__set(fs->window_cache, &key, window, + pool)); } -/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file. - Use POOL for temporary allocations. */ +/* Given rev pack PATH in FS, read the manifest file and return the offsets + * in *MANIFEST. Use POOL for allocations. 
+ */ static svn_error_t * read_manifest(apr_array_header_t **manifest, fs_fs_t *fs, @@ -409,6 +372,10 @@ read_manifest(apr_array_header_t **manif return svn_stream_close(manifest_stream); } +/* Read header information for the revision stored in FILE_CONTENT (one + * whole revision). Return the offsets within FILE_CONTENT for the + * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. + * Use POOL for temporary allocations. */ static svn_error_t * read_revision_header(apr_size_t *changes, apr_size_t *changes_len, @@ -447,8 +414,10 @@ read_revision_header(apr_size_t *changes return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, _("Final line in revision file missing space")); + /* terminate the header line */ *space = 0; - + + /* extract information */ SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10)); *root_noderev = (apr_size_t)val; SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10)); @@ -458,6 +427,10 @@ read_revision_header(apr_size_t *changes return SVN_NO_ERROR; } +/* Read the FSFS format number and sharding size from the format file at + * PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively. + * Use POOL for temporary allocations. + */ static svn_error_t * read_format(int *pformat, int *max_files_per_dir, const char *path, apr_pool_t *pool) @@ -467,6 +440,7 @@ read_format(int *pformat, int *max_files char buf[80]; apr_size_t len; + /* open format file and read the first line */ err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED, APR_OS_DEFAULT, pool); if (err && APR_STATUS_IS_ENOENT(err->apr_err)) @@ -541,21 +515,27 @@ read_format(int *pformat, int *max_files return svn_io_file_close(file, pool); } +/* Read the content of the file at PATH and return it in *RESULT. + * Use POOL for temporary allocations. 
+ */ static svn_error_t * read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool) { svn_stringbuf_t *content; - apr_int64_t number; + apr_uint64_t number; SVN_ERR(svn_stringbuf_from_file2(&content, path, pool)); content->data[content->len-1] = 0; - SVN_ERR(svn_cstring_atoi64(&number, content->data)); + SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10)); *result = (svn_revnum_t)number; return SVN_NO_ERROR; } +/* Create *FS for the repository at PATH and read the format and size info. + * Use POOL for temporary allocations. + */ static svn_error_t * fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool) { @@ -570,7 +550,8 @@ fs_open(fs_fs_t **fs, const char *path, pool)); if (((*fs)->format != 4) && ((*fs)->format != 6)) return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL); - + + /* read size (HEAD) info */ SVN_ERR(read_number(&(*fs)->min_unpacked_rev, svn_dirent_join(path, "db/min-unpacked-rev", pool), pool)); @@ -579,12 +560,18 @@ fs_open(fs_fs_t **fs, const char *path, pool); } +/* Utility function that returns true if STRING->DATA matches KEY. + */ static svn_boolean_t key_matches(svn_string_t *string, const char *key) { return strcmp(string->data, key) == 0; } +/* Comparator used for binary search comparing the absolute file offset + * of a representation to some other offset. DATA is a *representation_t, + * KEY is a pointer to an apr_size_t. + */ static int compare_representation_offsets(const void *data, const void *key) { @@ -597,6 +584,15 @@ compare_representation_offsets(const voi return diff > 0 ? 1 : 0; } +/* Find the revision_info_t object to the given REVISION in FS and return + * it in *REVISION_INFO. For performance reasons, we skip the lookup if + * the info is already provided. + * + * In that revision, look for the representation_t object for offset OFFSET. + * If it already exists, set *IDX to its index in *REVISION_INFO's + * representations list and return the representation object. 
Otherwise, + * set the index to where it must be inserted and return NULL. + */ static representation_t * find_representation(int *idx, fs_fs_t *fs, @@ -606,7 +602,8 @@ find_representation(int *idx, { revision_info_t *info; *idx = -1; - + + /* first let's find the revision */ info = revision_info ? *revision_info : NULL; if (info == NULL || info->revision != revision) { @@ -617,23 +614,36 @@ find_representation(int *idx, *revision_info = info; } + /* not found -> no result */ if (info == NULL) return NULL; + + assert(revision == info->revision); + /* look for the representation */ *idx = svn_sort__bsearch_lower_bound(&offset, info->representations, compare_representation_offsets); if (*idx < info->representations->nelts) { + /* return the representation, if this is the one we were looking for */ representation_t *result = APR_ARRAY_IDX(info->representations, *idx, representation_t *); if (result->offset == offset) return result; } + /* not parsed, yet */ return NULL; } +/* Read the representation header in FILE_CONTENT at OFFSET. Return its + * size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and + * return the deltification base representation in *REPRESENTATION. If + * there is none, set it to NULL. Use FS to look it up. + * + * Use POOL for allocations and SCRATCH_POOL for temporaries. 
+ */ static svn_error_t * read_rep_base(representation_t **representation, apr_size_t *header_size, @@ -649,10 +659,12 @@ read_rep_base(representation_t **represe svn_revnum_t revision; apr_uint64_t temp; + /* identify representation header (1 line) */ const char *buffer = file_content->data + offset; const char *line_end = strchr(buffer, '\n'); *header_size = line_end - buffer + 1; + /* check for PLAIN rep */ if (strncmp(buffer, "PLAIN\n", *header_size) == 0) { *is_plain = TRUE; @@ -660,6 +672,7 @@ read_rep_base(representation_t **represe return SVN_NO_ERROR; } + /* check for DELTA against empty rep */ *is_plain = FALSE; if (strncmp(buffer, "DELTA\n", *header_size) == 0) { @@ -671,7 +684,7 @@ read_rep_base(representation_t **represe str = apr_pstrndup(scratch_pool, buffer, line_end - buffer); last_str = str; - /* We hopefully have a DELTA vs. a non-empty base revision. */ + /* parse it. */ str = svn_cstring_tokenize(" ", &last_str); str = svn_cstring_tokenize(" ", &last_str); SVN_ERR(svn_revnum_parse(&revision, str, NULL)); @@ -679,10 +692,18 @@ read_rep_base(representation_t **represe str = svn_cstring_tokenize(" ", &last_str); SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10)); + /* it should refer to a rep in an earlier revision. Look it up */ *representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp); return SVN_NO_ERROR; } +/* Parse the representation reference (text: or props:) in VALUE, look + * it up in FS and return it in *REPRESENTATION. To be able to parse the + * base rep, we pass the FILE_CONTENT as well. + * + * If necessary, allocate the result in POOL; use SCRATCH_POOL for temp. + * allocations. 
+ */ static svn_error_t * parse_representation(representation_t **representation, fs_fs_t *fs, @@ -700,15 +721,20 @@ parse_representation(representation_t ** apr_uint64_t expanded_size; int idx; + /* read location (revision, offset) and size */ char *c = (char *)value->data; SVN_ERR(svn_revnum_parse(&revision, svn_cstring_tokenize(" ", &c), NULL)); SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); SVN_ERR(svn_cstring_strtoui64(&expanded_size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10)); + /* look it up */ result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset); if (!result) { + /* not parsed, yet (probably a rep in the same revision). + * Create a new rep object and determine its base rep as well. + */ apr_size_t header_size; svn_boolean_t is_plain; @@ -732,8 +758,10 @@ parse_representation(representation_t ** return SVN_NO_ERROR; } -/* Get the file content of revision REVISION in FS and return it in *DATA. - * Use SCRATCH_POOL for temporary allocations. +/* Get the unprocessed (i.e. still deltified) content of REPRESENTATION in + * FS and return it in *CONTENT. If no NULL, FILE_CONTENT must contain + * the contents of the revision that also contains the representation. + * Use POOL for allocations. */ static svn_error_t * get_rep_content(svn_stringbuf_t **content, @@ -765,7 +793,7 @@ get_rep_content(svn_stringbuf_t **conten offset = revision_info->offset + representation->offset + representation->header_size; - SVN_ERR(get_content(content, fs, revision, offset, + SVN_ERR(get_content(content, NULL, fs, revision, offset, representation->size, pool)); } @@ -773,8 +801,12 @@ get_rep_content(svn_stringbuf_t **conten } -/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta - window into *NWIN. */ +/* Read the delta window contents of all windows in REPRESENTATION in FS. 
+ * If not NULL, FILE_CONTENT must contain the contents of the revision that + * also contains the representation. + * Return the data as svn_txdelta_window_t* instances in *WINDOWS. + * Use POOL for allocations. + */ static svn_error_t * read_windows(apr_array_header_t **windows, fs_fs_t *fs, @@ -789,13 +821,16 @@ read_windows(apr_array_header_t **window *windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *)); + /* get the whole revision content */ SVN_ERR(get_rep_content(&content, fs, representation, file_content, pool)); + /* create a read stream and position it directly after the rep header */ content->data += 3; content->len -= 3; stream = svn_stream_from_stringbuf(content, pool); SVN_ERR(svn_stream_read(stream, &version, &len)); + /* read the windows from that stream */ while (TRUE) { svn_txdelta_window_t *window; @@ -816,9 +851,12 @@ read_windows(apr_array_header_t **window return SVN_NO_ERROR; } -/* Get the undeltified window that is a result of combining all deltas - from the current desired representation identified in *RB with its - base representation. Store the window in *RESULT. */ +/* Get the undeltified representation that is a result of combining all + * deltas from the current desired REPRESENTATION in FS with its base + * representation. If not NULL, FILE_CONTENT must contain the contents of + * the revision that also contains the representation. Store the result + * in *CONTENT. Use POOL for allocations. 
+ */ static svn_error_t * get_combined_window(svn_stringbuf_t **content, fs_fs_t *fs, @@ -830,23 +868,36 @@ get_combined_window(svn_stringbuf_t **co apr_array_header_t *windows; svn_stringbuf_t *base_content, *result; const char *source; - apr_pool_t *sub_pool = svn_pool_create(pool); - apr_pool_t *iter_pool = svn_pool_create(pool); + apr_pool_t *sub_pool; + apr_pool_t *iter_pool; + /* special case: no un-deltification necessary */ if (representation->is_plain) - return get_rep_content(content, fs, representation, file_content, pool); + { + SVN_ERR(get_rep_content(content, fs, representation, file_content, + pool)); + SVN_ERR(set_cached_window(fs, representation, *content, pool)); + return SVN_NO_ERROR; + } - *content = get_cached_window(fs, representation, pool); + /* special case: data already in cache */ + SVN_ERR(get_cached_window(content, fs, representation, pool)); if (*content) return SVN_NO_ERROR; + /* read the delta windows for this representation */ + sub_pool = svn_pool_create(pool); + iter_pool = svn_pool_create(pool); SVN_ERR(read_windows(&windows, fs, representation, file_content, sub_pool)); + + /* fetch the / create a base content */ if (representation->delta_base && representation->delta_base->revision) SVN_ERR(get_combined_window(&base_content, fs, representation->delta_base, NULL, sub_pool)); else base_content = svn_stringbuf_create_empty(sub_pool); + /* apply deltas */ result = svn_stringbuf_create_empty(pool); source = base_content->data; @@ -867,14 +918,17 @@ get_combined_window(svn_stringbuf_t **co svn_pool_clear(iter_pool); } + /* cache result and return it */ + SVN_ERR(set_cached_window(fs, representation, result, sub_pool)); + *content = result; + svn_pool_destroy(iter_pool); svn_pool_destroy(sub_pool); - - set_cached_window(fs, representation, result); - *content = result; + return SVN_NO_ERROR; } +/* forward declaration */ static svn_error_t * read_noderev(fs_fs_t *fs, svn_stringbuf_t *file_content, @@ -883,6 +937,12 @@ 
read_noderev(fs_fs_t *fs, apr_pool_t *pool, apr_pool_t *scratch_pool); +/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all + * DAG nodes, directories and representations linked in that tree structure. + * Store them in FS and REVISION_INFO. Also, read them only once. + * + * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. + */ static svn_error_t * parse_dir(fs_fs_t *fs, svn_stringbuf_t *file_content, @@ -898,9 +958,11 @@ parse_dir(fs_fs_t *fs, const char *revision_key; apr_size_t key_len; + /* special case: empty dir rep */ if (representation == NULL) return SVN_NO_ERROR; + /* get the directory as unparsed string */ iter_pool = svn_pool_create(scratch_pool); text_pool = svn_pool_create(scratch_pool); @@ -908,14 +970,16 @@ parse_dir(fs_fs_t *fs, text_pool)); current = text->data; + /* calculate some invariants */ revision_key = apr_psprintf(text_pool, "r%ld/", representation->revision); key_len = strlen(revision_key); - /* Translate the string dir entries into real entries. */ + /* Parse and process all directory entries. */ while (*current != 'E') { char *next; + /* skip "K ???\n<name>\nV ???\n" lines*/ current = strchr(current, '\n'); if (current) current = strchr(current+1, '\n'); @@ -927,11 +991,14 @@ parse_dir(fs_fs_t *fs, _("Corrupt directory representation in rev %ld at offset %ld"), representation->revision, (long)representation->offset); - + + /* iff this entry refers to a node in the same revision as this dir, + * recurse into that node */ *next = 0; current = strstr(current, revision_key); if (current) { + /* recurse */ apr_uint64_t offset; SVN_ERR(svn_cstring_strtoui64(&offset, current + key_len, 0, @@ -949,6 +1016,13 @@ parse_dir(fs_fs_t *fs, return SVN_NO_ERROR; } +/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes, + * directories and representations linked in that tree structure. Store + * them in FS and REVISION_INFO. Also, read them only once. Return the + * result in *NODEREV. 
+ * + * Use POOL for persistent allocations and SCRATCH_POOL for temporaries. + */ static svn_error_t * read_noderev(fs_fs_t *fs, svn_stringbuf_t *file_content, @@ -964,9 +1038,11 @@ read_noderev(fs_fs_t *fs, svn_boolean_t is_dir = FALSE; scratch_pool = svn_pool_create(scratch_pool); - + + /* parse the noderev line-by-line until we find an empty line */ while (1) { + /* for this line, extract key and value. Ignore invalid values */ svn_string_t key; svn_string_t value; char *sep; @@ -975,6 +1051,8 @@ read_noderev(fs_fs_t *fs, line = svn_string_ncreate(start, end - start, scratch_pool); offset += end - start + 1; + + /* empty line -> end of noderev data */ if (line->len == 0) break; @@ -992,6 +1070,7 @@ read_noderev(fs_fs_t *fs, value.data = sep + 2; value.len = line->len - (key.len + 2); + /* translate (key, value) into noderev elements */ if (key_matches(&key, "type")) is_dir = strcmp(value.data, "dir") == 0; else if (key_matches(&key, "text")) @@ -999,6 +1078,8 @@ read_noderev(fs_fs_t *fs, SVN_ERR(parse_representation(&text, fs, file_content, &value, revision_info, pool, scratch_pool)); + + /* if we are the first to use this rep, mark it as "text rep" */ if (++text->ref_count == 1) text->kind = is_dir ? dir_rep : file_rep; } @@ -1007,15 +1088,20 @@ read_noderev(fs_fs_t *fs, SVN_ERR(parse_representation(&props, fs, file_content, &value, revision_info, pool, scratch_pool)); + + /* if we are the first to use this rep, mark it as "prop rep" */ if (++props->ref_count == 1) props->kind = is_dir ? 
dir_property_rep : file_property_rep; } } + /* if this is a directory and has not been processed, yet, read and + * process it recursively */ if (is_dir && text && text->ref_count == 1) SVN_ERR(parse_dir(fs, file_content, text, revision_info, pool, scratch_pool)); + /* update stats */ if (is_dir) { revision_info->dir_noderev_size += offset - start_offset; @@ -1031,6 +1117,9 @@ read_noderev(fs_fs_t *fs, return SVN_NO_ERROR; } +/* Given the unparsed changes list in CHANGES with LEN chars, return the + * number of changed paths encoded in it. + */ static apr_size_t get_change_count(const char *changes, apr_size_t len) @@ -1038,19 +1127,27 @@ get_change_count(const char *changes, apr_size_t lines = 0; const char *end = changes + len; + /* line count */ for (; changes < end; ++changes) if (*changes == '\n') ++lines; + /* two lines per change */ return lines / 2; } -static void print_progress(svn_revnum_t revision) +/* Simple utility to print a REVISION number and make it appear immediately. + */ +static void +print_progress(svn_revnum_t revision) { printf("%8ld", revision); fflush(stdout); } +/* Read the content of the pack file starting at revision BASE and store it + * in FS. Use POOL for allocations. 
+ */ static svn_error_t * read_pack_file(fs_fs_t *fs, svn_revnum_t base, @@ -1061,19 +1158,24 @@ read_pack_file(fs_fs_t *fs, apr_pool_t *iter_pool = svn_pool_create(local_pool); int i; apr_off_t file_size = 0; + apr_file_t *file; const char *pack_folder = get_pack_folder(fs, base, local_pool); + /* parse the manifest file */ SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool)); if (manifest->nelts != fs->max_files_per_dir) return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL); - SVN_ERR(rev_or_pack_file_size(&file_size, fs, base, pool)); + SVN_ERR(open_rev_or_pack_file(&file, fs, base, local_pool)); + SVN_ERR(get_file_size(&file_size, file, local_pool)); + /* process each revision in the pack file */ for (i = 0; i < manifest->nelts; ++i) { apr_size_t root_node_offset; svn_stringbuf_t *rev_content; + /* create the revision info for the current rev */ revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*)); @@ -1083,7 +1185,7 @@ read_pack_file(fs_fs_t *fs, ? APR_ARRAY_IDX(manifest, i+1 , apr_size_t) : file_size; - SVN_ERR(get_content(&rev_content, fs, info->revision, + SVN_ERR(get_content(&rev_content, file, fs, info->revision, info->offset, info->end - info->offset, iter_pool)); @@ -1103,15 +1205,20 @@ read_pack_file(fs_fs_t *fs, info->representations = apr_array_copy(pool, info->representations); APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; + /* destroy temps */ svn_pool_clear(iter_pool); } + /* one more pack file processed */ print_progress(base); apr_pool_destroy(local_pool); return SVN_NO_ERROR; } +/* Read the content of the file for REVISION and store its contents in FS. + * Use POOL for allocations. 
+ */ static svn_error_t * read_revision_file(fs_fs_t *fs, svn_revnum_t revision, @@ -1122,16 +1229,21 @@ read_revision_file(fs_fs_t *fs, svn_stringbuf_t *rev_content; revision_info_t *info = apr_pcalloc(pool, sizeof(*info)); apr_off_t file_size = 0; + apr_file_t *file; - SVN_ERR(rev_or_pack_file_size(&file_size, fs, revision, pool)); + /* read the whole pack file into memory */ + SVN_ERR(open_rev_or_pack_file(&file, fs, revision, local_pool)); + SVN_ERR(get_file_size(&file_size, file, local_pool)); + /* create the revision info for the current rev */ info->representations = apr_array_make(pool, 4, sizeof(representation_t*)); info->revision = revision; info->offset = 0; info->end = file_size; - SVN_ERR(get_content(&rev_content, fs, revision, 0, file_size, local_pool)); + SVN_ERR(get_content(&rev_content, file, fs, revision, 0, file_size, + local_pool)); SVN_ERR(read_revision_header(&info->changes, &info->changes_len, @@ -1139,16 +1251,19 @@ read_revision_file(fs_fs_t *fs, rev_content, local_pool)); + /* put it into our containers */ APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info; info->change_count = get_change_count(rev_content->data + info->changes, info->changes_len); + /* parse the revision content recursively. */ SVN_ERR(read_noderev(fs, rev_content, root_node_offset, info, pool, local_pool)); + /* show progress every 1000 revs or so */ if (revision % fs->max_files_per_dir == 0) print_progress(revision); @@ -1157,6 +1272,10 @@ read_revision_file(fs_fs_t *fs, return SVN_NO_ERROR; } +/* Read the repository at PATH beginning with revision START_REVISION and + * return the result in *FS. Allocate caches with MEMSIZE bytes total + * capacity. Use POOL for non-cache allocations. 
+ */ static svn_error_t * read_revisions(fs_fs_t **fs, const char *path, @@ -1165,63 +1284,97 @@ read_revisions(fs_fs_t **fs, apr_pool_t *pool) { svn_revnum_t revision; - apr_size_t window_cache_size; + svn_cache_config_t cache_config = *svn_cache_config_get(); /* determine cache sizes */ if (memsize < 100) memsize = 100; - - window_cache_size = memsize * 1024 * 1024; + + cache_config.cache_size = memsize * 1024 * 1024; + svn_cache_config_set(&cache_config); SVN_ERR(fs_open(fs, path, pool)); + /* create data containers and caches */ (*fs)->start_revision = start_revision - (start_revision % (*fs)->max_files_per_dir); (*fs)->revisions = apr_array_make(pool, (*fs)->max_revision + 1 - (*fs)->start_revision, sizeof(revision_info_t *)); (*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base)); - (*fs)->window_cache = create_window_cache - (apr_allocator_owner_get - (svn_pool_create_allocator(FALSE)), - 10000, window_cache_size); + SVN_ERR(svn_cache__create_membuffer_cache(&(*fs)->window_cache, + svn_cache__get_global_membuffer_cache(), + NULL, NULL, + sizeof(window_cache_key_t), + "", FALSE, pool)); + + /* read all packed revs */ for ( revision = start_revision ; revision < (*fs)->min_unpacked_rev ; revision += (*fs)->max_files_per_dir) SVN_ERR(read_pack_file(*fs, revision, pool)); - + + /* read non-packed revs */ for ( ; revision <= (*fs)->max_revision; ++revision) SVN_ERR(read_revision_file(*fs, revision, pool)); return SVN_NO_ERROR; } +/* Compression statistics we collect over a given set of representations. + */ typedef struct rep_pack_stats_t { + /* number of representations */ apr_int64_t count; + + /* total size after deltification (i.e. on disk size) */ apr_int64_t packed_size; + + /* total size after de-deltification (i.e. plain text size) */ apr_int64_t expanded_size; + + /* total on-disk header size */ apr_int64_t overhead_size; } rep_pack_stats_t; +/* Statistics we collect over a given set of representations. 
+ * We group them into shared and non-shared ("unique") reps. + */ typedef struct representation_stats_t { + /* stats over all representations */ rep_pack_stats_t total; + + /* stats over those representations with ref_count == 1 */ rep_pack_stats_t uniques; + + /* stats over those representations with ref_count > 1 */ rep_pack_stats_t shared; + /* sum of all ref_counts */ apr_int64_t references; + + /* sum of ref_count * expanded_size, + * i.e. total plaintext content if there was no rep sharing */ apr_int64_t expanded_size; } representation_stats_t; +/* Basic statistics we collect over a given set of noderevs. + */ typedef struct node_stats_t { + /* number of noderev structs */ apr_int64_t count; + + /* their total size on disk (structs only) */ apr_int64_t size; } node_stats_t; +/* Accumulate stats of REP in STATS. + */ static void add_rep_pack_stats(rep_pack_stats_t *stats, representation_t *rep) @@ -1230,9 +1383,11 @@ add_rep_pack_stats(rep_pack_stats_t *sta stats->packed_size += rep->size; stats->expanded_size += rep->expanded_size; - stats->overhead_size += rep->header_size + 7; + stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; } +/* Accumulate stats of REP in STATS. + */ static void add_rep_stats(representation_stats_t *stats, representation_t *rep) @@ -1247,6 +1402,9 @@ add_rep_stats(representation_stats_t *st stats->expanded_size += rep->ref_count * rep->expanded_size; } +/* Print statistics for the given group of representations to console. + * Use POOL for allocations. + */ static void print_rep_stats(representation_stats_t *stats, apr_pool_t *pool) @@ -1267,12 +1425,16 @@ print_rep_stats(representation_stats_t * svn__i64toa_sep(stats->references - stats->total.count, ',', pool)); } +/* Post-process stats for FS and print them to the console. + * Use POOL for allocations. 
+ */ static void print_stats(fs_fs_t *fs, apr_pool_t *pool) { int i, k; - + + /* initialize stats to collect */ representation_stats_t file_rep_stats = { { 0 } }; representation_stats_t dir_rep_stats = { { 0 } }; representation_stats_t file_prop_rep_stats = { { 0 } }; @@ -1286,11 +1448,14 @@ print_stats(fs_fs_t *fs, apr_int64_t total_size = 0; apr_int64_t change_count = 0; apr_int64_t change_len = 0; - + + /* aggregate info from all revisions */ for (i = 0; i < fs->revisions->nelts; ++i) { revision_info_t *revision = APR_ARRAY_IDX(fs->revisions, i, revision_info_t *); + + /* data gathered on a revision level */ change_count += revision->change_count; change_len += revision->changes_len; total_size += revision->end - revision->offset; @@ -1303,11 +1468,14 @@ print_stats(fs_fs_t *fs, + revision->file_noderev_count; total_node_stats.size += revision->dir_noderev_size + revision->file_noderev_size; - + + /* process representations */ for (k = 0; k < revision->representations->nelts; ++k) { representation_t *rep = APR_ARRAY_IDX(revision->representations, k, representation_t *); + + /* accumulate in the right bucket */ switch(rep->kind) { case file_rep: @@ -1330,6 +1498,7 @@ print_stats(fs_fs_t *fs, } } + /* print results */ printf("\nGlobal statistics:\n"); printf(_("%20s bytes in %12s revisions\n" "%20s bytes in %12s changes\n" @@ -1388,6 +1557,9 @@ print_stats(fs_fs_t *fs, print_rep_stats(&file_prop_rep_stats, pool); } +/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and + * POOL for allocations. + */ static void print_usage(svn_stream_t *ostream, const char *progname, apr_pool_t *pool) @@ -1404,6 +1576,7 @@ print_usage(svn_stream_t *ostream, const progname)); } +/* linear control flow */ int main(int argc, const char *argv[]) { apr_pool_t *pool;
Modified: subversion/branches/javahl-ra/tools/server-side/svnauthz-validate.c URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnauthz-validate.c?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnauthz-validate.c (original) +++ subversion/branches/javahl-ra/tools/server-side/svnauthz-validate.c Fri Dec 28 23:03:51 2012 @@ -34,6 +34,7 @@ #include "svn_pools.h" #include "svn_repos.h" #include "svn_utf.h" +#include "svn_path.h" enum { OPT_USERNAME = SVN_OPT_FIRST_LONGOPT_ID, @@ -44,11 +45,13 @@ enum { static int usage(const char *argv0) { - printf("Usage: %s FILE [--username USER [--path FSPATH] [--repository REPOS_NAME]] FILE\n\n", argv0); - printf("Loads the authz file at FILE and validates its syntax.\n" + printf("Usage: %s [--username USER] [[--path FSPATH] [--repository REPOS_NAME]] TARGET\n\n", argv0); + printf("Loads the authz file at TARGET and validates its syntax.\n" "Optionally prints the access available to USER for FSPATH in\n" "repository with authz name REPOS_NAME. If FSPATH is omitted, reports\n" "whether USER has any access at all.\n" + "TARGET can be a path to a file or an absolute file:// URL to an authz\n" + "file in a repository, but cannot be a repository relative URL (^/).\n" "Returns:\n" " 0 when syntax is OK.\n" " 1 when syntax is invalid.\n" @@ -141,10 +144,15 @@ main(int argc, const char **argv) return 2; } - opts.authz_file = svn_dirent_internal_style(opts.authz_file, pool); + /* Can't accept repos relative urls since we don't have the path to the + * repository and URLs don't need to be converted to internal style. */ + if (svn_path_is_repos_relative_url(opts.authz_file)) + return usage(argv[0]); + else if (!svn_path_is_url(opts.authz_file)) + opts.authz_file = svn_dirent_internal_style(opts.authz_file, pool); /* Read the access file and validate it. 
*/ - err = svn_repos_authz_read(&authz, opts.authz_file, TRUE, pool); + err = svn_repos_authz_read2(&authz, opts.authz_file, TRUE, NULL, pool); /* Optionally, print the access a USER has to a given PATH in REPOS. PATH and REPOS may be NULL. */ Modified: subversion/branches/javahl-ra/tools/server-side/svnpubsub/commit-hook.py URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnpubsub/commit-hook.py?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnpubsub/commit-hook.py (original) +++ subversion/branches/javahl-ra/tools/server-side/svnpubsub/commit-hook.py Fri Dec 28 23:03:51 2012 @@ -42,11 +42,11 @@ def svncmd_uuid(repo): def svncmd_info(repo, revision): cmd = "%s info -r %s %s" % (SVNLOOK, revision, repo) p = svncmd(cmd) - data = p.stdout.read().strip().split("\n") + data = p.stdout.read().split("\n") #print data - return {'author': data[0], - 'date': data[1], - 'log': "\n".join(data[3:])} + return {'author': data[0].strip(), + 'date': data[1].strip(), + 'log': "\n".join(data[3:]).strip()} def svncmd_dirs(repo, revision): cmd = "%s dirs-changed -r %s %s" % (SVNLOOK, revision, repo) @@ -59,6 +59,19 @@ def svncmd_dirs(repo, revision): dirs.append(line.strip()) return dirs +def svncmd_changed(repo, revision): + cmd = "%s changed -r %s %s" % (SVNLOOK, revision, repo) + p = svncmd(cmd) + changed = {} + while True: + line = p.stdout.readline() + if not line: + break + line = line.strip() + (flags, filename) = (line[0:3], line[4:]) + changed[filename] = {'flags': flags} + return changed + def do_put(body): opener = urllib2.build_opener(urllib2.HTTPHandler) request = urllib2.Request("http://%s:%d/dirs-changed" %(HOST, PORT), data=body) @@ -72,12 +85,14 @@ def main(repo, revision): i = svncmd_info(repo, revision) data = {'revision': int(revision), 'dirs_changed': [], + 'changed': {}, 'repos': svncmd_uuid(repo), 
'author': i['author'], 'log': i['log'], 'date': i['date'], } data['dirs_changed'].extend(svncmd_dirs(repo, revision)) + data['changed'].update(svncmd_changed(repo, revision)) body = json.dumps(data) #print body do_put(body) Modified: subversion/branches/javahl-ra/tools/server-side/svnpubsub/irkerbridge.py URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnpubsub/irkerbridge.py?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnpubsub/irkerbridge.py (original) +++ subversion/branches/javahl-ra/tools/server-side/svnpubsub/irkerbridge.py Fri Dec 28 23:03:51 2012 @@ -145,6 +145,12 @@ class BigDoEverythingClass(object): return result def fill_in_extra_args(self, rev): + # Set any empty members to the string "<null>" + v = vars(rev) + for k in v.keys(): + if not v[k]: + v[k] = '<null>' + # Add entries to the rev object that are useful for # formatting. 
rev.log_firstline = rev.log.split("\n",1)[0] Modified: subversion/branches/javahl-ra/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd (original) +++ subversion/branches/javahl-ra/tools/server-side/svnpubsub/rc.d/svnpubsub.freebsd Fri Dec 28 23:03:51 2012 @@ -20,11 +20,13 @@ svnpubsub_user=${svnpubsub_user-"svn"} svnpubsub_group=${svnpubsub_group-"svn"} svnpubsub_reactor=${svnpubsub_reactor-"poll"} svnpubsub_pidfile=${svnpubsub_pidfile-"/var/run/svnpubsub/svnpubsub.pid"} +svnpubsub_cmd_int=${svnpubsub_cmd_int-"python"} pidfile="${svnpubsub_pidfile}" export PYTHON_EGG_CACHE="/home/svn/.python-eggs" command="/usr/local/bin/twistd" +command_interpreter="/usr/local/bin/${svnpubsub_cmd_int}" command_args="-y /usr/local/svnpubsub/svnpubsub.tac \ --logfile=/var/log/vc/svnpubsub.log \ --pidfile=${pidfile} \ Modified: subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/client.py URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/client.py?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/client.py (original) +++ subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/client.py Fri Dec 28 23:03:51 2012 @@ -119,10 +119,15 @@ class XMLStreamHandler(xml.sax.handler.C self.rev = None self.chars = '' + self.parent = None + self.attrs = [ ] def startElement(self, name, attrs): + self.attrs = attrs if name == 'commit': self.rev = Revision(attrs['repository'], int(attrs['revision'])) + elif name == "dirs_changed" or name == "changed": + 
self.parent = name # No other elements to worry about. def characters(self, data): @@ -134,10 +139,15 @@ class XMLStreamHandler(xml.sax.handler.C self.rev = None elif name == 'stillalive': self.event_callback('ping') + elif name == self.parent: + self.parent = None elif self.chars and self.rev: value = self.chars.strip() - if name == 'path': + if self.parent == 'dirs_changed' and name == 'path': self.rev.dirs_changed.append(value.decode('unicode_escape')) + elif self.parent == 'changed' and name == 'path': + path = value.decode('unicode_escape') + self.rev.changed[path] = dict(p for p in self.attrs.items()) elif name == 'author': self.rev.author = value.decode('unicode_escape') elif name == 'date': @@ -147,6 +157,8 @@ class XMLStreamHandler(xml.sax.handler.C # Toss out any accumulated characters for this element. self.chars = '' + # Toss out the saved attributes for this element. + self.attrs = [ ] class Revision(object): @@ -154,6 +166,7 @@ class Revision(object): self.uuid = uuid self.rev = rev self.dirs_changed = [ ] + self.changed = { } self.author = None self.date = None self.log = None Modified: subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/server.py URL: http://svn.apache.org/viewvc/subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/server.py?rev=1426654&r1=1426653&r2=1426654&view=diff ============================================================================== --- subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/server.py (original) +++ subversion/branches/javahl-ra/tools/server-side/svnpubsub/svnpubsub/server.py Fri Dec 28 23:03:51 2012 @@ -78,7 +78,8 @@ class Revision: # thus creating invalid XML, so the XML code paths do escaping. 
self.rev = r.get('revision') self.repos = r.get('repos') - self.dirs_changed = [x for x in r.get('dirs_changed')] + self.dirs_changed = r.get('dirs_changed') + self.changed = r.get('changed') self.author = r.get('author') self.log = r.get('log') self.date = r.get('date') @@ -88,6 +89,7 @@ class Revision: return json.dumps({'commit': {'repository': self.repos, 'revision': self.rev, 'dirs_changed': self.dirs_changed, + 'changed': self.changed, 'author': self.author, 'log': self.log, 'date': self.date}}) +"," @@ -100,6 +102,11 @@ class Revision: for p in self.dirs_changed: x = ET.SubElement(d, 'path') x.text = p.encode('unicode_escape') + ch = ET.SubElement(c, 'changed') + for chp in self.changed.keys(): + x = ET.SubElement(ch, 'path', self.changed[chp]) + x.text = chp.encode('unicode_escape') + str = ET.tostring(c, 'UTF-8') + "\n" return str[39:] else: Propchange: subversion/branches/javahl-ra/tools/server-side/svnpubsub/svntweet.py ------------------------------------------------------------------------------ svn:executable = *
