David Turner <dtur...@twopensource.com> writes:

> In traverse_trees, we generate the complete traverse path for a
> traverse_info.  Later, in do_compare_entry, we used to go do a bunch
> of work to compare the traverse_info to a cache_entry's name without
> computing that path.  But since we already have that path, we don't
> need to do all that work.  Instead, we can just stuff the generated
> path into the traverse_info, and do the comparison more directly.
> This makes git checkout much faster -- about 25% on Twitter's
> monorepo.  Deeper directory trees are likely to benefit more than
> shallower ones.

Great work.

IIRC, very early incarnation of the code avoided hard to build the
full path and that was why the path-component-wise comparison was
used heavily in this codepath; at some point we just gave up, I
think.  If we can pass this flattened representation around to
callees that can make good use of it, that makes tons of sense.  I
like the basic idea of this change.

I am bit worried that &base is passed to some function from here,
and they do not take "const struct strbuf *", but a non-const one,
allowing them to potentially modify its contents while this new
field in info struct wants to have the original base.buf, but I
trust you traced the callchain to make sure nothing wrong happens?
Even if that is the case, I feel that this change is setting up a
trap somebody else would easily trigger unknowingly--I wonder how
we can avoid future breakages.


> Signed-off-by: David Turner <dtur...@twopensource.com>
> ---
>  tree-walk.c    |  4 ++++
>  tree-walk.h    |  1 +
>  unpack-trees.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/tree-walk.c b/tree-walk.c
> index 6dccd2d..4cab3e1 100644
> --- a/tree-walk.c
> +++ b/tree-walk.c
> @@ -329,6 +329,9 @@ int traverse_trees(int n, struct tree_desc *t, struct 
> traverse_info *info)
>               make_traverse_path(base.buf, info->prev, &info->name);
>               base.buf[info->pathlen-1] = '/';
>               strbuf_setlen(&base, info->pathlen);
> +             info->traverse_path = base.buf;
> +     } else {
> +             info->traverse_path = info->name.path;
>       }
>       for (;;) {
>               int trees_used;
> @@ -411,6 +414,7 @@ int traverse_trees(int n, struct tree_desc *t, struct 
> traverse_info *info)
>       for (i = 0; i < n; i++)
>               free_extended_entry(tx + i);
>       free(tx);
> +     info->traverse_path = NULL;
>       strbuf_release(&base);
>       return error;
>  }
> diff --git a/tree-walk.h b/tree-walk.h
> index 3b2f7bf..174eb61 100644
> --- a/tree-walk.h
> +++ b/tree-walk.h
> @@ -59,6 +59,7 @@ enum follow_symlinks_result {
>  enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char 
> *tree_sha1, const char *name, unsigned char *result, struct strbuf 
> *result_path, unsigned *mode);
>  
>  struct traverse_info {
> +     const char *traverse_path;
>       struct traverse_info *prev;
>       struct name_entry name;
>       int pathlen;
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 8e2032f..127dd4d 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned 
> long dirmask,
>   * itself - the caller needs to do the final check for the cache
>   * entry having more data at the end!
>   */
> -static int do_compare_entry(const struct cache_entry *ce, const struct 
> traverse_info *info, const struct name_entry *n)
> +static int do_compare_entry_piecewise(const struct cache_entry *ce, const 
> struct traverse_info *info, const struct name_entry *n)
>  {
>       int len, pathlen, ce_len;
>       const char *ce_name;
>  
>       if (info->prev) {
> -             int cmp = do_compare_entry(ce, info->prev, &info->name);
> +             int cmp = do_compare_entry_piecewise(ce, info->prev,
> +                                                  &info->name);
>               if (cmp)
>                       return cmp;
>       }
> @@ -522,6 +523,42 @@ static int do_compare_entry(const struct cache_entry 
> *ce, const struct traverse_
>       return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
>  }
>  
> +static int do_compare_entry(const struct cache_entry *ce,
> +                         const struct traverse_info *info,
> +                         const struct name_entry *n)
> +{
> +     int len, pathlen, ce_len;
> +     const char *ce_name;
> +     int cmp;
> +
> +     /*
> +      * If we have not precomputed the traverse path, it is quicker
> +      * to avoid doing so.  But if we have precomputed it,
> +      * it is quicker to use the precomputed version.
> +      */
> +     if (!info->traverse_path)
> +             return do_compare_entry_piecewise(ce, info, n);
> +
> +     cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
> +     if (cmp)
> +             return cmp;
> +
> +     pathlen = info->pathlen;
> +     ce_len = ce_namelen(ce);
> +
> +     if (ce_len < pathlen) {
> +             if (do_compare_entry_piecewise(ce, info, n) >= 0)
> +                     die("pathlen");
> +             return -1;
> +     }
> +
> +     ce_len -= pathlen;
> +     ce_name = ce->name + pathlen;
> +
> +     len = tree_entry_len(n);
> +     return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
> +}
> +
>  static int compare_entry(const struct cache_entry *ce, const struct 
> traverse_info *info, const struct name_entry *n)
>  {
>       int cmp = do_compare_entry(ce, info, n);
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to