On Mon, Sep 16, 2013 at 11:42 AM, Nicolas Pitre <n...@fluxnic.net> wrote:
> On Sun, 15 Sep 2013, Duy Nguyen wrote:
>
>> On Sat, Sep 14, 2013 at 11:22 AM, Nicolas Pitre <n...@fluxnic.net> wrote:
>> > The cache is currently updated by the caller.  The caller may ask for a
>> > copy of 2 entries from a base object, but that base object may itself
>> > copy those objects from somewhere else in a larger chunk.
>> >
>> > Let's consider this example:
>> >
>> > tree A
>> > ------
>> > 0 (0) copy 2 entries from tree B starting at entry 0
>> > 1 (2) copy 1 entry from tree B starting at entry 3
>> >
>> > tree B
>> > ------
>> > 0 (0) copy 6 entries from tree C starting at entry 0
>> > 1 (6) entry "foo.txt"
>> > 2 (7) entry "bar.txt"
>> >
>> > Right now, the code calls decode_entries() to decode 2 entries from tree
>> > B but those entries are part of a copy from tree C.  When that call
>> > returns, the cache is updated as if tree B entry #2 would start at
>> > offset 1 but this is wrong because offset 0 in tree B covers 6 entries
>> > and therefore offset 1 is for entry #6.
>> >
>> > So this needs a rethink.
>>
>> I've given it some thought and see no simple/efficient way to do it when
>> 2+ depth is involved. Ideally tree A should refer to tree C directly
>> for the first two entries, but in general we can't enforce that a copy
>> sequence must refer to non-copy sequences only. Caching flattened tree
>> B up until the 6th entry may help, but then there's no need to cache
>> offsets anymore because we could just cache tree A..
>
> Well... for the case where tree A should refer to tree C directly, that
> should be optimized by packv4-create/pack-objects.
>
> But as far as my offset cache is concerned, I came around to rethink it.
> I managed to write decent and logical code this time.

OK if I read your patch correctly, in the example above, after
processing tree A entry #0, the position of tree B entry #0 is cached
(instead of #6). When tree A #2 is processed, we check out the cache
and parse tree B's copy sequence again, which leads to a cached
position of tree C, correct? We still have to jump through tree B
unnecessarily, but that can be dealt with later. Good thing that it
works, and the perf improvement is impressive too. I think we now have
the core of "struct pv4_tree_desc" :)
-- 
Duy

> The speed-up is significant even without any tuning.  Here it is:
>
> commit aa43ec18956a2c835112f086a0a59d7fbc35a021
> Author: Nicolas Pitre <n...@fluxnic.net>
> Date:   Fri Sep 13 20:56:31 2013 -0400
>
>     packv4-parse.c: add tree offset caching
>
>     It is a common pattern to see a tree object copy a few entries from 
> another
>     tree object, possibly from a non zero offset, then provide a few entries
>     of its own just to go back to the previous tree object to copy some more
>     entries.  Each time this happens, the tree object being copied has to be
>     parsed from the beginning over again up to the desired offset which is
>     rather wasteful.
>
>     Let's introduce a tree offset cache to record the entry position and 
> offset
>     when tree parsing stops so a subsequent copy from the same tree object
>     can be resumed without having to start from the beginning all the time.
>
>     Without doing further tuning on this cache, performing "git rev-list --all
>     --objects | wc -l" on my copy of git.git went from 14.5s down to 6.6s.
>
>     Signed-off-by: Nicolas Pitre <n...@fluxnic.net>
>
> diff --git a/packv4-parse.c b/packv4-parse.c
> index 38c22b0..b8af702 100644
> --- a/packv4-parse.c
> +++ b/packv4-parse.c
> @@ -378,6 +378,40 @@ static int copy_canonical_tree_entries(struct packed_git 
> *p, off_t offset,
>         return 0;
>  }
>
> +/* ordering is so that member alignment takes the least amount of space */
> +struct pv4_tree_cache {
> +       off_t base_offset;
> +       off_t offset;
> +       off_t last_copy_base;
> +       struct packed_git *p;
> +       unsigned int pos;
> +       unsigned int nb_entries;
> +};
> +
> +#define CACHE_SIZE 256
> +static struct pv4_tree_cache pv4_tree_cache[CACHE_SIZE];
> +
> +static struct pv4_tree_cache *get_tree_offset_cache(struct packed_git *p, 
> off_t base_offset)
> +{
> +       struct pv4_tree_cache *c;
> +       unsigned long hash;
> +
> +       hash = (unsigned long)p + (unsigned long)base_offset;
> +       hash += (hash >> 8) + (hash >> 16);
> +       hash %= CACHE_SIZE;
> +
> +       c = &pv4_tree_cache[hash];
> +       if (c->p != p || c->base_offset != base_offset) {
> +               c->p = p;
> +               c->base_offset = base_offset;
> +               c->offset = 0;
> +               c->last_copy_base = 0;
> +               c->pos = 0;
> +               c->nb_entries = 0;
> +       }
> +       return c;
> +}
> +
>  static int tree_entry_prefix(unsigned char *buf, unsigned long size,
>                              const unsigned char *path, int path_len,
>                              unsigned mode)
> @@ -405,39 +439,72 @@ static int tree_entry_prefix(unsigned char *buf, 
> unsigned long size,
>  }
>
>  static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
> -                         off_t offset, unsigned int start, unsigned int 
> count,
> -                         unsigned char **dstp, unsigned long *sizep,
> -                         int parse_hdr)
> +                         off_t obj_offset, unsigned int start, unsigned int 
> count,
> +                         unsigned char **dstp, unsigned long *sizep)
>  {
>         unsigned long avail;
> -       unsigned int nb_entries;
>         const unsigned char *src, *scp;
> -       off_t copy_objoffset = 0;
> +       unsigned int curpos;
> +       off_t offset, copy_objoffset;
> +       struct pv4_tree_cache *c;
> +
> +       c = get_tree_offset_cache(p, obj_offset);
> +       if (count && start < c->nb_entries && start >= c->pos &&
> +           count <= c->nb_entries - start) {
> +               offset = c->offset;
> +               copy_objoffset = c->last_copy_base;
> +               curpos = c->pos;
> +               start -= curpos;
> +               src = NULL;
> +               avail = 0;
> +       } else {
> +               unsigned int nb_entries;
>
> -       src = use_pack(p, w_curs, offset, &avail);
> -       scp = src;
> +               src = use_pack(p, w_curs, obj_offset, &avail);
> +               scp = src;
>
> -       if (parse_hdr) {
>                 /* we need to skip over the object header */
>                 while (*scp & 128)
>                         if (++scp - src >= avail - 20)
>                                 return -1;
> +
>                 /* is this a canonical tree object? */
> -               if ((*scp & 0xf) == OBJ_TREE)
> +               if ((*scp & 0xf) == OBJ_TREE) {
> +                       offset = obj_offset + (scp - src);
>                         return copy_canonical_tree_entries(p, offset,
>                                                            start, count,
>                                                            dstp, sizep);
> +               }
> +
>                 /* let's still make sure this is actually a pv4 tree */
>                 if ((*scp++ & 0xf) != OBJ_PV4_TREE)
>                         return -1;
> -       }
>
> -       nb_entries = decode_varint(&scp);
> -       if (scp == src || start > nb_entries || count > nb_entries - start)
> -               return -1;
> -       offset += scp - src;
> -       avail -= scp - src;
> -       src = scp;
> +               nb_entries = decode_varint(&scp);
> +               if (!count)
> +                       count = nb_entries;
> +               if (!nb_entries || start > nb_entries ||
> +                   count > nb_entries - start)
> +                       return -1;
> +
> +               curpos = 0;
> +               copy_objoffset = 0;
> +               offset = obj_offset + scp - src;
> +               avail -= scp - src;
> +               src = scp;
> +
> +               /*
> +                * If this is a partial copy, let's (re)initialize a cache
> +                * entry to speed things up if the remaining of this tree
> +                * is needed in the future.
> +                */
> +               if (start + count < nb_entries) {
> +                       c->offset = offset;
> +                       c->pos = 0;
> +                       c->nb_entries = nb_entries;
> +                       c->last_copy_base = 0;
> +               }
> +       }
>
>         while (count) {
>                 unsigned int what;
> @@ -464,6 +531,7 @@ static int decode_entries(struct packed_git *p, struct 
> pack_window **w_curs,
>                         else
>                                 while (*scp++ & 128);
>                         start--;
> +                       curpos++;
>                 } else if (!(what & 1) && start == 0) {
>                         /*
>                          * This is an actual tree entry to recreate.
> @@ -485,6 +553,7 @@ static int decode_entries(struct packed_git *p, struct 
> pack_window **w_curs,
>                         *dstp += len + 20;
>                         *sizep -= len + 20;
>                         count--;
> +                       curpos++;
>                 } else if (what & 1) {
>                         /*
>                          * Copy from another tree object.
> @@ -522,25 +591,34 @@ static int decode_entries(struct packed_git *p, struct 
> pack_window **w_curs,
>                                                 nth_packed_object_offset(p, 
> index - 1);
>                                 }
>                         }
> -                       if (!copy_objoffset)
> -                               return -1;
>                         copy_count >>= 1;
> +                       if (!copy_count || !copy_objoffset)
> +                               return -1;
>
>                         if (start >= copy_count) {
>                                 start -= copy_count;
> +                               curpos += copy_count;
>                         } else {
>                                 int ret;
> +
>                                 copy_count -= start;
>                                 copy_start += start;
> -                               start = 0;
> -                               if (copy_count > count)
> +                               if (copy_count > count) {
>                                         copy_count = count;
> -                               count -= copy_count;
> -                               ret = decode_entries(p, w_curs,
> -                                       copy_objoffset, copy_start, 
> copy_count,
> -                                       dstp, sizep, 1);
> +                                       count = 0;
> +                                       scp = src;
> +                               } else {
> +                                       count -= copy_count;
> +                                       curpos += start + copy_count;
> +                                       start = 0;
> +                               }
> +
> +                               ret = decode_entries(p, w_curs, 
> copy_objoffset,
> +                                                    copy_start, copy_count,
> +                                                    dstp, sizep);
>                                 if (ret)
>                                         return ret;
> +
>                                 /* force pack window readjustment */
>                                 avail = scp - src;
>                         }
> @@ -551,27 +629,30 @@ static int decode_entries(struct packed_git *p, struct 
> pack_window **w_curs,
>                 src = scp;
>         }
>
> +       /*
> +        * Update the cache if we didn't run through the entire tree.
> +        * We have to "get" it again as a recursion into decode_entries()
> +        * could have invalidated what we obtained initially.
> +        */
> +       c = get_tree_offset_cache(p, obj_offset);
> +       if (curpos < c->nb_entries) {
> +               c->pos = curpos;
> +               c->offset = offset;
> +               c->last_copy_base = copy_objoffset;
> +       }
> +
>         return 0;
>  }
>
>  void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
> -                  off_t offset, unsigned long size)
> +                  off_t obj_offset, unsigned long size)
>  {
> -       unsigned long avail;
> -       unsigned int nb_entries;
>         unsigned char *dst, *dcp;
> -       const unsigned char *src, *scp;
>         int ret;
>
> -       src = use_pack(p, w_curs, offset, &avail);
> -       scp = src;
> -       nb_entries = decode_varint(&scp);
> -       if (scp == src)
> -               return NULL;
> -
>         dst = xmallocz(size);
>         dcp = dst;
> -       ret = decode_entries(p, w_curs, offset, 0, nb_entries, &dcp, &size, 
> 0);
> +       ret = decode_entries(p, w_curs, obj_offset, 0, 0, &dcp, &size);
>         if (ret < 0 || size != 0) {
>                 free(dst);
>                 return NULL;
> diff --git a/packv4-parse.h b/packv4-parse.h
> index d674a3f..b437159 100644
> --- a/packv4-parse.h
> +++ b/packv4-parse.h
> @@ -20,6 +20,6 @@ const unsigned char *get_sha1ref(struct packed_git *p,
>  void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
>                      off_t offset, unsigned long size);
>  void *pv4_get_tree(struct packed_git *p, struct pack_window **w_curs,
> -                  off_t offset, unsigned long size);
> +                  off_t obj_offset, unsigned long size);
>
>  #endif
> diff --git a/sha1_file.c b/sha1_file.c
> index 038e22e..e98eb8b 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2178,7 +2178,7 @@ void *unpack_entry(struct packed_git *p, off_t 
> obj_offset,
>                 type -= 8;
>                 break;
>         case OBJ_PV4_TREE:
> -               data = pv4_get_tree(p, &w_curs, curpos, size);
> +               data = pv4_get_tree(p, &w_curs, obj_offset, size);
>                 type -= 8;
>                 break;
>         case OBJ_COMMIT:
>
>
> Nicolas
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to