Re: [PATCH 01/23] pack v4: initial pack dictionary structure and code

Junio C Hamano Tue, 27 Aug 2013 08:09:13 -0700

Nicolas Pitre <n...@fluxnic.net> writes:

> Signed-off-by: Nicolas Pitre <n...@fluxnic.net>
> ---


Was there a reason not to reuse the hash-table Linus did in
hash.[ch]?

It may not make much of a difference for something so small and
isolated from the rest of the system, but if hash.[ch] can be easily
fixed with a small tweak to suit the use by this subsystem better,
it might be worth reusing the existing code with improvement, which
may help other potential users.

>  packv4-create.c | 137 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 137 insertions(+)
>  create mode 100644 packv4-create.c
>
> diff --git a/packv4-create.c b/packv4-create.c
> new file mode 100644
> index 0000000..2de6d41
> --- /dev/null
> +++ b/packv4-create.c
> @@ -0,0 +1,137 @@
> +/*
> + * packv4-create.c: management of dictionary tables used in pack v4
> + *
> + * (C) Nicolas Pitre <n...@fluxnic.net>
> + *
> + * This code is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include "cache.h"
> +
> +struct data_entry {
> +     unsigned offset;
> +     unsigned hits;
> +};
> +
> +struct dict_table {
> +     char *data;
> +     unsigned ptr;
> +     unsigned size;
> +     struct data_entry *entry;
> +     unsigned nb_entries;
> +     unsigned max_entries;
> +     unsigned *hash;
> +     unsigned hash_size;
> +};
> +
> +struct dict_table *create_dict_table(void)
> +{
> +     return xcalloc(sizeof(struct dict_table), 1);
> +}
> +
> +void destroy_dict_table(struct dict_table *t)
> +{
> +     free(t->data);
> +     free(t->entry);
> +     free(t->hash);
> +     free(t);
> +}
> +
> +static int locate_entry(struct dict_table *t, const char *str)
> +{
> +     int i = 0;
> +     const unsigned char *s = (const unsigned char *) str;
> +
> +     while (*s)
> +             i = i * 111 + *s++;
> +     i = (unsigned)i % t->hash_size;
> +
> +     while (t->hash[i]) {
> +             unsigned n = t->hash[i] - 1;
> +             if (!strcmp(str, t->data + t->entry[n].offset))
> +                     return n;
> +             if (++i >= t->hash_size)
> +                     i = 0;
> +     }
> +     return -1 - i;
> +}
> +
> +static void rehash_entries(struct dict_table *t)
> +{
> +     unsigned n;
> +
> +     t->hash_size *= 2;
> +     if (t->hash_size < 1024)
> +             t->hash_size = 1024;
> +     t->hash = xrealloc(t->hash, t->hash_size * sizeof(*t->hash));
> +     memset(t->hash, 0, t->hash_size * sizeof(*t->hash));
> +
> +     for (n = 0; n < t->nb_entries; n++) {
> +             int i = locate_entry(t, t->data + t->entry[n].offset);
> +             if (i < 0)
> +                     t->hash[-1 - i] = n + 1;
> +     }
> +}
> +
> +int dict_add_entry(struct dict_table *t, const char *str)
> +{
> +     int i, len = strlen(str) + 1;
> +
> +     if (t->ptr + len >= t->size) {
> +             t->size = (t->size + len + 1024) * 3 / 2;
> +             t->data = xrealloc(t->data, t->size);
> +     }
> +     memcpy(t->data + t->ptr, str, len);
> +
> +     i = (t->nb_entries) ? locate_entry(t, t->data + t->ptr) : -1;
> +     if (i >= 0) {
> +             t->entry[i].hits++;
> +             return i;
> +     }
> +
> +     if (t->nb_entries >= t->max_entries) {
> +             t->max_entries = (t->max_entries + 1024) * 3 / 2;
> +             t->entry = xrealloc(t->entry, t->max_entries * 
> sizeof(*t->entry));
> +     }
> +     t->entry[t->nb_entries].offset = t->ptr;
> +     t->entry[t->nb_entries].hits = 1;
> +     t->ptr += len + 1;
> +     t->nb_entries++;
> +
> +     if (t->hash_size * 3 <= t->nb_entries * 4)
> +             rehash_entries(t);
> +     else
> +             t->hash[-1 - i] = t->nb_entries;
> +
> +     return t->nb_entries - 1;
> +}
> +
> +static int cmp_dict_entries(const void *a_, const void *b_)
> +{
> +     const struct data_entry *a = a_;
> +     const struct data_entry *b = b_;
> +     int diff = b->hits - a->hits;
> +     if (!diff)
> +             diff = a->offset - b->offset;
> +     return diff;
> +}
> +
> +static void sort_dict_entries_by_hits(struct dict_table *t)
> +{
> +     qsort(t->entry, t->nb_entries, sizeof(*t->entry), cmp_dict_entries);
> +     t->hash_size = (t->nb_entries * 4 / 3) / 2;
> +     rehash_entries(t);
> +}
> +
> +void dict_dump(struct dict_table *t)
> +{
> +     int i;
> +
> +     sort_dict_entries_by_hits(t);
> +     for (i = 0; i < t->nb_entries; i++)
> +             printf("%d\t%s\n",
> +                     t->entry[i].hits,
> +                     t->data + t->entry[i].offset);
> +}
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 01/23] pack v4: initial pack dictionary structure and code

Reply via email to