Am 11.05.2018 um 19:42 schrieb Jeff King:
> On Fri, May 11, 2018 at 03:34:19PM +0200, Duy Nguyen wrote:
> 
>> On Fri, May 11, 2018 at 03:11:46PM +0200, Duy Nguyen wrote:
>>> Back to fast-export, can we just allocate a new int on heap and point
>>> it there? Allocating small pieces becomes quite cheap and fast with
>>> mem-pool.h and we can avoid this storing integer in pointer business.
>>
>> Something like this seems to work, but we use 4-ish more bytes per
>> object, or 100MB overhead on a repo with 25M objects. I think it's a
>> reasonable trade off.
> 
> I'm not sure I agree. 4 bytes per object certainly isn't the end of the
> world, but what was the problem we were solving in the first place? Just
> that we weren't comfortable with the round-trip from uintptr_t to void
> and back? Is this actually a problem on real platforms? If not, it seems
> silly to incur a run-time cost.

Storing integer values in pointers is a trick that seems to have worked
so far for fast-export.  A portable way to avoid that trick without
requiring more memory would be to use a union.

Or we could roll our own custom hash map, as I mused in an earlier post.
That would duplicate quite a bit of code; are there reusable pieces
hidden within that could be extracted into common functions?

---
 builtin/fast-export.c | 105 ++++++++++++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 24 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 530df12f05..627b0032f3 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -14,7 +14,6 @@
 #include "diffcore.h"
 #include "log-tree.h"
 #include "revision.h"
-#include "decorate.h"
 #include "string-list.h"
 #include "utf8.h"
 #include "parse-options.h"
@@ -71,9 +70,65 @@ static int parse_opt_tag_of_filtered_mode(const struct 
option *opt,
        return 0;
 }
 
-static struct decoration idnums;
+struct object_mark_entry {
+       const struct object *base;
+       uint32_t mark;
+};
+
+struct object_marks {
+       unsigned int size;
+       unsigned int nr;
+       struct object_mark_entry *entries;
+};
+
+static struct object_marks idnums;
 static uint32_t last_idnum;
 
+static unsigned int hash_obj(const struct object *obj, unsigned int n)
+{
+       return sha1hash(obj->oid.hash) % n;
+}
+
+static void set_object_mark(struct object_marks *n, const struct object *base,
+                           uint32_t mark)
+{
+       unsigned int size = n->size;
+       struct object_mark_entry *entries = n->entries;
+       unsigned int j = hash_obj(base, size);
+
+       while (entries[j].base) {
+               if (entries[j].base == base) {
+                       entries[j].mark = mark;
+                       return;
+               }
+               if (++j >= size)
+                       j = 0;
+       }
+       entries[j].base = base;
+       entries[j].mark = mark;
+       n->nr++;
+}
+
+static void grow_object_marks(struct object_marks *n)
+{
+       unsigned int i;
+       unsigned int old_size = n->size;
+       struct object_mark_entry *old_entries = n->entries;
+
+       n->size = (old_size + 1000) * 3 / 2;
+       n->entries = xcalloc(n->size, sizeof(n->entries[0]));
+       n->nr = 0;
+
+       for (i = 0; i < old_size; i++) {
+               const struct object *base = old_entries[i].base;
+               uint32_t mark = old_entries[i].mark;
+
+               if (mark)
+                       set_object_mark(n, base, mark);
+       }
+       free(old_entries);
+}
+
 static int has_unshown_parent(struct commit *commit)
 {
        struct commit_list *parent;
@@ -156,20 +211,13 @@ static void anonymize_path(struct strbuf *out, const char 
*path,
        }
 }
 
-/* Since intptr_t is C99, we do not use it here */
-static inline uint32_t *mark_to_ptr(uint32_t mark)
-{
-       return ((uint32_t *)NULL) + mark;
-}
-
-static inline uint32_t ptr_to_mark(void * mark)
-{
-       return (uint32_t *)mark - (uint32_t *)NULL;
-}
-
 static inline void mark_object(struct object *object, uint32_t mark)
 {
-       add_decoration(&idnums, object, mark_to_ptr(mark));
+       unsigned int nr = idnums.nr + 1;
+
+       if (nr > idnums.size * 2 / 3)
+               grow_object_marks(&idnums);
+       return set_object_mark(&idnums, object, mark);
 }
 
 static inline void mark_next_object(struct object *object)
@@ -179,10 +227,21 @@ static inline void mark_next_object(struct object *object)
 
 static int get_object_mark(struct object *object)
 {
-       void *decoration = lookup_decoration(&idnums, object);
-       if (!decoration)
+       unsigned int j;
+
+       /* nothing to lookup */
+       if (!idnums.size)
                return 0;
-       return ptr_to_mark(decoration);
+       j = hash_obj(object, idnums.size);
+       for (;;) {
+               struct object_mark_entry *ref = idnums.entries + j;
+               if (ref->base == object)
+                       return ref->mark;
+               if (!ref->base)
+                       return 0;
+               if (++j == idnums.size)
+                       j = 0;
+       }
 }
 
 static void show_progress(void)
@@ -897,8 +956,7 @@ static void handle_tags_and_duplicates(void)
 static void export_marks(char *file)
 {
        unsigned int i;
-       uint32_t mark;
-       struct decoration_entry *deco = idnums.entries;
+       struct object_mark_entry *entry = idnums.entries;
        FILE *f;
        int e = 0;
 
@@ -907,15 +965,14 @@ static void export_marks(char *file)
                die_errno("Unable to open marks file %s for writing.", file);
 
        for (i = 0; i < idnums.size; i++) {
-               if (deco->base && deco->base->type == 1) {
-                       mark = ptr_to_mark(deco->decoration);
-                       if (fprintf(f, ":%"PRIu32" %s\n", mark,
-                               oid_to_hex(&deco->base->oid)) < 0) {
+               if (entry->base && entry->base->type == 1) {
+                       if (fprintf(f, ":%"PRIu32" %s\n", entry->mark,
+                                   oid_to_hex(&entry->base->oid)) < 0) {
                            e = 1;
                            break;
                        }
                }
-               deco++;
+               entry++;
        }
 
        e |= ferror(f);
-- 
2.17.0

Reply via email to