[PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
We have to convert all of the alloc functions at once, because alloc_report uses a funky macro for reporting. It is better for the sake of mechanical conversion to convert multiple functions at once rather than changing the structure of the reporting function. We record all memory allocation in alloc.c, and free them in clear_alloc_state, which is called for all repositories except the_repository. Signed-off-by: Stefan Beller --- alloc.c | 69 ++- alloc.h | 21 +++ blame.c | 1 + blob.c| 1 + cache.h | 16 --- commit.c | 1 + merge-recursive.c | 1 + object.c | 24 ++--- object.h | 10 ++- repository.c | 4 +-- tag.c | 1 + tree.c| 1 + 12 files changed, 104 insertions(+), 46 deletions(-) create mode 100644 alloc.h diff --git a/alloc.c b/alloc.c index 277dadd221b..66a3d07ba2d 100644 --- a/alloc.c +++ b/alloc.c @@ -4,10 +4,11 @@ * Copyright (C) 2006 Linus Torvalds * * The standard malloc/free wastes too much space for objects, partly because - * it maintains all the allocation infrastructure (which isn't needed, since - * we never free an object descriptor anyway), but even more because it ends + * it maintains all the allocation infrastructure, but even more because it ends * up with maximal alignment because it doesn't know what the object alignment * for the new allocation is. + * + * TODO: Combine this with mem-pool? */ #include "cache.h" #include "object.h" @@ -30,8 +31,25 @@ struct alloc_state { int count; /* total number of nodes allocated */ int nr;/* number of nodes left in current allocation */ void *p; /* first free node in current allocation */ + + /* bookkeeping of allocations */ + void **slabs; + int slab_nr, slab_alloc; }; +void *allocate_alloc_state(void) +{ + return xcalloc(1, sizeof(struct alloc_state)); +} + +void clear_alloc_state(struct alloc_state *s) +{ + while (s->slab_nr > 0) { + s->slab_nr--; + free(s->slabs[s->slab_nr]); + } +} + static inline void *alloc_node(struct alloc_state *s, size_t node_size) { void *ret; @@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, size_t node_size) ret = s->p; s->p = (char *)s->p + node_size; memset(ret, 0, node_size); + + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc); + s->slabs[s->slab_nr++] = ret; + return ret; } -static struct alloc_state blob_state; -void *alloc_blob_node_the_repository(void) +struct alloc_state the_repository_blob_state; +void *alloc_blob_node(struct repository *r) { - struct blob *b = alloc_node(&blob_state, sizeof(struct blob)); + struct blob *b = alloc_node(r->parsed_objects->blob_state, sizeof(struct blob)); b->object.type = OBJ_BLOB; return b; } -static struct alloc_state tree_state; -void *alloc_tree_node_the_repository(void) +struct alloc_state the_repository_tree_state; +void *alloc_tree_node(struct repository *r) { - struct tree *t = alloc_node(&tree_state, sizeof(struct tree)); + struct tree *t = alloc_node(r->parsed_objects->tree_state, sizeof(struct tree)); t->object.type = OBJ_TREE; return t; } -static struct alloc_state tag_state; -void *alloc_tag_node_the_repository(void) +struct alloc_state the_repository_tag_state; +void *alloc_tag_node(struct repository *r) { - struct tag *t = alloc_node(&tag_state, sizeof(struct tag)); + struct tag *t = alloc_node(r->parsed_objects->tag_state, sizeof(struct tag)); t->object.type = OBJ_TAG; return t; } -static struct alloc_state object_state; -void *alloc_object_node_the_repository(void) +struct alloc_state the_repository_object_state; +void *alloc_object_node(struct repository *r) { - struct object *obj = alloc_node(&object_state, sizeof(union any_object)); + struct object *obj = alloc_node(r->parsed_objects->object_state, sizeof(union any_object)); obj->type = OBJ_NONE; return obj; } -static struct alloc_state commit_state; - -unsigned int alloc_commit_index_the_repository(void) +unsigned int alloc_commit_index(struct repository *r) { - static unsigned int count; - return count++; + return r->parsed_objects->commit_count++; } -void *alloc_commit_node_the_repository(void) +struct alloc_state the_repository_commit_state; +void *alloc_commit_node(struct repository *r) { - struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); + struct commit *c = alloc_node(r->parsed_objects->commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; - c->index = alloc_commit_index(the_repository); + c->index = alloc_commit_index(r); return c; } @@ -103,9 +123,10 @@ static void repo
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Tue, May 1, 2018 at 11:34 PM, Stefan Beller wrote: > #include "cache.h" > #include "object.h" > @@ -30,8 +31,25 @@ struct alloc_state { > int count; /* total number of nodes allocated */ > int nr;/* number of nodes left in current allocation */ > void *p; /* first free node in current allocation */ > + > + /* bookkeeping of allocations */ > + void **slabs; Another way to manage this is linked list: you could reserve one "object" in each slab to store the "next" (or "prev") pointer to another slab, then you can just walk through all slabs and free. It's a bit cheaper than reallocating slabs[], but I guess we reallocate so few times that readability matters more (whichever way is chosen). > + int slab_nr, slab_alloc; > }; > > +void *allocate_alloc_state(void) > +{ > + return xcalloc(1, sizeof(struct alloc_state)); > +} > + > +void clear_alloc_state(struct alloc_state *s) > +{ > + while (s->slab_nr > 0) { > + s->slab_nr--; > + free(s->slabs[s->slab_nr]); I think you're leaking memory here. Commit and tree objects may have more allocations in them (especially trees, but I think we have commit_list in struct commit too). Those need to be freed as well. > + } > +} > + > static inline void *alloc_node(struct alloc_state *s, size_t node_size) > { > void *ret; -- Duy
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Tue, 1 May 2018 14:34:03 -0700 Stefan Beller wrote: > +void *allocate_alloc_state(void) > +{ > + return xcalloc(1, sizeof(struct alloc_state)); > +} > + > +void clear_alloc_state(struct alloc_state *s) > +{ > + while (s->slab_nr > 0) { > + s->slab_nr--; > + free(s->slabs[s->slab_nr]); > + } > +} These functions are asymmetrical. I understand why it is this way (because we use pointers, and we want to use FREE_AND_NULL), but would still prefer _INIT and _release(). > static inline void *alloc_node(struct alloc_state *s, size_t node_size) > { > void *ret; > @@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, > size_t node_size) > ret = s->p; > s->p = (char *)s->p + node_size; > memset(ret, 0, node_size); > + > + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc); > + s->slabs[s->slab_nr++] = ret; > + > return ret; > } This unconditionally grows the slabs for each node allocation. Shouldn't more than one node fit in each slab? > +extern struct alloc_state the_repository_blob_state; > +extern struct alloc_state the_repository_tree_state; > +extern struct alloc_state the_repository_commit_state; > +extern struct alloc_state the_repository_tag_state; > +extern struct alloc_state the_repository_object_state; (Context: these were in alloc.h) These seem to be used only in object.c, and only in one function - could we declare them "static" inside that function instead? > -struct object_parser *object_parser_new(void) > +struct object_parser *object_parser_new(int is_the_repo) > { > struct object_parser *o = xmalloc(sizeof(*o)); > memset(o, 0, sizeof(*o)); > + > + if (is_the_repo) { > + o->blob_state = &the_repository_blob_state; > + o->tree_state = &the_repository_tree_state; > + o->commit_state = &the_repository_commit_state; > + o->tag_state = &the_repository_tag_state; > + o->object_state = &the_repository_object_state; > + } else { > + o->blob_state = allocate_alloc_state(); > + o->tree_state = allocate_alloc_state(); > + o->commit_state = allocate_alloc_state(); > + o->tag_state = allocate_alloc_state(); > + o->object_state = allocate_alloc_state(); > + } > return o; > } I don't think saving 5 allocations is worth the code complexity (of the additional parameter).
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Tue, May 1, 2018 at 11:34 PM, Stefan Beller wrote: > @@ -501,9 +516,12 @@ void raw_object_store_clear(struct raw_object_store *o) > void object_parser_clear(struct object_parser *o) > { > /* > -* TOOD free objects in o->obj_hash. > -* You need to free(o->obj_hash) too. If you just want to reuse existing obj_hash[] then at least clear it, leave no dangling pointers behind. > * As objects are allocated in slabs (see alloc.c), we do > * not need to free each object, but each slab instead. > */ > + clear_alloc_state(o->blob_state); > + clear_alloc_state(o->tree_state); > + clear_alloc_state(o->commit_state); > + clear_alloc_state(o->tag_state); > + clear_alloc_state(o->object_state); > } -- Duy
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Wed, May 2, 2018 at 10:44 AM, Duy Nguyen wrote: > On Tue, May 1, 2018 at 11:34 PM, Stefan Beller wrote: >> #include "cache.h" >> #include "object.h" >> @@ -30,8 +31,25 @@ struct alloc_state { >> int count; /* total number of nodes allocated */ >> int nr;/* number of nodes left in current allocation */ >> void *p; /* first free node in current allocation */ >> + >> + /* bookkeeping of allocations */ >> + void **slabs; > > Another way to manage this is linked list: you could reserve one > "object" in each slab to store the "next" (or "prev") pointer to > another slab, then you can just walk through all slabs and free. It's > a bit cheaper than reallocating slabs[], but I guess we reallocate so > few times that readability matters more (whichever way is chosen). This is a good idea. I'll do so in a resend. >> +void clear_alloc_state(struct alloc_state *s) >> +{ >> + while (s->slab_nr > 0) { >> + s->slab_nr--; >> + free(s->slabs[s->slab_nr]); > > I think you're leaking memory here. Commit and tree objects may have > more allocations in them (especially trees, but I think we have > commit_list in struct commit too). Those need to be freed as well. I would think that tree and commit memory will be free'd in the parsed_objects release function? (TODO: I need to add it over there) Thanks, Stefan
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Wed, May 2, 2018 at 1:50 PM, Jonathan Tan wrote: > On Tue, 1 May 2018 14:34:03 -0700 > Stefan Beller wrote: > >> +void *allocate_alloc_state(void) >> +{ >> + return xcalloc(1, sizeof(struct alloc_state)); >> +} >> + >> +void clear_alloc_state(struct alloc_state *s) >> +{ >> + while (s->slab_nr > 0) { >> + s->slab_nr--; >> + free(s->slabs[s->slab_nr]); >> + } >> +} > > These functions are asymmetrical. I understand why it is this way > (because we use pointers, and we want to use FREE_AND_NULL), but would > still prefer _INIT and _release(). > >> static inline void *alloc_node(struct alloc_state *s, size_t node_size) >> { >> void *ret; >> @@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, >> size_t node_size) >> ret = s->p; >> s->p = (char *)s->p + node_size; >> memset(ret, 0, node_size); >> + >> + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc); >> + s->slabs[s->slab_nr++] = ret; >> + >> return ret; >> } > > This unconditionally grows the slabs for each node allocation. Shouldn't > more than one node fit in each slab? Yes. I'll go with Duy's idea and make it a linked list by using the first object as a pointer to the next slab. > >> +extern struct alloc_state the_repository_blob_state; >> +extern struct alloc_state the_repository_tree_state; >> +extern struct alloc_state the_repository_commit_state; >> +extern struct alloc_state the_repository_tag_state; >> +extern struct alloc_state the_repository_object_state; > > (Context: these were in alloc.h) > > These seem to be used only in object.c, and only in one function - could > we declare them "static" inside that function instead? ok > >> -struct object_parser *object_parser_new(void) >> +struct object_parser *object_parser_new(int is_the_repo) >> { >> struct object_parser *o = xmalloc(sizeof(*o)); >> memset(o, 0, sizeof(*o)); >> + >> + if (is_the_repo) { >> + o->blob_state = &the_repository_blob_state; >> + o->tree_state = &the_repository_tree_state; >> + o->commit_state = &the_repository_commit_state; >> + o->tag_state = &the_repository_tag_state; >> + o->object_state = &the_repository_object_state; >> + } else { >> + o->blob_state = allocate_alloc_state(); >> + o->tree_state = allocate_alloc_state(); >> + o->commit_state = allocate_alloc_state(); >> + o->tag_state = allocate_alloc_state(); >> + o->object_state = allocate_alloc_state(); >> + } >> return o; >> } > > I don't think saving 5 allocations is worth the code complexity (of the > additional parameter). Ok, I'll remove this overhead. Thanks, Stefan
Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions
On Thu, May 3, 2018 at 7:24 PM, Stefan Beller wrote: >>> +void clear_alloc_state(struct alloc_state *s) >>> +{ >>> + while (s->slab_nr > 0) { >>> + s->slab_nr--; >>> + free(s->slabs[s->slab_nr]); >> >> I think you're leaking memory here. Commit and tree objects may have >> more allocations in them (especially trees, but I think we have >> commit_list in struct commit too). Those need to be freed as well. > > I would think that tree and commit memory will be free'd in the > parsed_objects release function? (TODO: I need to add it over there) What release function? I know tree->buffer is often released since you don't want to keep much in memory. But the user should not be required to release all objects before calling object_parser_clear(). For struct commit, I'm pretty we make the commit_list (for commit parents) and never free. Now we need to do it, somewhere. Oh you mean object_parser_clear() as release function? Yes. I've been wondering if it makes more sense to go through obj_hash[] and release objects before free(obj_hash) than doing it here. It's probably better to do it there since obj_hash[] would contain the very last pointers to these objects and look like the right place to release resources. > Thanks, > Stefan -- Duy