[PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-01 Thread Stefan Beller
We have to convert all of the alloc functions at once, because alloc_report
uses a funky macro for reporting. It is better for the sake of mechanical
conversion to convert multiple functions at once rather than changing the
structure of the reporting function.

We record all memory allocation in alloc.c, and free them in
clear_alloc_state, which is called for all repositories except
the_repository.

Signed-off-by: Stefan Beller 
---
 alloc.c   | 69 ++-
 alloc.h   | 21 +++
 blame.c   |  1 +
 blob.c|  1 +
 cache.h   | 16 ---
 commit.c  |  1 +
 merge-recursive.c |  1 +
 object.c  | 24 ++---
 object.h  | 10 ++-
 repository.c  |  4 +--
 tag.c |  1 +
 tree.c|  1 +
 12 files changed, 104 insertions(+), 46 deletions(-)
 create mode 100644 alloc.h

diff --git a/alloc.c b/alloc.c
index 277dadd221b..66a3d07ba2d 100644
--- a/alloc.c
+++ b/alloc.c
@@ -4,10 +4,11 @@
  * Copyright (C) 2006 Linus Torvalds
  *
  * The standard malloc/free wastes too much space for objects, partly because
- * it maintains all the allocation infrastructure (which isn't needed, since
- * we never free an object descriptor anyway), but even more because it ends
+ * it maintains all the allocation infrastructure, but even more because it 
ends
  * up with maximal alignment because it doesn't know what the object alignment
  * for the new allocation is.
+ *
+ * TODO: Combine this with mem-pool?
  */
 #include "cache.h"
 #include "object.h"
@@ -30,8 +31,25 @@ struct alloc_state {
int count; /* total number of nodes allocated */
int nr;/* number of nodes left in current allocation */
void *p;   /* first free node in current allocation */
+
+   /* bookkeeping of allocations */
+   void **slabs;
+   int slab_nr, slab_alloc;
 };
 
+void *allocate_alloc_state(void)
+{
+   return xcalloc(1, sizeof(struct alloc_state));
+}
+
+void clear_alloc_state(struct alloc_state *s)
+{
+   while (s->slab_nr > 0) {
+   s->slab_nr--;
+   free(s->slabs[s->slab_nr]);
+   }
+}
+
 static inline void *alloc_node(struct alloc_state *s, size_t node_size)
 {
void *ret;
@@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, 
size_t node_size)
ret = s->p;
s->p = (char *)s->p + node_size;
memset(ret, 0, node_size);
+
+   ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc);
+   s->slabs[s->slab_nr++] = ret;
+
return ret;
 }
 
-static struct alloc_state blob_state;
-void *alloc_blob_node_the_repository(void)
+struct alloc_state the_repository_blob_state;
+void *alloc_blob_node(struct repository *r)
 {
-   struct blob *b = alloc_node(&blob_state, sizeof(struct blob));
+   struct blob *b = alloc_node(r->parsed_objects->blob_state, 
sizeof(struct blob));
b->object.type = OBJ_BLOB;
return b;
 }
 
-static struct alloc_state tree_state;
-void *alloc_tree_node_the_repository(void)
+struct alloc_state the_repository_tree_state;
+void *alloc_tree_node(struct repository *r)
 {
-   struct tree *t = alloc_node(&tree_state, sizeof(struct tree));
+   struct tree *t = alloc_node(r->parsed_objects->tree_state, 
sizeof(struct tree));
t->object.type = OBJ_TREE;
return t;
 }
 
-static struct alloc_state tag_state;
-void *alloc_tag_node_the_repository(void)
+struct alloc_state the_repository_tag_state;
+void *alloc_tag_node(struct repository *r)
 {
-   struct tag *t = alloc_node(&tag_state, sizeof(struct tag));
+   struct tag *t = alloc_node(r->parsed_objects->tag_state, sizeof(struct 
tag));
t->object.type = OBJ_TAG;
return t;
 }
 
-static struct alloc_state object_state;
-void *alloc_object_node_the_repository(void)
+struct alloc_state the_repository_object_state;
+void *alloc_object_node(struct repository *r)
 {
-   struct object *obj = alloc_node(&object_state, sizeof(union 
any_object));
+   struct object *obj = alloc_node(r->parsed_objects->object_state, 
sizeof(union any_object));
obj->type = OBJ_NONE;
return obj;
 }
 
-static struct alloc_state commit_state;
-
-unsigned int alloc_commit_index_the_repository(void)
+unsigned int alloc_commit_index(struct repository *r)
 {
-   static unsigned int count;
-   return count++;
+   return r->parsed_objects->commit_count++;
 }
 
-void *alloc_commit_node_the_repository(void)
+struct alloc_state the_repository_commit_state;
+void *alloc_commit_node(struct repository *r)
 {
-   struct commit *c = alloc_node(&commit_state, sizeof(struct commit));
+   struct commit *c = alloc_node(r->parsed_objects->commit_state, 
sizeof(struct commit));
c->object.type = OBJ_COMMIT;
-   c->index = alloc_commit_index(the_repository);
+   c->index = alloc_commit_index(r);
return c;
 }
 
@@ -103,9 +123,10 @@ static void repo

Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-02 Thread Duy Nguyen
On Tue, May 1, 2018 at 11:34 PM, Stefan Beller  wrote:
>  #include "cache.h"
>  #include "object.h"
> @@ -30,8 +31,25 @@ struct alloc_state {
> int count; /* total number of nodes allocated */
> int nr;/* number of nodes left in current allocation */
> void *p;   /* first free node in current allocation */
> +
> +   /* bookkeeping of allocations */
> +   void **slabs;

Another way to manage this is linked list: you could reserve one
"object" in each slab to store the "next" (or "prev") pointer to
another slab, then you can just walk through all slabs and free. It's
a bit cheaper than reallocating slabs[], but I guess we reallocate so
few times that readability matters more (whichever way is chosen).

> +   int slab_nr, slab_alloc;
>  };
>
> +void *allocate_alloc_state(void)
> +{
> +   return xcalloc(1, sizeof(struct alloc_state));
> +}
> +
> +void clear_alloc_state(struct alloc_state *s)
> +{
> +   while (s->slab_nr > 0) {
> +   s->slab_nr--;
> +   free(s->slabs[s->slab_nr]);

I think you're leaking memory here. Commit and tree objects may have
more allocations in them (especially trees, but I think we have
commit_list in struct commit too). Those need to be freed as well.

> +   }
> +}
> +
>  static inline void *alloc_node(struct alloc_state *s, size_t node_size)
>  {
> void *ret;
-- 
Duy


Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-02 Thread Jonathan Tan
On Tue,  1 May 2018 14:34:03 -0700
Stefan Beller  wrote:

> +void *allocate_alloc_state(void)
> +{
> + return xcalloc(1, sizeof(struct alloc_state));
> +}
> +
> +void clear_alloc_state(struct alloc_state *s)
> +{
> + while (s->slab_nr > 0) {
> + s->slab_nr--;
> + free(s->slabs[s->slab_nr]);
> + }
> +}

These functions are asymmetrical. I understand why it is this way
(because we use pointers, and we want to use FREE_AND_NULL), but would
still prefer _INIT and _release().

>  static inline void *alloc_node(struct alloc_state *s, size_t node_size)
>  {
>   void *ret;
> @@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, 
> size_t node_size)
>   ret = s->p;
>   s->p = (char *)s->p + node_size;
>   memset(ret, 0, node_size);
> +
> + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc);
> + s->slabs[s->slab_nr++] = ret;
> +
>   return ret;
>  }

This unconditionally grows the slabs for each node allocation. Shouldn't
more than one node fit in each slab?

> +extern struct alloc_state the_repository_blob_state;
> +extern struct alloc_state the_repository_tree_state;
> +extern struct alloc_state the_repository_commit_state;
> +extern struct alloc_state the_repository_tag_state;
> +extern struct alloc_state the_repository_object_state;

(Context: these were in alloc.h)

These seem to be used only in object.c, and only in one function - could
we declare them "static" inside that function instead?

> -struct object_parser *object_parser_new(void)
> +struct object_parser *object_parser_new(int is_the_repo)
>  {
>   struct object_parser *o = xmalloc(sizeof(*o));
>   memset(o, 0, sizeof(*o));
> +
> + if (is_the_repo) {
> + o->blob_state = &the_repository_blob_state;
> + o->tree_state = &the_repository_tree_state;
> + o->commit_state = &the_repository_commit_state;
> + o->tag_state = &the_repository_tag_state;
> + o->object_state = &the_repository_object_state;
> + } else {
> + o->blob_state = allocate_alloc_state();
> + o->tree_state = allocate_alloc_state();
> + o->commit_state = allocate_alloc_state();
> + o->tag_state = allocate_alloc_state();
> + o->object_state = allocate_alloc_state();
> + }
>   return o;
>  }

I don't think saving 5 allocations is worth the code complexity (of the
additional parameter).


Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-03 Thread Duy Nguyen
On Tue, May 1, 2018 at 11:34 PM, Stefan Beller  wrote:
> @@ -501,9 +516,12 @@ void raw_object_store_clear(struct raw_object_store *o)
>  void object_parser_clear(struct object_parser *o)
>  {
> /*
> -* TOOD free objects in o->obj_hash.
> -*

You need to free(o->obj_hash) too. If you just want to reuse existing
obj_hash[] then at least clear it, leave no dangling pointers behind.

>  * As objects are allocated in slabs (see alloc.c), we do
>  * not need to free each object, but each slab instead.
>  */
> +   clear_alloc_state(o->blob_state);
> +   clear_alloc_state(o->tree_state);
> +   clear_alloc_state(o->commit_state);
> +   clear_alloc_state(o->tag_state);
> +   clear_alloc_state(o->object_state);
>  }
-- 
Duy


Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-03 Thread Stefan Beller
On Wed, May 2, 2018 at 10:44 AM, Duy Nguyen  wrote:
> On Tue, May 1, 2018 at 11:34 PM, Stefan Beller  wrote:
>>  #include "cache.h"
>>  #include "object.h"
>> @@ -30,8 +31,25 @@ struct alloc_state {
>> int count; /* total number of nodes allocated */
>> int nr;/* number of nodes left in current allocation */
>> void *p;   /* first free node in current allocation */
>> +
>> +   /* bookkeeping of allocations */
>> +   void **slabs;
>
> Another way to manage this is linked list: you could reserve one
> "object" in each slab to store the "next" (or "prev") pointer to
> another slab, then you can just walk through all slabs and free. It's
> a bit cheaper than reallocating slabs[], but I guess we reallocate so
> few times that readability matters more (whichever way is chosen).

This is a good idea. I'll do so in a resend.

>> +void clear_alloc_state(struct alloc_state *s)
>> +{
>> +   while (s->slab_nr > 0) {
>> +   s->slab_nr--;
>> +   free(s->slabs[s->slab_nr]);
>
> I think you're leaking memory here. Commit and tree objects may have
> more allocations in them (especially trees, but I think we have
> commit_list in struct commit too). Those need to be freed as well.

I would think that tree and commit memory will be free'd in the
parsed_objects release function? (TODO: I need to add it over there)

Thanks,
Stefan


Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-03 Thread Stefan Beller
On Wed, May 2, 2018 at 1:50 PM, Jonathan Tan  wrote:
> On Tue,  1 May 2018 14:34:03 -0700
> Stefan Beller  wrote:
>
>> +void *allocate_alloc_state(void)
>> +{
>> + return xcalloc(1, sizeof(struct alloc_state));
>> +}
>> +
>> +void clear_alloc_state(struct alloc_state *s)
>> +{
>> + while (s->slab_nr > 0) {
>> + s->slab_nr--;
>> + free(s->slabs[s->slab_nr]);
>> + }
>> +}
>
> These functions are asymmetrical. I understand why it is this way
> (because we use pointers, and we want to use FREE_AND_NULL), but would
> still prefer _INIT and _release().
>
>>  static inline void *alloc_node(struct alloc_state *s, size_t node_size)
>>  {
>>   void *ret;
>> @@ -45,54 +63,56 @@ static inline void *alloc_node(struct alloc_state *s, 
>> size_t node_size)
>>   ret = s->p;
>>   s->p = (char *)s->p + node_size;
>>   memset(ret, 0, node_size);
>> +
>> + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc);
>> + s->slabs[s->slab_nr++] = ret;
>> +
>>   return ret;
>>  }
>
> This unconditionally grows the slabs for each node allocation. Shouldn't
> more than one node fit in each slab?

Yes. I'll go with Duy's idea and make it a linked list by using the first
object as a pointer to the next slab.

>
>> +extern struct alloc_state the_repository_blob_state;
>> +extern struct alloc_state the_repository_tree_state;
>> +extern struct alloc_state the_repository_commit_state;
>> +extern struct alloc_state the_repository_tag_state;
>> +extern struct alloc_state the_repository_object_state;
>
> (Context: these were in alloc.h)
>
> These seem to be used only in object.c, and only in one function - could
> we declare them "static" inside that function instead?

ok

>
>> -struct object_parser *object_parser_new(void)
>> +struct object_parser *object_parser_new(int is_the_repo)
>>  {
>>   struct object_parser *o = xmalloc(sizeof(*o));
>>   memset(o, 0, sizeof(*o));
>> +
>> + if (is_the_repo) {
>> + o->blob_state = &the_repository_blob_state;
>> + o->tree_state = &the_repository_tree_state;
>> + o->commit_state = &the_repository_commit_state;
>> + o->tag_state = &the_repository_tag_state;
>> + o->object_state = &the_repository_object_state;
>> + } else {
>> + o->blob_state = allocate_alloc_state();
>> + o->tree_state = allocate_alloc_state();
>> + o->commit_state = allocate_alloc_state();
>> + o->tag_state = allocate_alloc_state();
>> + o->object_state = allocate_alloc_state();
>> + }
>>   return o;
>>  }
>
> I don't think saving 5 allocations is worth the code complexity (of the
> additional parameter).

Ok, I'll remove this overhead.

Thanks,
Stefan


Re: [PATCH 13/13] alloc: allow arbitrary repositories for alloc functions

2018-05-03 Thread Duy Nguyen
On Thu, May 3, 2018 at 7:24 PM, Stefan Beller  wrote:
>>> +void clear_alloc_state(struct alloc_state *s)
>>> +{
>>> +   while (s->slab_nr > 0) {
>>> +   s->slab_nr--;
>>> +   free(s->slabs[s->slab_nr]);
>>
>> I think you're leaking memory here. Commit and tree objects may have
>> more allocations in them (especially trees, but I think we have
>> commit_list in struct commit too). Those need to be freed as well.
>
> I would think that tree and commit memory will be free'd in the
> parsed_objects release function? (TODO: I need to add it over there)

What release function? I know tree->buffer is often released since you
don't want to keep much in memory. But the user should not be required
to release all objects before calling object_parser_clear(). For
struct commit, I'm pretty we make the commit_list (for commit parents)
and never free. Now we need to do it, somewhere.

Oh you mean object_parser_clear() as release function? Yes. I've been
wondering if it makes more sense to go through obj_hash[] and release
objects before free(obj_hash) than doing it here. It's probably better
to do it there since obj_hash[] would contain the very last pointers
to these objects and look like the right place to release resources.

> Thanks,
> Stefan
-- 
Duy