[PATCH 08/27] pack: move approximate object count to object store

2018-03-23 Thread Nguyễn Thái Ngọc Duy
From: Stefan Beller 

The approximate_object_count() function maintains a rough count of
objects in a repository to estimate how long object name abbreviates
should be.  Object names are scoped to a repository and the
appropriate length may differ by repository, so the object count
should not be global.

Signed-off-by: Stefan Beller 
Signed-off-by: Jonathan Nieder 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 object-store.h |  8 
 packfile.c | 11 +--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/object-store.h b/object-store.h
index 6a07a14d63..b53e125902 100644
--- a/object-store.h
+++ b/object-store.h
@@ -99,6 +99,14 @@ struct raw_object_store {
/* A most-recently-used ordered version of the packed_git list. */
struct list_head packed_git_mru;
 
+   /*
+* A fast, rough count of the number of objects in the repository.
+* These two fields are not meant for direct access. Use
+* approximate_object_count() instead.
+*/
+   unsigned long approximate_object_count;
+   unsigned approximate_object_count_valid : 1;
+
/*
 * Whether packed_git has already been populated with this repository's
 * packs.
diff --git a/packfile.c b/packfile.c
index 2a053711cf..b0b24ea9b8 100644
--- a/packfile.c
+++ b/packfile.c
@@ -803,8 +803,6 @@ static void prepare_packed_git_one(char *objdir, int local)
strbuf_release();
 }
 
-static int approximate_object_count_valid;
-
 /*
  * Give a fast, rough count of the number of objects in the repository. This
  * ignores loose objects completely. If you have a lot of them, then either
@@ -814,8 +812,8 @@ static int approximate_object_count_valid;
  */
 unsigned long approximate_object_count(void)
 {
-   static unsigned long count;
-   if (!approximate_object_count_valid) {
+   if (!the_repository->objects->approximate_object_count_valid) {
+   unsigned long count;
struct packed_git *p;
 
prepare_packed_git();
@@ -825,8 +823,9 @@ unsigned long approximate_object_count(void)
continue;
count += p->num_objects;
}
+   the_repository->objects->approximate_object_count = count;
}
-   return count;
+   return the_repository->objects->approximate_object_count;
 }
 
 static void *get_next_packed_git(const void *p)
@@ -901,7 +900,7 @@ void prepare_packed_git(void)
 
 void reprepare_packed_git(void)
 {
-   approximate_object_count_valid = 0;
+   the_repository->objects->approximate_object_count_valid = 0;
the_repository->objects->packed_git_initialized = 0;
prepare_packed_git();
 }
-- 
2.17.0.rc0.348.gd5a49e0b6f



Re: [PATCH 08/27] pack: move approximate object count to object store

2018-02-26 Thread Stefan Beller
On Mon, Feb 26, 2018 at 12:55 AM, Jeff King  wrote:
> On Fri, Feb 23, 2018 at 02:22:14PM -0800, Stefan Beller wrote:
>
>> >> + /*
>> >> +  * A fast, rough count of the number of objects in the repository.
>> >> +  * These two fields are not meant for direct access. Use
>> >> +  * approximate_object_count() instead.
>> >> +  */
>> >> + unsigned long approximate_object_count;
>> >> + unsigned approximate_object_count_valid : 1;
>> >
>> > Patch looks fine and is effectively a no-op, though what is the need for
>> > both of these variables?  Maybe it can be simplified down to just use
>> > one?  Just musing as its out of the scope of this patch and we probably
>> > shouldn't try to change that in this series.
>>
>> I agree we should. It was introduced in e323de4ad7f (sha1_file:
>> allow sha1_loose_object_info to handle arbitrary repositories, 2017-08-30)
>> and I think it was seen as a clever optimization trick back then?
>
> I think you meant 8e3f52d778 (find_unique_abbrev: move logic out of
> get_short_sha1(), 2016-10-03)?

Yes, copy paste error.

>
> Yes, it was just to avoid the dual-meaning of "0" for "not set" and a
> repository with no packfiles.  It would probably be fine to get rid of
> it. If you have no packfiles then you probably don't have enough objects
> to worry about micro-optimizing. And anyway, the "wasted" case wouldn't
> even make any syscalls (it would do a noop prepare_packed_git() and then
> realize the packed_git list is empty).

Good point!

>
> -Peff


Re: [PATCH 08/27] pack: move approximate object count to object store

2018-02-26 Thread Jeff King
On Fri, Feb 23, 2018 at 02:22:14PM -0800, Stefan Beller wrote:

> >> + /*
> >> +  * A fast, rough count of the number of objects in the repository.
> >> +  * These two fields are not meant for direct access. Use
> >> +  * approximate_object_count() instead.
> >> +  */
> >> + unsigned long approximate_object_count;
> >> + unsigned approximate_object_count_valid : 1;
> >
> > Patch looks fine and is effectively a no-op, though what is the need for
> > both of these variables?  Maybe it can be simplified down to just use
> > one?  Just musing as its out of the scope of this patch and we probably
> > shouldn't try to change that in this series.
> 
> I agree we should. It was introduced in e323de4ad7f (sha1_file:
> allow sha1_loose_object_info to handle arbitrary repositories, 2017-08-30)
> and I think it was seen as a clever optimization trick back then?

I think you meant 8e3f52d778 (find_unique_abbrev: move logic out of
get_short_sha1(), 2016-10-03)?

Yes, it was just to avoid the dual-meaning of "0" for "not set" and a
repository with no packfiles.  It would probably be fine to get rid of
it. If you have no packfiles then you probably don't have enough objects
to worry about micro-optimizing. And anyway, the "wasted" case wouldn't
even make any syscalls (it would do a noop prepare_packed_git() and then
realize the packed_git list is empty).

-Peff


Re: [PATCH 08/27] pack: move approximate object count to object store

2018-02-23 Thread Stefan Beller
On Wed, Feb 21, 2018 at 4:47 PM, Brandon Williams  wrote:
> On 02/20, Stefan Beller wrote:
>> The approximate_object_count() function maintains a rough count of
>> objects in a repository to estimate how long object name abbreviates
>> should be.  Object names are scoped to a repository and the
>> appropriate length may differ by repository, so the object count
>> should not be global.
>>
>> Signed-off-by: Stefan Beller 
>> Signed-off-by: Jonathan Nieder 
>> ---
>>  object-store.h | 10 +-
>>  packfile.c | 11 +--
>>  2 files changed, 14 insertions(+), 7 deletions(-)
>>
>> diff --git a/object-store.h b/object-store.h
>> index 6cecba3951..bd1e4fcd8b 100644
>> --- a/object-store.h
>> +++ b/object-store.h
>> @@ -93,6 +93,14 @@ struct raw_object_store {
>>   struct alternate_object_database *alt_odb_list;
>>   struct alternate_object_database **alt_odb_tail;
>>
>> + /*
>> +  * A fast, rough count of the number of objects in the repository.
>> +  * These two fields are not meant for direct access. Use
>> +  * approximate_object_count() instead.
>> +  */
>> + unsigned long approximate_object_count;
>> + unsigned approximate_object_count_valid : 1;
>
> Patch looks fine and is effectively a no-op, though what is the need for
> both of these variables?  Maybe it can be simplified down to just use
> one?  Just musing as its out of the scope of this patch and we probably
> shouldn't try to change that in this series.

I agree we should. It was introduced in e323de4ad7f (sha1_file:
allow sha1_loose_object_info to handle arbitrary repositories, 2017-08-30)
and I think it was seen as a clever optimization trick back then?


Re: [PATCH 08/27] pack: move approximate object count to object store

2018-02-21 Thread Brandon Williams
On 02/20, Stefan Beller wrote:
> The approximate_object_count() function maintains a rough count of
> objects in a repository to estimate how long object name abbreviates
> should be.  Object names are scoped to a repository and the
> appropriate length may differ by repository, so the object count
> should not be global.
> 
> Signed-off-by: Stefan Beller 
> Signed-off-by: Jonathan Nieder 
> ---
>  object-store.h | 10 +-
>  packfile.c | 11 +--
>  2 files changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/object-store.h b/object-store.h
> index 6cecba3951..bd1e4fcd8b 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -93,6 +93,14 @@ struct raw_object_store {
>   struct alternate_object_database *alt_odb_list;
>   struct alternate_object_database **alt_odb_tail;
>  
> + /*
> +  * A fast, rough count of the number of objects in the repository.
> +  * These two fields are not meant for direct access. Use
> +  * approximate_object_count() instead.
> +  */
> + unsigned long approximate_object_count;
> + unsigned approximate_object_count_valid : 1;

Patch looks fine and is effectively a no-op, though what is the need for
both of these variables?  Maybe it can be simplified down to just use
one?  Just musing as its out of the scope of this patch and we probably
shouldn't try to change that in this series.

> +
>   /*
>* Whether packed_git has already been populated with this repository's
>* packs.
> @@ -107,7 +115,7 @@ struct raw_object_store {
>   * that macro on member variables. Use NULL instead as that is defined
>   * and accepted, deferring the real init to prepare_packed_git_mru(). */
>  #define __MRU_INIT { NULL, NULL }
> -#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL, 0 }
> +#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL, 0, 0, 0 }
>  
>  void raw_object_store_clear(struct raw_object_store *o);
>  
> diff --git a/packfile.c b/packfile.c
> index a8a2e7fe43..693bafbc98 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -803,8 +803,6 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>   strbuf_release();
>  }
>  
> -static int approximate_object_count_valid;
> -
>  /*
>   * Give a fast, rough count of the number of objects in the repository. This
>   * ignores loose objects completely. If you have a lot of them, then either
> @@ -814,8 +812,8 @@ static int approximate_object_count_valid;
>   */
>  unsigned long approximate_object_count(void)
>  {
> - static unsigned long count;
> - if (!approximate_object_count_valid) {
> + if (!the_repository->objects.approximate_object_count_valid) {
> + unsigned long count;
>   struct packed_git *p;
>  
>   prepare_packed_git();
> @@ -825,8 +823,9 @@ unsigned long approximate_object_count(void)
>   continue;
>   count += p->num_objects;
>   }
> + the_repository->objects.approximate_object_count = count;
>   }
> - return count;
> + return the_repository->objects.approximate_object_count;
>  }
>  
>  static void *get_next_packed_git(const void *p)
> @@ -901,7 +900,7 @@ void prepare_packed_git(void)
>  
>  void reprepare_packed_git(void)
>  {
> - approximate_object_count_valid = 0;
> + the_repository->objects.approximate_object_count_valid = 0;
>   the_repository->objects.packed_git_initialized = 0;
>   prepare_packed_git();
>  }
> -- 
> 2.16.1.291.g4437f3f132-goog
> 

-- 
Brandon Williams


[PATCH 08/27] pack: move approximate object count to object store

2018-02-20 Thread Stefan Beller
The approximate_object_count() function maintains a rough count of
objects in a repository to estimate how long object name abbreviates
should be.  Object names are scoped to a repository and the
appropriate length may differ by repository, so the object count
should not be global.

Signed-off-by: Stefan Beller 
Signed-off-by: Jonathan Nieder 
---
 object-store.h | 10 +-
 packfile.c | 11 +--
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/object-store.h b/object-store.h
index 6cecba3951..bd1e4fcd8b 100644
--- a/object-store.h
+++ b/object-store.h
@@ -93,6 +93,14 @@ struct raw_object_store {
struct alternate_object_database *alt_odb_list;
struct alternate_object_database **alt_odb_tail;
 
+   /*
+* A fast, rough count of the number of objects in the repository.
+* These two fields are not meant for direct access. Use
+* approximate_object_count() instead.
+*/
+   unsigned long approximate_object_count;
+   unsigned approximate_object_count_valid : 1;
+
/*
 * Whether packed_git has already been populated with this repository's
 * packs.
@@ -107,7 +115,7 @@ struct raw_object_store {
  * that macro on member variables. Use NULL instead as that is defined
  * and accepted, deferring the real init to prepare_packed_git_mru(). */
 #define __MRU_INIT { NULL, NULL }
-#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL, 0 }
+#define RAW_OBJECT_STORE_INIT { NULL, NULL, __MRU_INIT, NULL, NULL, 0, 0, 0 }
 
 void raw_object_store_clear(struct raw_object_store *o);
 
diff --git a/packfile.c b/packfile.c
index a8a2e7fe43..693bafbc98 100644
--- a/packfile.c
+++ b/packfile.c
@@ -803,8 +803,6 @@ static void prepare_packed_git_one(char *objdir, int local)
strbuf_release();
 }
 
-static int approximate_object_count_valid;
-
 /*
  * Give a fast, rough count of the number of objects in the repository. This
  * ignores loose objects completely. If you have a lot of them, then either
@@ -814,8 +812,8 @@ static int approximate_object_count_valid;
  */
 unsigned long approximate_object_count(void)
 {
-   static unsigned long count;
-   if (!approximate_object_count_valid) {
+   if (!the_repository->objects.approximate_object_count_valid) {
+   unsigned long count;
struct packed_git *p;
 
prepare_packed_git();
@@ -825,8 +823,9 @@ unsigned long approximate_object_count(void)
continue;
count += p->num_objects;
}
+   the_repository->objects.approximate_object_count = count;
}
-   return count;
+   return the_repository->objects.approximate_object_count;
 }
 
 static void *get_next_packed_git(const void *p)
@@ -901,7 +900,7 @@ void prepare_packed_git(void)
 
 void reprepare_packed_git(void)
 {
-   approximate_object_count_valid = 0;
+   the_repository->objects.approximate_object_count_valid = 0;
the_repository->objects.packed_git_initialized = 0;
prepare_packed_git();
 }
-- 
2.16.1.291.g4437f3f132-goog