Re: [PATCH v2 2/6] commit-graph write: add more progress output

2018-11-20 Thread SZEDER Gábor
On Tue, Nov 20, 2018 at 07:50:23PM +, Ævar Arnfjörð Bjarmason wrote:
> Add more progress output to the output already added in
> 7b0f229222 ("commit-graph write: add progress output", 2018-09-17).
> 
> As noted in that commit most of the progress output isn't displayed on
> small repositories, but before this change we'd noticeably hang for
> 2-3 seconds at the end on medium sized repositories such as linux.git.
> 
> Now we'll instead show output like this, and have no human-observable
> point at which we're not producing progress output:
> 
> $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
> Finding commits for commit graph: 6365492, done.
> Computing commit graph generation numbers: 100% (797222/797222), done.
> Writing out commit graph: 2399912, done.
> 
> This "writing out" number is not meant to be meaningful to the user,
> but just to show that we're doing work and the command isn't
> hanging.
> 
> In the current implementation it's approximately 4x the number of
> commits.

"approximately" only, because the current implementation is buggy :)
If done right it's exactly 4x the number of commits.

> As noted in on-list discussion[1] we could add the loops up
> and show percentage progress here, but I don't think it's worth it. It
> would make the implementation more complex and harder to maintain for
> very little gain.

I think that if we can cheaply and accurately figure out the total,
then we should display it, so onlooking users can guesstimate how much
work is still left to be done.

> On a much larger in-house repository I have we'll show (note how we
> also say "Annotating[...]"):
> 
> $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
> Finding commits for commit graph: 50026015, done.
> Annotating commit graph: 21567407, done.
> Computing commit graph generation numbers: 100% (7144680/7144680), done.
> Writing out commit graph: 21434417, done.
> 
> 1. https://public-inbox.org/git/20181120165800.gb30...@szeder.dev/
> 
> Signed-off-by: Ævar Arnfjörð Bjarmason 
> ---
>  commit-graph.c | 41 -
>  1 file changed, 32 insertions(+), 9 deletions(-)
> 
> diff --git a/commit-graph.c b/commit-graph.c
> index e6d0d7722b..6f6409b292 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -433,7 +433,9 @@ struct tree *get_commit_tree_in_graph(struct repository 
> *r, const struct commit
>  
>  static void write_graph_chunk_fanout(struct hashfile *f,
>struct commit **commits,
> -  int nr_commits)
> +  int nr_commits,
> +  struct progress *progress,
> +  uint64_t *progress_cnt)
>  {
>   int i, count = 0;
>   struct commit **list = commits;
> @@ -445,6 +447,7 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>*/
>   for (i = 0; i < 256; i++) {
>   while (count < nr_commits) {
> + display_progress(progress, ++*progress_cnt);

I think this display_progress() should be places after the condition,
so no one has to waste brain cycles on figuring out, why it always
counts 255 more than the number of commits.

>   if ((*list)->object.oid.hash[0] != i)
>   break;
>   count++;
> @@ -456,12 +459,16 @@ static void write_graph_chunk_fanout(struct hashfile *f,
>  }
>  
>  static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
> -struct commit **commits, int nr_commits)
> +struct commit **commits, int nr_commits,
> +struct progress *progress,
> +uint64_t *progress_cnt)
>  {
>   struct commit **list = commits;
>   int count;
> - for (count = 0; count < nr_commits; count++, list++)
> + for (count = 0; count < nr_commits; count++, list++) {
> + display_progress(progress, ++*progress_cnt);
>   hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
> + }
>  }
>  
>  static const unsigned char *commit_to_sha1(size_t index, void *table)
> @@ -471,7 +478,9 @@ static const unsigned char *commit_to_sha1(size_t index, 
> void *table)
>  }
>  
>  static void write_graph_chunk_data(struct hashfile *f, int hash_len,
> -struct commit **commits, int nr_commits)
> +struct commit **commits, int nr_commits,
> +struct progress *progress,
> +uint64_t *progress_cnt)
>  {
>   struct commit **list = commits;
>   struct commit **last = commits + nr_commits;
> @@ -481,6 +490,7 @@ static void write_graph_chunk_data(struct hashfile *f, 
> int hash_len,
>   struct commit_list *parent;
>   int edge_value;
>   uint32_t 

[PATCH v2 2/6] commit-graph write: add more progress output

2018-11-20 Thread Ævar Arnfjörð Bjarmason
Add more progress output to the output already added in
7b0f229222 ("commit-graph write: add progress output", 2018-09-17).

As noted in that commit most of the progress output isn't displayed on
small repositories, but before this change we'd noticeably hang for
2-3 seconds at the end on medium sized repositories such as linux.git.

Now we'll instead show output like this, and have no human-observable
point at which we're not producing progress output:

$ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
Finding commits for commit graph: 6365492, done.
Computing commit graph generation numbers: 100% (797222/797222), done.
Writing out commit graph: 2399912, done.

This "writing out" number is not meant to be meaningful to the user,
but just to show that we're doing work and the command isn't
hanging.

In the current implementation it's approximately 4x the number of
commits. As noted in on-list discussion[1] we could add the loops up
and show percentage progress here, but I don't think it's worth it. It
would make the implementation more complex and harder to maintain for
very little gain.

On a much larger in-house repository I have we'll show (note how we
also say "Annotating[...]"):

$ ~/g/git/git --exec-path=$HOME/g/git commit-graph write
Finding commits for commit graph: 50026015, done.
Annotating commit graph: 21567407, done.
Computing commit graph generation numbers: 100% (7144680/7144680), done.
Writing out commit graph: 21434417, done.

1. https://public-inbox.org/git/20181120165800.gb30...@szeder.dev/

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 commit-graph.c | 41 -
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index e6d0d7722b..6f6409b292 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -433,7 +433,9 @@ struct tree *get_commit_tree_in_graph(struct repository *r, 
const struct commit
 
 static void write_graph_chunk_fanout(struct hashfile *f,
 struct commit **commits,
-int nr_commits)
+int nr_commits,
+struct progress *progress,
+uint64_t *progress_cnt)
 {
int i, count = 0;
struct commit **list = commits;
@@ -445,6 +447,7 @@ static void write_graph_chunk_fanout(struct hashfile *f,
 */
for (i = 0; i < 256; i++) {
while (count < nr_commits) {
+   display_progress(progress, ++*progress_cnt);
if ((*list)->object.oid.hash[0] != i)
break;
count++;
@@ -456,12 +459,16 @@ static void write_graph_chunk_fanout(struct hashfile *f,
 }
 
 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
-  struct commit **commits, int nr_commits)
+  struct commit **commits, int nr_commits,
+  struct progress *progress,
+  uint64_t *progress_cnt)
 {
struct commit **list = commits;
int count;
-   for (count = 0; count < nr_commits; count++, list++)
+   for (count = 0; count < nr_commits; count++, list++) {
+   display_progress(progress, ++*progress_cnt);
hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
+   }
 }
 
 static const unsigned char *commit_to_sha1(size_t index, void *table)
@@ -471,7 +478,9 @@ static const unsigned char *commit_to_sha1(size_t index, 
void *table)
 }
 
 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
-  struct commit **commits, int nr_commits)
+  struct commit **commits, int nr_commits,
+  struct progress *progress,
+  uint64_t *progress_cnt)
 {
struct commit **list = commits;
struct commit **last = commits + nr_commits;
@@ -481,6 +490,7 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
struct commit_list *parent;
int edge_value;
uint32_t packedDate[2];
+   display_progress(progress, ++*progress_cnt);
 
parse_commit(*list);
hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
@@ -542,7 +552,9 @@ static void write_graph_chunk_data(struct hashfile *f, int 
hash_len,
 
 static void write_graph_chunk_large_edges(struct hashfile *f,
  struct commit **commits,
- int nr_commits)
+ int nr_commits,
+ struct progress *progress,
+ uint64_t *progress_cnt)
 {
struct