from:"Derrick Stolee"

[PATCH v6 10/14] commit-graph: close under reachability

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach write_commit_graph() to walk all parents from the commits
discovered in packfiles. This prevents gaps given by loose objects or
previously-missed packfiles.

Also automatically add commits from the existing graph file, if it
exists.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit-graph.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/commit-graph.c b/commit-graph.c
index 2f2e2c7083..fc7b4fa622 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -369,6 +369,28 @@ static int add_packed_commits(const struct object_id *oid,
return 0;
 }
 
+static void close_reachable(struct packed_oid_list *oids)
+{
+   int i;
+   struct rev_info revs;
+   struct commit *commit;
+   init_revisions(&revs, NULL);
+   for (i = 0; i < oids->nr; i++) {
+   commit = lookup_commit(&oids->list[i]);
+   if (commit && !parse_commit(commit))
+   revs.commits = commit_list_insert(commit, 
&revs.commits);
+   }
+
+   if (prepare_revision_walk(&revs))
+   die(_("revision walk setup failed"));
+
+   while ((commit = get_revision(&revs)) != NULL) {
+   ALLOC_GROW(oids->list, oids->nr + 1, oids->alloc);
+   oidcpy(&oids->list[oids->nr], &(commit->object.oid));
+   (oids->nr)++;
+   }
+}
+
 void write_commit_graph(const char *obj_dir)
 {
struct packed_oid_list oids;
@@ -392,6 +414,7 @@ void write_commit_graph(const char *obj_dir)
ALLOC_ARRAY(oids.list, oids.alloc);
 
for_each_packed_object(add_packed_commits, &oids, 0);
+   close_reachable(&oids);
 
QSORT(oids.list, oids.nr, commit_compare);
 
-- 
2.14.1

[PATCH v6 07/14] commit-graph: implement 'git-commit-graph write'

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach git-commit-graph to write graph files. Create new test script to verify
this command succeeds without failure.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 Documentation/git-commit-graph.txt |  39 
 builtin/commit-graph.c |  33 ++
 t/t5318-commit-graph.sh| 125 +
 3 files changed, 197 insertions(+)
 create mode 100755 t/t5318-commit-graph.sh

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 5913340fad..e688843808 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -5,6 +5,45 @@ NAME
 
 git-commit-graph - Write and verify Git commit graph files
 
+
+SYNOPSIS
+
+[verse]
+'git commit-graph write' <options> [--object-dir <dir>]
+
+
+DESCRIPTION
+---
+
+Manage the serialized commit graph file.
+
+
+OPTIONS
+---
+--object-dir::
+   Use given directory for the location of packfiles and commit graph
+   file. The commit graph file is expected to be at <dir>/info/commit-graph
+   and the packfiles are expected to be in <dir>/pack.
+
+
+COMMANDS
+
+'write'::
+
+Write a commit graph file based on the commits found in packfiles.
+Includes all commits from the existing commit graph file.
+
+
+EXAMPLES
+
+
+* Write a commit graph file for the packed commits in your local .git folder.
++
+
+$ git commit-graph write
+
+
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 8ff7336527..a9d61f649a 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -1,9 +1,18 @@
 #include "builtin.h"
 #include "config.h"
+#include "dir.h"
+#include "lockfile.h"
 #include "parse-options.h"
+#include "commit-graph.h"
 
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
+   N_("git commit-graph write [--object-dir ]"),
+   NULL
+};
+
+static const char * const builtin_commit_graph_write_usage[] = {
+   N_("git commit-graph write [--object-dir ]"),
NULL
 };
 
@@ -11,6 +20,25 @@ static struct opts_commit_graph {
const char *obj_dir;
 } opts;
 
+static int graph_write(int argc, const char **argv)
+{
+   static struct option builtin_commit_graph_write_options[] = {
+   OPT_STRING(0, "object-dir", _dir,
+   N_("dir"),
+   N_("The object directory to store the graph")),
+   OPT_END(),
+   };
+
+   argc = parse_options(argc, argv, NULL,
+builtin_commit_graph_write_options,
+builtin_commit_graph_write_usage, 0);
+
+   if (!opts.obj_dir)
+   opts.obj_dir = get_object_directory();
+
+   write_commit_graph(opts.obj_dir);
+   return 0;
+}
 
 int cmd_commit_graph(int argc, const char **argv, const char *prefix)
 {
@@ -31,6 +59,11 @@ int cmd_commit_graph(int argc, const char **argv, const char 
*prefix)
 builtin_commit_graph_usage,
 PARSE_OPT_STOP_AT_NON_OPTION);
 
+   if (argc > 0) {
+   if (!strcmp(argv[0], "write"))
+   return graph_write(argc, argv);
+   }
+
usage_with_options(builtin_commit_graph_usage,
   builtin_commit_graph_options);
 }
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
new file mode 100755
index 00..43707ce5bb
--- /dev/null
+++ b/t/t5318-commit-graph.sh
@@ -0,0 +1,125 @@
+#!/bin/sh
+
+test_description='commit graph'
+. ./test-lib.sh
+
+test_expect_success 'setup full repo' '
+   mkdir full &&
+   cd "$TRASH_DIRECTORY/full" &&
+   git init &&
+   objdir=".git/objects"
+'
+
+test_expect_success 'write graph with no packs' '
+cd "$TRASH_DIRECTORY/full" &&
+   git commit-graph write --object-dir . &&
+   test_path_is_file info/commit-graph
+'
+
+test_expect_success 'create commits and repack' '
+cd "$TRASH_DIRECTORY/full" &&
+   for i in $(test_seq 3)
+   do
+   test_commit $i &&
+   git branch commits/$i
+   done &&
+   git repack
+'
+
+test_expect_success 'write graph' '
+cd "$TRASH_DIRECTORY/full" &&
+   graph1=$(git commit-graph write) &&
+   test_path_is_file $objdir/info/commit-graph
+'
+
+test_expect_success 'Add more commits' '
+cd "$TRASH_DIRECTORY/full" &&
+   git reset --hard commits/1 &&
+   for i in $(test_seq 4 5)

[PATCH v6 05/14] commit-graph: create git-commit-graph builtin

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach git the 'commit-graph' builtin that will be used for writing and
reading packed graph files. The current implementation is mostly
empty, except for an '--object-dir' option.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 .gitignore |  1 +
 Documentation/git-commit-graph.txt | 11 ++
 Makefile   |  1 +
 builtin.h  |  1 +
 builtin/commit-graph.c | 37 ++
 command-list.txt   |  1 +
 contrib/completion/git-completion.bash |  2 ++
 git.c  |  1 +
 8 files changed, 55 insertions(+)
 create mode 100644 Documentation/git-commit-graph.txt
 create mode 100644 builtin/commit-graph.c

diff --git a/.gitignore b/.gitignore
index 833ef3b0b7..e82f90184d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
 /git-clone
 /git-column
 /git-commit
+/git-commit-graph
 /git-commit-tree
 /git-config
 /git-count-objects
diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
new file mode 100644
index 00..5913340fad
--- /dev/null
+++ b/Documentation/git-commit-graph.txt
@@ -0,0 +1,11 @@
+git-commit-graph(1)
+===
+
+NAME
+
+git-commit-graph - Write and verify Git commit graph files
+
+GIT
+---
+Part of the linkgit:git[1] suite
+
diff --git a/Makefile b/Makefile
index de4b8f0c02..a928d4de66 100644
--- a/Makefile
+++ b/Makefile
@@ -946,6 +946,7 @@ BUILTIN_OBJS += builtin/clone.o
 BUILTIN_OBJS += builtin/column.o
 BUILTIN_OBJS += builtin/commit-tree.o
 BUILTIN_OBJS += builtin/commit.o
+BUILTIN_OBJS += builtin/commit-graph.o
 BUILTIN_OBJS += builtin/config.o
 BUILTIN_OBJS += builtin/count-objects.o
 BUILTIN_OBJS += builtin/credential.o
diff --git a/builtin.h b/builtin.h
index 42378f3aa4..079855b6d4 100644
--- a/builtin.h
+++ b/builtin.h
@@ -149,6 +149,7 @@ extern int cmd_clone(int argc, const char **argv, const 
char *prefix);
 extern int cmd_clean(int argc, const char **argv, const char *prefix);
 extern int cmd_column(int argc, const char **argv, const char *prefix);
 extern int cmd_commit(int argc, const char **argv, const char *prefix);
+extern int cmd_commit_graph(int argc, const char **argv, const char *prefix);
 extern int cmd_commit_tree(int argc, const char **argv, const char *prefix);
 extern int cmd_config(int argc, const char **argv, const char *prefix);
 extern int cmd_count_objects(int argc, const char **argv, const char *prefix);
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
new file mode 100644
index 00..8ff7336527
--- /dev/null
+++ b/builtin/commit-graph.c
@@ -0,0 +1,37 @@
+#include "builtin.h"
+#include "config.h"
+#include "parse-options.h"
+
+static char const * const builtin_commit_graph_usage[] = {
+   N_("git commit-graph [--object-dir ]"),
+   NULL
+};
+
+static struct opts_commit_graph {
+   const char *obj_dir;
+} opts;
+
+
+int cmd_commit_graph(int argc, const char **argv, const char *prefix)
+{
+   static struct option builtin_commit_graph_options[] = {
+   OPT_STRING(0, "object-dir", _dir,
+   N_("dir"),
+   N_("The object directory to store the graph")),
+   OPT_END(),
+   };
+
+   if (argc == 2 && !strcmp(argv[1], "-h"))
+   usage_with_options(builtin_commit_graph_usage,
+  builtin_commit_graph_options);
+
+   git_config(git_default_config, NULL);
+   argc = parse_options(argc, argv, prefix,
+builtin_commit_graph_options,
+builtin_commit_graph_usage,
+PARSE_OPT_STOP_AT_NON_OPTION);
+
+   usage_with_options(builtin_commit_graph_usage,
+  builtin_commit_graph_options);
+}
+
diff --git a/command-list.txt b/command-list.txt
index a1fad28fd8..835c5890be 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -34,6 +34,7 @@ git-clean   mainporcelain
 git-clone   mainporcelain   init
 git-column  purehelpers
 git-commit  mainporcelain   history
+git-commit-graphplumbingmanipulators
 git-commit-tree plumbingmanipulators
 git-config  ancillarymanipulators
 git-count-objects   ancillaryinterrogators
diff --git a/contrib/completion/git-completion.bash 
b/contrib/completion/git-completion.bash
index 91536d831c..a24af902d8 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -841,6 +841,7 @@ __git_list_porcelain_commands ()
check-ref-format) : pl

[PATCH v6 01/14] csum-file: rename hashclose() to finalize_hashfile()

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

The hashclose() method behaves very differently depending on the flags
parameter. In particular, the file descriptor is not always closed.

Perform a simple rename of "hashclose()" to "finalize_hashfile()" in
preparation for functional changes.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 builtin/index-pack.c   | 2 +-
 builtin/pack-objects.c | 6 +++---
 bulk-checkin.c | 4 ++--
 csum-file.c| 2 +-
 csum-file.h| 4 ++--
 fast-import.c  | 2 +-
 pack-bitmap-write.c| 2 +-
 pack-write.c   | 4 ++--
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 59878e70b8..157bceb264 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1269,7 +1269,7 @@ static void conclude_pack(int fix_thin_pack, const char 
*curr_pack, unsigned cha
nr_objects - nr_objects_initial);
stop_progress_msg(, msg.buf);
strbuf_release();
-   hashclose(f, tail_hash, 0);
+   finalize_hashfile(f, tail_hash, 0);
hashcpy(read_hash, pack_hash);
fixup_pack_header_footer(output_fd, pack_hash,
 curr_pack, nr_objects,
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index a197926eaa..84e9f57b7f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -837,11 +837,11 @@ static void write_pack_file(void)
 * If so, rewrite it like in fast-import
 */
if (pack_to_stdout) {
-   hashclose(f, oid.hash, CSUM_CLOSE);
+   finalize_hashfile(f, oid.hash, CSUM_CLOSE);
} else if (nr_written == nr_remaining) {
-   hashclose(f, oid.hash, CSUM_FSYNC);
+   finalize_hashfile(f, oid.hash, CSUM_FSYNC);
} else {
-   int fd = hashclose(f, oid.hash, 0);
+   int fd = finalize_hashfile(f, oid.hash, 0);
fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
 nr_written, oid.hash, offset);
close(fd);
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 9d87eac07b..227cc9f3b1 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -35,9 +35,9 @@ static void finish_bulk_checkin(struct bulk_checkin_state 
*state)
unlink(state->pack_tmp_name);
goto clear_exit;
} else if (state->nr_written == 1) {
-   hashclose(state->f, oid.hash, CSUM_FSYNC);
+   finalize_hashfile(state->f, oid.hash, CSUM_FSYNC);
} else {
-   int fd = hashclose(state->f, oid.hash, 0);
+   int fd = finalize_hashfile(state->f, oid.hash, 0);
fixup_pack_header_footer(fd, oid.hash, state->pack_tmp_name,
 state->nr_written, oid.hash,
 state->offset);
diff --git a/csum-file.c b/csum-file.c
index 5eda7fb6af..e6c95a6915 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -53,7 +53,7 @@ void hashflush(struct hashfile *f)
}
 }
 
-int hashclose(struct hashfile *f, unsigned char *result, unsigned int flags)
+int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int 
flags)
 {
int fd;
 
diff --git a/csum-file.h b/csum-file.h
index 992e5c0141..9ba87f0a6c 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -26,14 +26,14 @@ struct hashfile_checkpoint {
 extern void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint 
*);
 extern int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
 
-/* hashclose flags */
+/* finalize_hashfile flags */
 #define CSUM_CLOSE 1
 #define CSUM_FSYNC 2
 
 extern struct hashfile *hashfd(int fd, const char *name);
 extern struct hashfile *hashfd_check(const char *name);
 extern struct hashfile *hashfd_throughput(int fd, const char *name, struct 
progress *tp);
-extern int hashclose(struct hashfile *, unsigned char *, unsigned int);
+extern int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
 extern void hashwrite(struct hashfile *, const void *, unsigned int);
 extern void hashflush(struct hashfile *f);
 extern void crc32_begin(struct hashfile *);
diff --git a/fast-import.c b/fast-import.c
index 58ef360da4..2e5d17318d 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1016,7 +1016,7 @@ static void end_packfile(void)
struct tag *t;
 
close_pack_windows(pack_data);
-   hashclose(pack_file, cur_pack_oid.hash, 0);
+   finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,

[PATCH v6 12/14] commit-graph: read only from specific pack-indexes

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach git-commit-graph to inspect the objects only in a certain list
of pack-indexes within the given pack directory. This allows updating
the commit graph iteratively.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 Documentation/git-commit-graph.txt | 11 ++-
 builtin/commit-graph.c | 33 ++---
 commit-graph.c | 26 --
 commit-graph.h |  4 +++-
 packfile.c |  4 ++--
 packfile.h |  2 ++
 t/t5318-commit-graph.sh| 10 ++
 7 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 51cb038f3d..b945510f0f 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -32,7 +32,9 @@ COMMANDS
 'write'::
 
 Write a commit graph file based on the commits found in packfiles.
-Includes all commits from the existing commit graph file.
++
+With the `--stdin-packs` option, generate the new commit graph by
+walking objects only in the specified packfiles.
 
 'read'::
 
@@ -49,6 +51,13 @@ EXAMPLES
 $ git commit-graph write
 
 
+* Write a graph file, extending the current graph file using commits
+* in <pack-index>.
++
+
+$ echo <pack-index> | git commit-graph write --stdin-packs
+
+
 * Read basic information from the commit-graph file.
 +
 
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 0e164becff..eebca57e6f 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -8,7 +8,7 @@
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
-   N_("git commit-graph write [--object-dir ]"),
+   N_("git commit-graph write [--object-dir ] [--stdin-packs]"),
NULL
 };
 
@@ -18,12 +18,13 @@ static const char * const builtin_commit_graph_read_usage[] 
= {
 };
 
 static const char * const builtin_commit_graph_write_usage[] = {
-   N_("git commit-graph write [--object-dir ]"),
+   N_("git commit-graph write [--object-dir ] [--stdin-packs]"),
NULL
 };
 
 static struct opts_commit_graph {
const char *obj_dir;
+   int stdin_packs;
 } opts;
 
 static int graph_read(int argc, const char **argv)
@@ -76,10 +77,18 @@ static int graph_read(int argc, const char **argv)
 
 static int graph_write(int argc, const char **argv)
 {
+   const char **pack_indexes = NULL;
+   int packs_nr = 0;
+   const char **lines = NULL;
+   int lines_nr = 0;
+   int lines_alloc = 0;
+
static struct option builtin_commit_graph_write_options[] = {
OPT_STRING(0, "object-dir", _dir,
N_("dir"),
N_("The object directory to store the graph")),
+   OPT_BOOL(0, "stdin-packs", _packs,
+   N_("scan packfiles listed by stdin for commits")),
OPT_END(),
};
 
@@ -90,7 +99,25 @@ static int graph_write(int argc, const char **argv)
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
 
-   write_commit_graph(opts.obj_dir);
+   if (opts.stdin_packs) {
+   struct strbuf buf = STRBUF_INIT;
+   lines_nr = 0;
+   lines_alloc = 128;
+   ALLOC_ARRAY(lines, lines_alloc);
+
+   while (strbuf_getline(, stdin) != EOF) {
+   ALLOC_GROW(lines, lines_nr + 1, lines_alloc);
+   lines[lines_nr++] = strbuf_detach(, NULL);
+   }
+
+   pack_indexes = lines;
+   packs_nr = lines_nr;
+   }
+
+   write_commit_graph(opts.obj_dir,
+  pack_indexes,
+  packs_nr);
+
return 0;
 }
 
diff --git a/commit-graph.c b/commit-graph.c
index 98e2b89b94..f0d7585ddb 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -529,7 +529,9 @@ static void close_reachable(struct packed_oid_list *oids)
}
 }
 
-void write_commit_graph(const char *obj_dir)
+void write_commit_graph(const char *obj_dir,
+   const char **pack_indexes,
+   int nr_packs)
 {
struct packed_oid_list oids;
struct packed_commit_list commits;
@@ -551,7 +553,27 @@ void write_commit_graph(const char *obj_dir)
oids.alloc = 1024;
ALLOC_ARRAY(oids.list, oids.alloc);
 
-   for_each_packed_object(add_packed_commits, , 0);
+   if (pack_indexes) {
+   struct strbuf packname = STRBUF_I

[PATCH v6 13/14] commit-graph: build graph from starting commits

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach git-commit-graph to read commits from stdin when the
--stdin-commits flag is specified. Commits reachable from these
commits are added to the graph. This is a much faster way to construct
the graph than inspecting all packed objects, but is restricted to
known tips.

For the Linux repository, 700,000+ commits were added to the graph
file starting from 'master' in 7-9 seconds, depending on the number
of packfiles in the repo (1, 24, or 120).

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 Documentation/git-commit-graph.txt | 14 +-
 builtin/commit-graph.c | 27 +--
 commit-graph.c | 27 +--
 commit-graph.h |  4 +++-
 t/t5318-commit-graph.sh| 13 +
 5 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index b945510f0f..0710a68f2d 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -34,7 +34,13 @@ COMMANDS
 Write a commit graph file based on the commits found in packfiles.
 +
 With the `--stdin-packs` option, generate the new commit graph by
-walking objects only in the specified packfiles.
+walking objects only in the specified packfiles. (Cannot be combined
+with --stdin-commits.)
++
+With the `--stdin-commits` option, generate the new commit graph by
+walking commits starting at the commits specified in stdin as a list
+of OIDs in hex, one OID per line. (Cannot be combined with
+--stdin-packs.)
 
 'read'::
 
@@ -58,6 +64,12 @@ $ git commit-graph write
 $ echo  | git commit-graph write --stdin-packs
 
 
+* Write a graph file containing all reachable commits.
++
+
+$ git show-ref -s | git commit-graph write --stdin-commits
+
+
 * Read basic information from the commit-graph file.
 +
 
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index eebca57e6f..1c7b7e72b0 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -8,7 +8,7 @@
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
-   N_("git commit-graph write [--object-dir ] [--stdin-packs]"),
+   N_("git commit-graph write [--object-dir ] 
[--stdin-packs|--stdin-commits]"),
NULL
 };
 
@@ -18,13 +18,14 @@ static const char * const builtin_commit_graph_read_usage[] 
= {
 };
 
 static const char * const builtin_commit_graph_write_usage[] = {
-   N_("git commit-graph write [--object-dir ] [--stdin-packs]"),
+   N_("git commit-graph write [--object-dir ] 
[--stdin-packs|--stdin-commits]"),
NULL
 };
 
 static struct opts_commit_graph {
const char *obj_dir;
int stdin_packs;
+   int stdin_commits;
 } opts;
 
 static int graph_read(int argc, const char **argv)
@@ -79,6 +80,8 @@ static int graph_write(int argc, const char **argv)
 {
const char **pack_indexes = NULL;
int packs_nr = 0;
+   const char **commit_hex = NULL;
+   int commits_nr = 0;
const char **lines = NULL;
int lines_nr = 0;
int lines_alloc = 0;
@@ -89,6 +92,8 @@ static int graph_write(int argc, const char **argv)
N_("The object directory to store the graph")),
OPT_BOOL(0, "stdin-packs", _packs,
N_("scan packfiles listed by stdin for commits")),
+   OPT_BOOL(0, "stdin-commits", _commits,
+   N_("start walk at commits listed by stdin")),
OPT_END(),
};
 
@@ -96,10 +101,12 @@ static int graph_write(int argc, const char **argv)
 builtin_commit_graph_write_options,
 builtin_commit_graph_write_usage, 0);
 
+   if (opts.stdin_packs && opts.stdin_commits)
+   die(_("cannot use both --stdin-commits and --stdin-packs"));
if (!opts.obj_dir)
opts.obj_dir = get_object_directory();
 
-   if (opts.stdin_packs) {
+   if (opts.stdin_packs || opts.stdin_commits) {
struct strbuf buf = STRBUF_INIT;
lines_nr = 0;
lines_alloc = 128;
@@ -110,13 +117,21 @@ static int graph_write(int argc, const char **argv)
lines[lines_nr++] = strbuf_detach(, NULL);
}
 
-   pack_indexes = lines;
-   packs_nr = lines_nr;
+   if (opts.stdin_packs) {
+   pack_indexes = lines;
+

[PATCH v6 09/14] commit-graph: add core.commitGraph setting

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

The commit graph feature is controlled by the new core.commitGraph config
setting. This defaults to 0, so the feature is opt-in.

The intention of core.commitGraph is that a user can always stop checking
for or parsing commit graph files if core.commitGraph=0.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 Documentation/config.txt | 3 +++
 cache.h  | 1 +
 config.c | 5 +
 environment.c| 1 +
 4 files changed, 10 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ce9102cea8..9e3da629b8 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -898,6 +898,9 @@ core.notesRef::
 This setting defaults to "refs/notes/commits", and it can be overridden by
 the `GIT_NOTES_REF` environment variable.  See linkgit:git-notes[1].
 
+core.commitGraph::
+   Enable git commit graph feature. Allows reading from .graph files.
+
 core.sparseCheckout::
Enable "sparse checkout" feature. See section "Sparse checkout" in
linkgit:git-read-tree[1] for more information.
diff --git a/cache.h b/cache.h
index d06932ed0b..e62569fbb1 100644
--- a/cache.h
+++ b/cache.h
@@ -801,6 +801,7 @@ extern char *git_replace_ref_base;
 
 extern int fsync_object_files;
 extern int core_preload_index;
+extern int core_commit_graph;
 extern int core_apply_sparse_checkout;
 extern int precomposed_unicode;
 extern int protect_hfs;
diff --git a/config.c b/config.c
index b0c20e6cb8..25ee4a676c 100644
--- a/config.c
+++ b/config.c
@@ -1226,6 +1226,11 @@ static int git_default_core_config(const char *var, 
const char *value)
return 0;
}
 
+   if (!strcmp(var, "core.commitgraph")) {
+   core_commit_graph = git_config_bool(var, value);
+   return 0;
+   }
+
if (!strcmp(var, "core.sparsecheckout")) {
core_apply_sparse_checkout = git_config_bool(var, value);
return 0;
diff --git a/environment.c b/environment.c
index d6dd64662c..8853e2f0dd 100644
--- a/environment.c
+++ b/environment.c
@@ -62,6 +62,7 @@ enum push_default_type push_default = 
PUSH_DEFAULT_UNSPECIFIED;
 enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE;
 char *notes_ref_name;
 int grafts_replace_parents = 1;
+int core_commit_graph;
 int core_apply_sparse_checkout;
 int merge_log_config = -1;
 int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
-- 
2.14.1

[PATCH v6 14/14] commit-graph: implement "--additive" option

2018-03-14 Thread Derrick Stolee

From: Derrick Stolee <dsto...@microsoft.com>

Teach git-commit-graph to add all commits from the existing
commit-graph file to the file about to be written. This should be
used when adding new commits without performing garbage collection.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 Documentation/git-commit-graph.txt | 10 ++
 builtin/commit-graph.c | 10 +++---
 commit-graph.c | 17 -
 commit-graph.h |  3 ++-
 t/t5318-commit-graph.sh| 10 ++
 5 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 0710a68f2d..ccf5e203ce 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -41,6 +41,9 @@ With the `--stdin-commits` option, generate the new commit 
graph by
 walking commits starting at the commits specified in stdin as a list
 of OIDs in hex, one OID per line. (Cannot be combined with
 --stdin-packs.)
++
+With the `--additive` option, include all commits that are present
+in the existing commit-graph file.
 
 'read'::
 
@@ -70,6 +73,13 @@ $ echo  | git commit-graph write --stdin-packs
 $ git show-ref -s | git commit-graph write --stdin-commits
 
 
+* Write a graph file containing all commits in the current
+* commit-graph file along with those reachable from HEAD.
++
+
+$ git rev-parse HEAD | git commit-graph write --stdin-commits --additive
+
+
 * Read basic information from the commit-graph file.
 +
 
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 1c7b7e72b0..d26a6d6de3 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -8,7 +8,7 @@
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--object-dir ]"),
N_("git commit-graph read [--object-dir ]"),
-   N_("git commit-graph write [--object-dir ] 
[--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--additive] 
[--stdin-packs|--stdin-commits]"),
NULL
 };
 
@@ -18,7 +18,7 @@ static const char * const builtin_commit_graph_read_usage[] = 
{
 };
 
 static const char * const builtin_commit_graph_write_usage[] = {
-   N_("git commit-graph write [--object-dir ] 
[--stdin-packs|--stdin-commits]"),
+   N_("git commit-graph write [--object-dir ] [--additive] 
[--stdin-packs|--stdin-commits]"),
NULL
 };
 
@@ -26,6 +26,7 @@ static struct opts_commit_graph {
const char *obj_dir;
int stdin_packs;
int stdin_commits;
+   int additive;
 } opts;
 
 static int graph_read(int argc, const char **argv)
@@ -94,6 +95,8 @@ static int graph_write(int argc, const char **argv)
N_("scan packfiles listed by stdin for commits")),
OPT_BOOL(0, "stdin-commits", _commits,
N_("start walk at commits listed by stdin")),
+   OPT_BOOL(0, "additive", ,
+   N_("include all commits already in the commit-graph 
file")),
OPT_END(),
};
 
@@ -131,7 +134,8 @@ static int graph_write(int argc, const char **argv)
   pack_indexes,
   packs_nr,
   commit_hex,
-  commits_nr);
+  commits_nr,
+  opts.additive);
 
return 0;
 }
diff --git a/commit-graph.c b/commit-graph.c
index 9f1ba9bff6..6348bab82b 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -533,7 +533,8 @@ void write_commit_graph(const char *obj_dir,
const char **pack_indexes,
int nr_packs,
const char **commit_hex,
-   int nr_commits)
+   int nr_commits,
+   int additive)
 {
struct packed_oid_list oids;
struct packed_commit_list commits;
@@ -551,10 +552,24 @@ void write_commit_graph(const char *obj_dir,
oids.nr = 0;
oids.alloc = approximate_object_count() / 4;
 
+   if (additive) {
+   prepare_commit_graph_one(obj_dir);
+   if (commit_graph)
+   oids.alloc += commit_graph->num_commits;
+   }
+
if (oids.alloc < 1024)
oids.alloc = 1024;
ALLOC_ARRAY(oids.list, oids.alloc);
 
+   if (additive && commit_graph) {
+   for (i = 0; i < commit_graph->num_commits; i++) {
+   const unsigned char *hash = 
commit_graph->chunk_oid_lookup +
+   com

[PATCH 1/3] commit: create get_commit_tree() method

2018-04-03 Thread Derrick Stolee

While walking the commit graph, we load struct commit objects into
the object cache. During this process, we also load struct tree
objects for the root tree of each of these commits. We load these
objects even if we are only computing commit reachability information,
such as a merge base or ahead/behind information.

Create get_commit_tree() as a first step to removing direct
references to the 'tree' member of struct commit.

Create get_commit_tree_oid() as a shortcut for several references
to "&commit->tree->object.oid" in the codebase.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit.c | 10 ++
 commit.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/commit.c b/commit.c
index 3e39c86abf..d65c7b3b47 100644
--- a/commit.c
+++ b/commit.c
@@ -296,6 +296,16 @@ void free_commit_buffer(struct commit *commit)
}
 }
 
+struct tree *get_commit_tree(const struct commit *commit)
+{
+   return commit->tree;
+}
+
+struct object_id *get_commit_tree_oid(const struct commit *commit)
+{
+   return &commit->tree->object.oid;
+}
+
 const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
 {
struct commit_buffer *v = buffer_slab_peek(_slab, commit);
diff --git a/commit.h b/commit.h
index e57ae4b583..fa79cc4d1f 100644
--- a/commit.h
+++ b/commit.h
@@ -102,6 +102,9 @@ void unuse_commit_buffer(const struct commit *, const void 
*buffer);
  */
 void free_commit_buffer(struct commit *);
 
+struct tree *get_commit_tree(const struct commit *);
+struct object_id *get_commit_tree_oid(const struct commit *);
+
 /*
  * Disassociate any cached object buffer from the commit, but do not free it.
  * The buffer (or NULL, if none) is returned.
-- 
2.17.0.20.g9f30ba16e1

[PATCH 3/3] commit-graph: lazy-load trees

2018-04-03 Thread Derrick Stolee

The commit-graph file provides quick access to commit data, including
the OID of the root tree for each commit in the graph. When performing
a deep commit-graph walk, we may not need to load most of the trees
for these commits.

Delay loading the tree object for a commit loaded from the graph
until requested via get_commit_tree(). Do not lazy-load trees for
commits not in the graph, since that requires duplicate parsing
and the relative performance improvement when trees are not needed
is small.

On the Linux repository, performance tests were run for the following
command:

git log --graph --oneline -1000

Before: 0.83s
After:  0.65s
Rel %: -21.6%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit-graph.c | 25 ++---
 commit-graph.h |  7 +++
 commit.c   | 10 --
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 3080a87940..a3eeb25f22 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct 
commit_graph *g,
 
 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t pos)
 {
-   struct object_id oid;
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
@@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct 
commit_graph *g, uin
item->object.parsed = 1;
item->graph_pos = pos;
 
-   hashcpy(oid.hash, commit_data);
-   item->tree = lookup_tree(&oid);
+   item->tree = NULL;
 
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
@@ -317,6 +315,27 @@ int parse_commit_in_graph(struct commit *item)
return 0;
 }
 
+static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit 
*c)
+{
+   struct object_id oid;
+   const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
+ 16) * (c->graph_pos);
+
+   hashcpy(oid.hash, commit_data);
+   c->tree = lookup_tree(&oid);
+
+   return c->tree;
+}
+
+struct tree *get_commit_tree_in_graph(const struct commit *c)
+{
+   if (c->tree)
+   return c->tree;
+   if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
+   BUG("get_commit_tree_in_graph called from non-commit-graph 
commit");
+
+   return load_tree_for_commit(commit_graph, (struct commit *)c);
+}
+
 static void write_graph_chunk_fanout(struct hashfile *f,
 struct commit **commits,
 int nr_commits)
diff --git a/commit-graph.h b/commit-graph.h
index e1d8580c98..3ab45818e2 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -17,6 +17,13 @@ char *get_commit_graph_filename(const char *obj_dir);
  */
 int parse_commit_in_graph(struct commit *item);
 
+/*
+ * For performance reasons, a commit loaded from the graph does not
+ * have a tree loaded until trying to consume it for the first time.
+ * Load that tree into the commit and return the object.
+ */
+struct tree *get_commit_tree_in_graph(const struct commit *c);
+
 struct commit_graph {
int graph_fd;
 
diff --git a/commit.c b/commit.c
index d65c7b3b47..d4293ae8f6 100644
--- a/commit.c
+++ b/commit.c
@@ -298,12 +298,18 @@ void free_commit_buffer(struct commit *commit)
 
 struct tree *get_commit_tree(const struct commit *commit)
 {
-   return commit->tree;
+   if (commit->tree || !commit->object.parsed)
+   return commit->tree;
+
+   if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+   BUG("commit has NULL tree, but was not loaded from 
commit-graph");
+
+   return get_commit_tree_in_graph(commit);
 }
 
 struct object_id *get_commit_tree_oid(const struct commit *commit)
 {
-   return &commit->tree->object.oid;
+   return &get_commit_tree(commit)->object.oid;
 }
 
 const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
-- 
2.17.0.20.g9f30ba16e1

Re: [PATCH v7 08/14] commit-graph: implement git commit-graph read

2018-04-03 Thread Derrick Stolee


On 4/2/2018 5:33 PM, Junio C Hamano wrote:

Derrick Stolee <sto...@gmail.com> writes:


From: Derrick Stolee <dsto...@microsoft.com>
...
+static int graph_read(int argc, const char **argv)
+{
+   struct commit_graph *graph = 0;

The previous round said NULL above, not 0, and NULL is the better
way to spell it, I would think.


Sorry about that. Hopefully it is easy to squash.

[PATCH 2/3] treewide: use get_commit_tree() for tree access

2018-04-03 Thread Derrick Stolee

Replace all direct accesses of the 'tree' member in 'struct commit'
with calls to get_commit_tree() or get_commit_tree_oid().

This patch was constructed starting with the following Coccinelle
script, then removing false-positives:

@@
expression c;
@@
- &c->tree->object.oid
+ get_commit_tree_oid(c)

@@
expression c;
symbol m;
@@
- c->tree->object.oid.m
+ get_commit_tree_oid(c)->m

@@
expression c;
@@
- c->tree
+ get_commit_tree(c)

To ensure all references were removed, the 'tree' member was renamed
to 'tree_renamed' along with the few allowed accessors. A successful
compilation demonstrated a correct transformation.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 blame.c   | 18 +-
 builtin/checkout.c| 17 +
 builtin/diff.c|  2 +-
 builtin/fast-export.c |  6 +++---
 builtin/log.c |  4 ++--
 builtin/reflog.c  |  2 +-
 commit-graph.c|  2 +-
 fsck.c|  8 +---
 http-push.c   |  2 +-
 line-log.c|  4 ++--
 list-objects.c| 10 +-
 log-tree.c|  6 +++---
 merge-recursive.c |  3 ++-
 notes-merge.c |  8 
 packfile.c|  2 +-
 pretty.c  |  5 +++--
 ref-filter.c  |  2 +-
 revision.c|  8 
 sequencer.c   | 12 ++--
 sha1_name.c   |  2 +-
 tree.c|  4 ++--
 walker.c  |  2 +-
 22 files changed, 67 insertions(+), 62 deletions(-)

diff --git a/blame.c b/blame.c
index 200e0ad9a2..7f5700b324 100644
--- a/blame.c
+++ b/blame.c
@@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit 
*parent,
diff_setup_done(&diff_opts);
 
if (is_null_oid(&origin->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+   diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
  "", &diff_opts);
diffcore_std(&diff_opts);
 
@@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit 
*parent,
diff_setup_done(&diff_opts);
 
if (is_null_oid(&origin->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+   diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
  "", &diff_opts);
diffcore_std(&diff_opts);
 
@@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard 
*sb,
diff_opts.flags.find_copies_harder = 1;
 
if (is_null_oid(&target->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &target->commit->tree->object.oid,
+   diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(target->commit),
  "", &diff_opts);
 
if (!diff_opts.flags.find_copies_harder)
diff --git a/builtin/checkout.c b/builtin/checkout.c
index d76e13c852..0b448fd179 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -484,7 +484,8 @@ static int merge_working_tree(const struct checkout_opts 
*opts,
 
resolve_undo_clear();
if (opts->force) {
-   ret = reset_tree(new_branch_info->commit->tree, opts, 1, 
writeout_error);
+   ret = reset_tree(get_commit_tree(new_branch_info->commit),
+opts, 1, writeout_error);
if (ret)
return ret;
} else {
@@ -570,19 +571,19 @@ static int merge_working_tree(const struct checkout_opts 
*opts,
o.verbosity = 0;
work = write_tree_from_memory();
 
-   ret = reset_tree(new_branch_info->commit->tree, opts, 1,
-writeout_error);
+   ret = 
reset_tree(get_commit_tree(new_branch_info->commit),
+opts, 1, writeout_error);
if (ret)
return ret;
o.ancestor = old_branch_info->name;
o.branch1 = new_branch_info->name;
o.branch2 = "local";
-   ret = merge_trees(,

[PATCH 0/3] Lazy-load trees when reading commit-graph

2018-04-03 Thread Derrick Stolee

There are several commit-graph walks that require loading many commits
but never walk the trees reachable from those commits. However, the
current logic in parse_commit() requires the root tree to be loaded.
This only uses lookup_tree(), but when reading commits from the commit-
graph file, the hashcpy() to load the root tree hash and the time spent
checking the object cache take more time than parsing the rest of the
commit.

In this patch series, all direct references to accessing the 'tree'
member of struct commit are replaced instead by one of the following
methods:

struct tree *get_commit_tree(struct commit *)
struct object_id *get_commit_tree_oid(struct commit *)

This replacement was assisted by a Coccinelle script, but the 'tree'
member is overloaded in other types, so the script gave false-positives
that were removed from the diff.

After all access is restricted to use these methods, we can then
change the postcondition of parse_commit_in_graph() to allow 'tree'
to be NULL. If the tree is accessed later, we can load the tree's
OID from the commit-graph in constant time and perform the lookup_tree().

On the Linux repository, performance tests were run for the following
command:

git log --graph --oneline -1000

Before: 0.83s
After:  0.65s
Rel %: -21.6%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

This patch series depends on v7 of ds/commit-graph.

Derrick Stolee (3):
  commit: create get_commit_tree() method
  treewide: use get_commit_tree() for tree access
  commit-graph: lazy-load trees

 blame.c   | 18 +-
 builtin/checkout.c| 17 +
 builtin/diff.c|  2 +-
 builtin/fast-export.c |  6 +++---
 builtin/log.c |  4 ++--
 builtin/reflog.c  |  2 +-
 commit-graph.c| 27 +++
 commit-graph.h|  7 +++
 commit.c  | 16 
 commit.h  |  3 +++
 fsck.c|  8 +---
 http-push.c   |  2 +-
 line-log.c|  4 ++--
 list-objects.c| 10 +-
 log-tree.c|  6 +++---
 merge-recursive.c |  3 ++-
 notes-merge.c |  8 
 packfile.c|  2 +-
 pretty.c  |  5 +++--
 ref-filter.c  |  2 +-
 revision.c|  8 
 sequencer.c   | 12 ++--
 sha1_name.c   |  2 +-
 tree.c|  4 ++--
 walker.c  |  2 +-
 25 files changed, 115 insertions(+), 65 deletions(-)

-- 
2.17.0.20.g9f30ba16e1

Re: [PATCH v2 1/5] core.aheadbehind: add new config setting

2018-04-03 Thread Derrick Stolee

On 4/3/2018 6:18 AM, Ævar Arnfjörð Bjarmason wrote:

On Tue, Apr 03 2018, Lars Schneider wrote:

What is the state of this series? I can't find it in git/git nor in
git-for-windows/git. I think Stolee mentioned the config in
his Git Merge talk [1] and I was about to test it/roll it out :-)

It's in the gvfs branch of g...@github.com:Microsoft/git.git, i.e. it's
not in Git for Windows, but used in Microsoft's own in-house version
used for Windows.git.

Thanks for adding me to CC. I mentioned it in my talk because that was
one thing we shipped internally as a "quick fix" until we could do the
right thing.

If I remember correctly, Jeff abandoned shipping this upstream because
it did have the feel of a hack and we wanted to see if users used the
config setting or really cared about the output values. We saw fast
adoption of the feature and even turned the config setting on
automatically in the following version of GVFS.

I may be misunderstanding this feature, but my impression was that it
was a kludge as a workaround until the commit graph code landed, because
once we have that then surely we can just cheaply report the actual (or
approximate?) number in the common case, but of course it may still be
slow if your commit graph file is out of date.

You are correct that the commit-graph file may be out of date, causing
slower performance. Even worse: the current graph patch only provides a
constant-multiple speedup (still walking the same number of commits, but
each commit is parsed much faster).

Speaking of our GVFS-specific fork [0], the 'gvfs' branch was updated
just yesterday with a couple of changes that I am prepping for
submission upstream:

* Lazy-load trees when parsing commits from commit-graph [1]
* Compute and consume generation numbers [2]

Each of these will speed up this ahead/behind calculation in different
ways. [1] makes the cost of loading each commit a bit faster, saving up
to 20% overall. [2] uses generation numbers in paint_down_to_common() to
make the while() condition O(1) instead of O(Q) where Q is the size of
the priority queue. The Windows repo is particularly "wide" with many
parallel branches being merged in complicated ways, so the queue becomes
quite large. This use of generation numbers saves about 4% on some
ahead/behind calculations. This speedup is modest, but the existing code
already made good use of limiting the commit walk to be mostly the
"important" commits.

The real benefit of generation numbers will manifest in a way to make
--topo-order much faster when rendering a small number of commits.

The generation numbers _could_ be used to approximate the ahead/behind
calculation in the following way: When comparing A and B, and gen(A) <
gen(B), then A is at least (gen(B) - gen(A)) behind. That's the only
information that can be gathered directly from those values, but may be
enough to short circuit an exact count.

To truly accelerate these ahead/behind calculations to be sub-linear* in
the ahead/behind counts, we would need a bitmap-based approach. The
object-reachability bitmap is a non-starter for client machines in the
Windows repo, but perhaps a commit-reachability bitmap could be
interesting. Performing set operations on the bitmaps could more quickly
answer these questions. Just thinking about it makes me want to go down
a deep rabbit hole, investigating ways to compute, store, and use these
bitmaps. However: let's wait and see how necessary it is as the
commit-graph feature stabilizes. (*These bitmap approaches are not
guaranteed to be sub-linear, because it may include iterating through a
list of O(N) bits, but good run-length encodings will likely make the
count operation very fast, even with a set-difference operation included.)

There are too many fun things to work on, not enough time!

Thanks,
-Stolee

[0] https://github.com/microsoft/git
Fork of GitForWindows that ships to Windows developers

[1]
https://github.com/Microsoft/git/commit/29114bf86f591f5c87075f779a1faa2d0f17b92f
Lazy-load trees when parsing commits from commit-graph
(accidentally squashed to one commit)

[2]
https://github.com/microsoft/git/compare/879b7d3b1bddea2587b28cdd656c9c655018683a...a0731ca93a35fd042560c4b30e8e0edbdfa4bf9f

Compute and consume generation numbers

Re: [PATCH 0/3] Lazy-load trees when reading commit-graph

2018-04-03 Thread Derrick Stolee


On 4/3/2018 8:00 AM, Derrick Stolee wrote:

There are several commit-graph walks that require loading many commits
but never walk the trees reachable from those commits. However, the
current logic in parse_commit() requires the root tree to be loaded.
This only uses lookup_tree(), but when reading commits from the commit-
graph file, the hashcpy() to load the root tree hash and the time spent
checking the object cache take more time than parsing the rest of the
commit.

In this patch series, all direct references to accessing the 'tree'
member of struct commit are replaced instead by one of the following
methods:

struct tree *get_commit_tree(struct commit *)
struct object_id *get_commit_tree_oid(struct commit *)

This replacement was assisted by a Coccinelle script, but the 'tree'
member is overloaded in other types, so the script gave false-positives
that were removed from the diff.

After all access is restricted to use these methods, we can then
change the postcondition of parse_commit_in_graph() to allow 'tree'
to be NULL. If the tree is accessed later, we can load the tree's
OID from the commit-graph in constant time and perform the lookup_tree().

On the Linux repository, performance tests were run for the following
command:

 git log --graph --oneline -1000

Before: 0.83s
After:  0.65s
Rel %: -21.6%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

This patch series depends on v7 of ds/commit-graph.

Derrick Stolee (3):
   commit: create get_commit_tree() method
   treewide: use get_commit_tree() for tree access
   commit-graph: lazy-load trees



This patch series is also available as a GitHub pull request [1]

[1] https://github.com/derrickstolee/git/pull/4

[PATCH v2 2/4] commit: create get_commit_tree() method

2018-04-06 Thread Derrick Stolee

While walking the commit graph, we load struct commit objects into
the object cache. During this process, we also load struct tree
objects for the root tree of each of these commits. We load these
objects even if we are only computing commit reachability information,
such as a merge base or ahead/behind information.

Create get_commit_tree() as a first step to removing direct
references to the 'maybe_tree' member of struct commit.

Create get_commit_tree_oid() as a shortcut for several references
to "&commit->maybe_tree->object.oid" in the codebase.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit.c | 10 ++
 commit.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/commit.c b/commit.c
index fbc092808c..aea2ca1f8b 100644
--- a/commit.c
+++ b/commit.c
@@ -296,6 +296,16 @@ void free_commit_buffer(struct commit *commit)
}
 }
 
+struct tree *get_commit_tree(const struct commit *commit)
+{
+   return commit->maybe_tree;
+}
+
+struct object_id *get_commit_tree_oid(const struct commit *commit)
+{
+   return &get_commit_tree(commit)->object.oid;
+}
+
 const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
 {
struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
diff --git a/commit.h b/commit.h
index c4d6e6e064..dc4bf97d9f 100644
--- a/commit.h
+++ b/commit.h
@@ -102,6 +102,9 @@ void unuse_commit_buffer(const struct commit *, const void 
*buffer);
  */
 void free_commit_buffer(struct commit *);
 
+struct tree *get_commit_tree(const struct commit *);
+struct object_id *get_commit_tree_oid(const struct commit *);
+
 /*
  * Disassociate any cached object buffer from the commit, but do not free it.
  * The buffer (or NULL, if none) is returned.
-- 
2.17.0

[PATCH v2 1/4] treewide: rename tree to maybe_tree

2018-04-06 Thread Derrick Stolee

Using the commit-graph file to walk commit history removes the large
cost of parsing commits during the walk. This exposes a performance
issue: lookup_tree() takes a large portion of the computation time,
even when Git never uses those trees.

In anticipation of lazy-loading these trees, rename the 'tree' member
of struct commit to 'maybe_tree'. This serves two purposes: it hints
at the future role of possibly being NULL even if the commit has a
valid tree, and it allows for unambiguous transformation from simple
member access (i.e. commit->maybe_tree) to method access.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 blame.c   | 18 +-
 builtin/checkout.c| 12 ++--
 builtin/diff.c|  2 +-
 builtin/fast-export.c |  6 +++---
 builtin/log.c |  4 ++--
 builtin/reflog.c  |  2 +-
 commit-graph.c|  4 ++--
 commit.c  |  2 +-
 commit.h  |  2 +-
 fsck.c|  6 +++---
 http-push.c   |  2 +-
 line-log.c|  4 ++--
 list-objects.c| 10 +-
 log-tree.c|  6 +++---
 merge-recursive.c |  5 +++--
 notes-merge.c |  8 
 packfile.c|  2 +-
 pretty.c  |  4 ++--
 ref-filter.c  |  2 +-
 revision.c|  8 
 sequencer.c   | 12 ++--
 sha1_name.c   |  2 +-
 tree.c|  4 ++--
 walker.c  |  2 +-
 24 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/blame.c b/blame.c
index 200e0ad9a2..b78e649cac 100644
--- a/blame.c
+++ b/blame.c
@@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit 
*parent,
diff_setup_done(&diff_opts);
 
if (is_null_oid(&origin->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+   diff_tree_oid(&parent->maybe_tree->object.oid,
+ &origin->commit->maybe_tree->object.oid,
  "", &diff_opts);
diffcore_std(&diff_opts);
 
@@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit 
*parent,
diff_setup_done(&diff_opts);
 
if (is_null_oid(&origin->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &origin->commit->tree->object.oid,
+   diff_tree_oid(&parent->maybe_tree->object.oid,
+ &origin->commit->maybe_tree->object.oid,
  "", &diff_opts);
diffcore_std(&diff_opts);
 
@@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard 
*sb,
diff_opts.flags.find_copies_harder = 1;
 
if (is_null_oid(&target->commit->object.oid))
-   do_diff_cache(&parent->tree->object.oid, &diff_opts);
+   do_diff_cache(&parent->maybe_tree->object.oid, &diff_opts);
else
-   diff_tree_oid(&parent->tree->object.oid,
- &target->commit->tree->object.oid,
+   diff_tree_oid(&parent->maybe_tree->object.oid,
+ &target->commit->maybe_tree->object.oid,
  "", &diff_opts);
 
if (!diff_opts.flags.find_copies_harder)
diff --git a/builtin/checkout.c b/builtin/checkout.c
index d76e13c852..b15fed5d85 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -484,7 +484,7 @@ static int merge_working_tree(const struct checkout_opts 
*opts,
 
resolve_undo_clear();
if (opts->force) {
-   ret = reset_tree(new_branch_info->commit->tree, opts, 1, 
writeout_error);
+   ret = reset_tree(new_branch_info->commit->maybe_tree, opts, 1, 
writeout_error);
if (ret)
return ret;
} else {
@@ -570,18 +570,18 @@ static int merge_working_tree(const struct checkout_opts 
*opts,
o.verbosity = 0;
work = write_tree_from_memory();
 
-   ret = reset_tree(new_branch_info->commit->tree, opts, 1,
+   ret = reset_tree(new_branch_info->commit->maybe_tree, 
opts, 1,
 writeout_error);
if (ret)
return ret;
o.ancestor = old_branch_info->name;
o.branch1 = new_branch_info->name;
o.branch2 = "local";
-   ret = merge_trees(, new_branch_info->commit->tree, 
work,
-

[PATCH v2 0/4] Lazy-load trees when reading commit-graph

2018-04-06 Thread Derrick Stolee

There are several commit-graph walks that require loading many commits
but never walk the trees reachable from those commits. However, the
current logic in parse_commit() requires the root tree to be loaded.
This only uses lookup_tree(), but when reading commits from the commit-
graph file, the hashcpy() to load the root tree hash and the time spent
checking the object cache take more time than parsing the rest of the
commit.

In this patch series, all direct references to accessing the 'tree'
member of struct commit are replaced instead by one of the following
methods:

struct tree *get_commit_tree(struct commit *)
struct object_id *get_commit_tree_oid(struct commit *)

This replacement was assisted by a Coccinelle script, but the 'tree'
member is overloaded in other types, so we first rename the 'tree'
member to 'maybe_tree' and use the compiler to ensure we caught all
examples. Then, contrib/coccinelle/commit.cocci generates the patch
to replace all accessors of 'maybe_tree' to the methods above.

After all access is restricted to use these methods, we can then
change the postcondition of parse_commit_in_graph() to allow 'maybe_tree'
to be NULL. If the tree is accessed later, we can load the tree's
OID from the commit-graph in constant time and perform the lookup_tree().

On the Linux repository, performance tests were run for the following
command:

git log --graph --oneline -1000

Before: 0.92s
After:  0.66s
Rel %: -28.3%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

This patch series depends on v7 of ds/commit-graph.

Derrick Stolee (4):
  treewide: rename tree to maybe_tree
  commit: create get_commit_tree() method
  treewide: replace maybe_tree with accessor methods
  commit-graph: lazy-load trees for commits

 blame.c | 18 +-
 builtin/checkout.c  | 18 --
 builtin/diff.c  |  2 +-
 builtin/fast-export.c   |  6 +++---
 builtin/log.c   |  4 ++--
 builtin/reflog.c|  2 +-
 commit-graph.c  | 27 +++
 commit-graph.h  |  7 +++
 commit.c| 18 +-
 commit.h|  5 -
 contrib/coccinelle/commit.cocci | 30 ++
 fsck.c  |  8 +---
 http-push.c |  2 +-
 line-log.c  |  4 ++--
 list-objects.c  | 10 +-
 log-tree.c  |  6 +++---
 merge-recursive.c   |  5 +++--
 notes-merge.c   |  9 +
 packfile.c  |  2 +-
 pretty.c|  5 +++--
 ref-filter.c|  2 +-
 revision.c  |  8 
 sequencer.c | 12 ++--
 sha1_name.c |  2 +-
 tree.c  |  4 ++--
 walker.c|  2 +-
 26 files changed, 152 insertions(+), 66 deletions(-)
 create mode 100644 contrib/coccinelle/commit.cocci

-- 
2.17.0

Re: [PATCH 3/3] ref-filter: factor ref_array pushing into its own function

2018-04-06 Thread Derrick Stolee


On 4/6/2018 2:59 PM, Jeff King wrote:

In preparation for callers constructing their own ref_array
structs, let's move our own internal push operation into its
own function.

While we're at it, we can replace REALLOC_ARRAY() with
ALLOC_GROW(), which should give the growth operation
amortized linear complexity (as opposed to growing by one,
which is potentially quadratic, though in-place realloc
growth often makes this faster in practice).

Signed-off-by: Jeff King <p...@peff.net>
---
  ref-filter.c | 16 +---
  ref-filter.h |  8 
  2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index c1c3cc9480..6e9328b274 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1840,6 +1840,18 @@ static struct ref_array_item *new_ref_array_item(const 
char *refname,
return ref;
  }
  
+struct ref_array_item *ref_array_push(struct ref_array *array,

+ const char *refname,
+ const struct object_id *oid)
+{
+   struct ref_array_item *ref = new_ref_array_item(refname, oid);
+
+   ALLOC_GROW(array->items, array->nr + 1, array->alloc);
+   array->items[array->nr++] = ref;
+
+   return ref;
+}
+
  static int ref_kind_from_refname(const char *refname)
  {
unsigned int i;
@@ -1930,13 +1942,11 @@ static int ref_filter_handler(const char *refname, 
const struct object_id *oid,
 * to do its job and the resulting list may yet to be pruned
 * by maxcount logic.
 */
-   ref = new_ref_array_item(refname, oid);
+   ref = ref_array_push(ref_cbdata->array, refname, oid);
ref->commit = commit;
ref->flag = flag;
ref->kind = kind;
  
-	REALLOC_ARRAY(ref_cbdata->array->items, ref_cbdata->array->nr + 1);

-   ref_cbdata->array->items[ref_cbdata->array->nr++] = ref;
return 0;
  }
  
diff --git a/ref-filter.h b/ref-filter.h

index 68268f9ebc..76cf87cb6c 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -135,4 +135,12 @@ void setup_ref_filter_porcelain_msg(void);
  void pretty_print_ref(const char *name, const struct object_id *oid,
  const struct ref_format *format);
  
+/*

+ * Push a single ref onto the array; this can be used to construct your own
+ * ref_array without using filter_refs().
+ */
+struct ref_array_item *ref_array_push(struct ref_array *array,
+ const char *refname,
+ const struct object_id *oid);
+
  #endif /*  REF_FILTER_H  */


The three patches in this series look good to me.

Reviewed-by: Derrick Stolee <dsto...@microsoft.com>

Re: [PATCH v2 0/4] Lazy-load trees when reading commit-graph

2018-04-06 Thread Derrick Stolee


On 4/6/2018 3:21 PM, Jeff King wrote:

On Fri, Apr 06, 2018 at 07:09:30PM +, Derrick Stolee wrote:


Derrick Stolee (4):
   treewide: rename tree to maybe_tree
   commit: create get_commit_tree() method
   treewide: replace maybe_tree with accessor methods
   commit-graph: lazy-load trees for commits

I gave this only a cursory read, but it addresses my concern from the
previous round.

If I were doing it myself, I probably would have folded patches 1 and 3
together. They are touching all the same spots, and it would be an error
for any case converted in patch 1 to not get converted in patch 3. I'm
assuming you caught them all due to Coccinelle, though IMHO it is
somewhat overkill here. By folding them together the compiler could tell
you which spots you missed.

And going forward, I doubt it is going to be a common error for people
to use maybe_tree directly. Between the name and the warning comment,
you'd have to really try to shoot yourself in the foot with it. The
primary concern was catching people using the existing "tree" name,
whose semantics changed.

All that said, I'm fine with having it done this way, too.


Thanks. As a double-check that I caught all of the 'maybe_tree' 
accesses, I ran the following:


$ git grep maybe_tree | grep -v get_commit_tree
commit-graph.c: item->maybe_tree = NULL;
commit-graph.c: c->maybe_tree = lookup_tree();
commit-graph.c: return c->maybe_tree;
commit-graph.c: if (c->maybe_tree)
commit-graph.c: return c->maybe_tree;
commit.c:   if (commit->maybe_tree || !commit->object.parsed)
commit.c:   return commit->maybe_tree;
commit.c:   item->maybe_tree = lookup_tree();
commit.h:   struct tree *maybe_tree;
contrib/coccinelle/commit.cocci:- &c->maybe_tree->object.oid
contrib/coccinelle/commit.cocci:- c->maybe_tree->object.oid.hash
contrib/coccinelle/commit.cocci:- c->maybe_tree
contrib/coccinelle/commit.cocci:+ c->maybe_tree = s
contrib/coccinelle/commit.cocci:+ return c->maybe_tree;
merge-recursive.c:  commit->maybe_tree = tree;

Thanks,
-Stolee

[PATCH v2 4/4] commit-graph: lazy-load trees for commits

2018-04-06 Thread Derrick Stolee

The commit-graph file provides quick access to commit data, including
the OID of the root tree for each commit in the graph. When performing
a deep commit-graph walk, we may not need to load most of the trees
for these commits.

Delay loading the tree object for a commit loaded from the graph
until requested via get_commit_tree(). Do not lazy-load trees for
commits not in the graph, since that requires duplicate parsing
and the relative performance improvement when trees are not needed
is small.

On the Linux repository, performance tests were run for the following
command:

git log --graph --oneline -1000

Before: 0.92s
After:  0.66s
Rel %: -28.3%

Adding '-- kernel/' to the command requires loading the root tree
for every commit that is walked. There was no measurable performance
change as a result of this patch.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit-graph.c | 26 +++---
 commit-graph.h |  2 ++
 commit.c   |  8 +++-
 commit.h   |  6 ++
 4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 9f37d84209..a5de6f3102 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct 
commit_graph *g,
 
 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t pos)
 {
-   struct object_id oid;
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
@@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct 
commit_graph *g, uin
item->object.parsed = 1;
item->graph_pos = pos;
 
-   hashcpy(oid.hash, commit_data);
-   item->maybe_tree = lookup_tree(&oid);
+   item->maybe_tree = NULL;
 
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
date_low = get_be32(commit_data + g->hash_len + 12);
@@ -317,6 +315,28 @@ int parse_commit_in_graph(struct commit *item)
return 0;
 }
 
+static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit 
*c)
+{
+   struct object_id oid;
+   const unsigned char *commit_data = g->chunk_commit_data +
+  GRAPH_DATA_WIDTH * (c->graph_pos);
+
+   hashcpy(oid.hash, commit_data);
+   c->maybe_tree = lookup_tree(&oid);
+
+   return c->maybe_tree;
+}
+
+struct tree *get_commit_tree_in_graph(const struct commit *c)
+{
+   if (c->maybe_tree)
+   return c->maybe_tree;
+   if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
+   BUG("get_commit_tree_in_graph called from non-commit-graph 
commit");
+
+   return load_tree_for_commit(commit_graph, (struct commit *)c);
+}
+
 static void write_graph_chunk_fanout(struct hashfile *f,
 struct commit **commits,
 int nr_commits)
diff --git a/commit-graph.h b/commit-graph.h
index e1d8580c98..260a468e73 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -17,6 +17,8 @@ char *get_commit_graph_filename(const char *obj_dir);
  */
 int parse_commit_in_graph(struct commit *item);
 
+struct tree *get_commit_tree_in_graph(const struct commit *c);
+
 struct commit_graph {
int graph_fd;
 
diff --git a/commit.c b/commit.c
index aea2ca1f8b..711f674c18 100644
--- a/commit.c
+++ b/commit.c
@@ -298,7 +298,13 @@ void free_commit_buffer(struct commit *commit)
 
 struct tree *get_commit_tree(const struct commit *commit)
 {
-   return commit->maybe_tree;
+   if (commit->maybe_tree || !commit->object.parsed)
+   return commit->maybe_tree;
+
+   if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+   BUG("commit has NULL tree, but was not loaded from 
commit-graph");
+
+   return get_commit_tree_in_graph(commit);
 }
 
 struct object_id *get_commit_tree_oid(const struct commit *commit)
diff --git a/commit.h b/commit.h
index dc4bf97d9f..23a3f364ed 100644
--- a/commit.h
+++ b/commit.h
@@ -22,6 +22,12 @@ struct commit {
unsigned int index;
timestamp_t date;
struct commit_list *parents;
+
+   /*
+* If the commit is loaded from the commit-graph file, then this
+* member may be NULL. Only access it through get_commit_tree()
+* or get_commit_tree_oid().
+*/
struct tree *maybe_tree;
uint32_t graph_pos;
 };
-- 
2.17.0

[PATCH v2 09/10] commit: use generation numbers for in_merge_bases()

2018-04-09 Thread Derrick Stolee

The containment algorithm for 'git branch --contains' is different
from that for 'git tag --contains' in that it uses is_descendant_of()
instead of contains_tag_algo(). The expensive portion of the branch
algorithm is computing merge bases.

When a commit-graph file exists with generation numbers computed,
we can avoid this merge-base calculation when the target commit has
a larger generation number than the reference commits.

Performance tests were run on a copy of the Linux repository where
HEAD is contained in v4.13 but no earlier tag. Also, all tags were
copied to branches and 'git branch --contains' was tested:

Before: 60.0s
After:   0.4s
Rel %: -99.3%

Reported-by: Jeff King <p...@peff.net>
Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/commit.c b/commit.c
index 00bdc2ab21..0b155dece8 100644
--- a/commit.c
+++ b/commit.c
@@ -1059,12 +1059,19 @@ int in_merge_bases_many(struct commit *commit, int 
nr_reference, struct commit *
 {
struct commit_list *bases;
int ret = 0, i;
+   uint32_t min_generation = GENERATION_NUMBER_INFINITY;
 
if (parse_commit(commit))
return ret;
-   for (i = 0; i < nr_reference; i++)
+   for (i = 0; i < nr_reference; i++) {
if (parse_commit(reference[i]))
return ret;
+   if (min_generation > reference[i]->generation)
+   min_generation = reference[i]->generation;
+   }
+
+   if (commit->generation > min_generation)
+   return 0;
 
bases = paint_down_to_common(commit, nr_reference, reference);
if (commit->object.flags & PARENT2)
-- 
2.17.0

[PATCH v2 02/10] merge: check config before loading commits

2018-04-09 Thread Derrick Stolee

In anticipation of using generation numbers from the commit-graph,
we must ensure that all commits that exist in the commit-graph are
loaded from that file instead of from the object database. Since
the commit-graph file is only checked if core.commitGraph is true,
we must check the default config before we load any commits.

In the merge builtin, the config was checked after loading the HEAD
commit. This was due to the use of the global 'branch' when checking
merge-specific config settings.

Move the config load to be between the initialization of 'branch'
and the commit lookup. Also add a test to t5318-commit-graph.sh
that exercises this code path to prevent a regression.

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 builtin/merge.c | 5 +++--
 t/t5318-commit-graph.sh | 9 +
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index ee050a47f3..20897f8223 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -1183,13 +1183,14 @@ int cmd_merge(int argc, const char **argv, const char 
*prefix)
branch = branch_to_free = resolve_refdup("HEAD", 0, _oid, NULL);
if (branch)
skip_prefix(branch, "refs/heads/", );
+   init_diff_ui_defaults();
+   git_config(git_merge_config, NULL);
+
if (!branch || is_null_oid(_oid))
head_commit = NULL;
else
head_commit = lookup_commit_or_die(_oid, "HEAD");
 
-   init_diff_ui_defaults();
-   git_config(git_merge_config, NULL);
 
if (branch_mergeoptions)
parse_branch_merge_options(branch_mergeoptions);
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index a380419b65..77d85aefe7 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -221,4 +221,13 @@ test_expect_success 'write graph in bare repo' '
 graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 
merge/1
 graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 
merge/2
 
+test_expect_success 'perform fast-forward merge in full repo' '
+   cd "$TRASH_DIRECTORY/full" &&
+   git checkout -b merge-5-to-8 commits/5 &&
+   git merge commits/8 &&
+   git show-ref -s merge-5-to-8 >output &&
+   git show-ref -s commits/8 >expect &&
+   test_cmp expect output
+'
+
 test_done
-- 
2.17.0

[PATCH v2 10/10] commit: add short-circuit to paint_down_to_common()

2018-04-09 Thread Derrick Stolee

When running 'git branch --contains', the in_merge_bases_many()
method calls paint_down_to_common() to discover if a specific
commit is reachable from a set of branches. Commits with a
generation number lower than that of the commit being tested are
not needed to correctly answer the containment query of
in_merge_bases_many().

Add a new parameter, min_generation, to paint_down_to_common() that
prevents walking commits with generation number strictly less than
min_generation. If 0 is given, then there is no functional change.

For in_merge_bases_many(), we can pass commit->generation as the
cutoff, and this saves time during 'git branch --contains' queries
that would otherwise walk "around" the commit we are inspecting.

For a copy of the Linux repository, where HEAD is checked out at
v4.13~100, we get the following performance improvement for
'git branch --contains' over the previous commit:

Before: 0.21s
After:  0.13s
Rel %: -38%

Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 commit.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/commit.c b/commit.c
index 0b155dece8..7348075e38 100644
--- a/commit.c
+++ b/commit.c
@@ -796,7 +796,9 @@ static int queue_has_nonstale(struct prio_queue *queue, 
uint32_t min_gen)
 }
 
 /* all input commits in one and twos[] must have been parsed! */
-static struct commit_list *paint_down_to_common(struct commit *one, int n, 
struct commit **twos)
+static struct commit_list *paint_down_to_common(struct commit *one, int n,
+   struct commit **twos,
+   int min_generation)
 {
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
struct commit_list *result = NULL;
@@ -830,6 +832,9 @@ static struct commit_list *paint_down_to_common(struct 
commit *one, int n, struc
 
last_gen = commit->generation;
 
+   if (commit->generation < min_generation)
+   break;
+
flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
@@ -882,7 +887,7 @@ static struct commit_list *merge_bases_many(struct commit 
*one, int n, struct co
return NULL;
}
 
-   list = paint_down_to_common(one, n, twos);
+   list = paint_down_to_common(one, n, twos, 0);
 
while (list) {
struct commit *commit = pop_commit();
@@ -949,7 +954,7 @@ static int remove_redundant(struct commit **array, int cnt)
filled_index[filled] = j;
work[filled++] = array[j];
}
-   common = paint_down_to_common(array[i], filled, work);
+   common = paint_down_to_common(array[i], filled, work, 0);
if (array[i]->object.flags & PARENT2)
redundant[i] = 1;
for (j = 0; j < filled; j++)
@@ -1073,7 +1078,7 @@ int in_merge_bases_many(struct commit *commit, int 
nr_reference, struct commit *
if (commit->generation > min_generation)
return 0;
 
-   bases = paint_down_to_common(commit, nr_reference, reference);
+   bases = paint_down_to_common(commit, nr_reference, reference, 
commit->generation);
if (commit->object.flags & PARENT2)
ret = 1;
clear_commit_marks(commit, all_flags);
-- 
2.17.0

[PATCH v2 08/10] ref-filter: use generation number for --contains

2018-04-09 Thread Derrick Stolee

A commit A can reach a commit B only if the generation number of A
is strictly larger than the generation number of B. This condition
allows us to short-circuit commit-graph walks significantly.

Use generation number for '--contains' type queries.

On a copy of the Linux repository where HEAD is contained in v4.13
but no earlier tag, the command 'git tag --contains HEAD' had the
following performance improvement:

Before: 0.81s
After:  0.04s
Rel %:  -95%

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Derrick Stolee <dsto...@microsoft.com>
---
 ref-filter.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 45fc56216a..2f5e79b5de 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1584,7 +1584,8 @@ static int in_commit_list(const struct commit_list *want, 
struct commit *c)
  */
 static enum contains_result contains_test(struct commit *candidate,
  const struct commit_list *want,
- struct contains_cache *cache)
+ struct contains_cache *cache,
+ uint32_t cutoff)
 {
enum contains_result *cached = contains_cache_at(cache, candidate);
 
@@ -1598,8 +1599,11 @@ static enum contains_result contains_test(struct commit 
*candidate,
return CONTAINS_YES;
}
 
-   /* Otherwise, we don't know; prepare to recurse */
parse_commit_or_die(candidate);
+
+   if (candidate->generation < cutoff)
+   return CONTAINS_NO;
+
return CONTAINS_UNKNOWN;
 }
 
@@ -1615,8 +1619,18 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
  struct contains_cache *cache)
 {
struct contains_stack contains_stack = { 0, 0, NULL };
-   enum contains_result result = contains_test(candidate, want, cache);
+   enum contains_result result;
+   uint32_t cutoff = GENERATION_NUMBER_INFINITY;
+   const struct commit_list *p;
+
+   for (p = want; p; p = p->next) {
+   struct commit *c = p->item;
+   parse_commit_or_die(c);
+   if (c->generation < cutoff)
+   cutoff = c->generation;
+   }
 
+   result = contains_test(candidate, want, cache, cutoff);
if (result != CONTAINS_UNKNOWN)
return result;
 
@@ -1634,7 +1648,7 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
 * If we just popped the stack, parents->item has been marked,
 * therefore contains_test will return a meaningful yes/no.
 */
-   else switch (contains_test(parents->item, want, cache)) {
+   else switch (contains_test(parents->item, want, cache, cutoff)) 
{
case CONTAINS_YES:
*contains_cache_at(cache, commit) = CONTAINS_YES;
contains_stack.nr--;
@@ -1648,7 +1662,7 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
}
}
free(contains_stack.contains_stack);
-   return contains_test(candidate, want, cache);
+   return contains_test(candidate, want, cache, cutoff);
 }
 
 static int commit_contains(struct ref_filter *filter, struct commit *commit,
-- 
2.17.0

< 4 5 6 7 8 9 10 11 12 13 >

801 - 900 of 1362 matches

Mail list logo