On 10/10/2018 9:21 PM, Jonathan Tan wrote:
diff --git a/commit-graph.c b/commit-graph.c
index f415d3b41f..90b0b3df90 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -715,13 +715,11 @@ static int add_ref_to_list(const char *refname,
  static void add_changes_to_bloom_filter(struct bloom_filter *bf,
                                        struct commit *parent,
                                        struct commit *commit,
+                                       int index,
                                        struct diff_options *diffopt)
  {
-       unsigned char p_c_hash[GIT_MAX_RAWSZ];
        int i;
- hashxor(parent->object.oid.hash, commit->object.oid.hash, p_c_hash);
-
        diff_tree_oid(&parent->object.oid, &commit->object.oid, "", diffopt);
        diffcore_std(diffopt);
@@ -756,8 +754,8 @@ static void add_changes_to_bloom_filter(struct bloom_filter *bf,
                        the_hash_algo->update_fn(&ctx, path, p - path);
                        the_hash_algo->final_fn(name_hash, &ctx);
- hashxor(name_hash, p_c_hash, hash);
-                       bloom_filter_add_hash(bf, hash);
+                       hashxor(name_hash, parent->object.oid.hash, hash);
+                       bloom_filter_add_hash(bf, index, hash);
                } while (*p);
diff_free_filepair(diff_queued_diff.queue[i]);
[snip]
@@ -768,11 +766,10 @@ static void add_changes_to_bloom_filter(struct 
bloom_filter *bf,
  }
static void fill_bloom_filter(struct bloom_filter *bf,
-                                   struct progress *progress)
+                                   struct progress *progress, struct commit 
**commits, int commit_nr)
  {
        struct rev_info revs;
        const char *revs_argv[] = {NULL, "--all", NULL};
-       struct commit *commit;
        int i = 0;
/* We (re-)create the bloom filter from scratch every time for now. */
@@ -783,18 +780,19 @@ static void fill_bloom_filter(struct bloom_filter *bf,
        if (prepare_revision_walk(&revs))
                die("revision walk setup failed while preparing bloom filter");
- while ((commit = get_revision(&revs))) {
+       for (i = 0; i < commit_nr; i++) {
+               struct commit *commit = commits[i];
                struct commit_list *parent;
for (parent = commit->parents; parent; parent = parent->next)
-                       add_changes_to_bloom_filter(bf, parent->item, commit,
+                       add_changes_to_bloom_filter(bf, parent->item, commit, i,
                                                    &revs.diffopt);
[snip]
- hashxor(pi->name_hash, p_c_hash, hash);
-               if (bloom_filter_check_hash(&bf, hash)) {
+               hashxor(pi->name_hash, parent->object.oid.hash, hash);
+               if (bloom_filter_check_hash(&bf, commit->graph_pos, hash)) {
                        /*
                         * At least one of the interesting pathspecs differs,
                         * so we can return early and let the diff machinery
One main benefit of storing on Bloom filter per commit is to avoid recomputing hashes at every commit. Currently, this patch only improves locality when checking membership at the cost of taking up more space. Drop the dependence on the parent oid and then we can save the time spent hashing during history queries.

-Stolee

Reply via email to