The idea is to reverse the DAG and perform a modified breadth-first search
on it, starting on all sources of the reversed DAG.
Before each visit of a commit, its weight is induced.
This only works for non-merge commits, so the BFS stops prematurely on
merge commits (that are collected in a list).
Merge commits from that collection are considered for further visits
as soon as all their parents have been visited.
Their weights are computed using compute_weight().
Each BFS walk ends when the computed weight is falling or halfway.

Signed-off-by: Stephan Beyer <s-be...@gmx.net>
---

In other words: walk the history up, compute the weight of each
commit and stop when you know a better commit cannot follow.

This way, the expensive "compute_weight()" function is not called
on merge commits that will not help to find the maximum distance.

I have to mention that I also had a more complex idea, involving
a binary search on specifically ordered merge commits, and then
inducing the weights of the commits between those merge commits.
However, this is pretty nontrivial, error-prone by definition,
and the compute_weight() calls during the binary search can be more
expensive than during the "bottom-up" BFS walk.
So this is clearly the better idea. (And also follows the KISS rule.)

For the ads: ~3-4 seconds

 bisect.c | 246 ++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 158 insertions(+), 88 deletions(-)

diff --git a/bisect.c b/bisect.c
index 1d1f61c..827a82a 100644
--- a/bisect.c
+++ b/bisect.c
@@ -30,6 +30,9 @@ static unsigned marker;
 struct node_data {
        int weight;
        unsigned marked;
+       unsigned parents;
+       unsigned visited : 1;
+       struct commit_list *children;
 };
 
 #define DEBUG_BISECT 0
@@ -110,16 +113,6 @@ static int count_interesting_parents(struct commit *commit)
        return count;
 }
 
-static inline int halfway(struct commit *commit)
-{
-       /*
-        * Don't short-cut something we are not going to return!
-        */
-       if (commit->object.flags & TREESAME)
-               return 0;
-       return !distance_direction(commit);
-}
-
 #if !DEBUG_BISECT
 #define show_list(a,b,c) do { ; } while (0)
 #else
@@ -340,90 +333,167 @@ static void compute_all_weights(struct commit_list *list,
        show_list("bisection 2 counted all", counted, list);
 }
 
-/* At the moment this is basically the same as compute_all_weights()
- * but with a halfway shortcut */
+static struct commit_list *build_reversed_dag(struct commit_list *list,
+                                             struct node_data *nodes)
+{
+       struct commit_list *sources = NULL;
+       struct commit_list *p;
+       int n = 0;
+       for (p = list; p; p = p->next)
+               p->item->util = &nodes[n++];
+       for (p = list; p; p = p->next) {
+               struct commit_list *parent;
+               struct commit *commit = p->item;
+               for (parent = commit->parents; parent; parent = parent->next) {
+                       if (!(parent->item->object.flags & UNINTERESTING)) {
+                               struct commit_list **next = 
&node_data(parent->item)->children;
+                               commit_list_insert(commit, next);
+                               node_data(commit)->parents++;
+                       }
+               }
+       }
+
+       /* find all sources */
+       for (p = list; p; p = p->next) {
+               if (node_data(p->item)->parents == 0)
+                       commit_list_insert(p->item, &sources);
+       }
+
+       return sources;
+}
+
+static void commit_list_insert_unique(struct commit *item,
+                                     struct commit_list **list)
+{
+       if (!*list || item < (*list)->item) /* empty list or item will be first 
*/
+               commit_list_insert(item, list);
+       else if (item != (*list)->item) { /* item will not be first or not 
inserted */
+               struct commit_list *p = *list;
+               for (; p->next && p->next->item < item; p = p->next);
+               if (!p->next || item != p->next->item) /* not already inserted 
*/
+                       commit_list_insert(item, &p->next);
+       }
+}
+
+/* do a BFS on the reversed DAG (starting from commits in queue), stop at 
merge commits */
+static void bfs_up_to_merges(struct commit_list *queue,
+                            struct commit_list **merges)
+{
+       assert(queue);
+       struct commit_list **last;
+       for (last = &queue->next; *last; last = &(*last)->next);
+
+       while (queue) {
+               struct commit *top = queue->item;
+               struct commit_list *p;
+
+               if (distance_direction(top) > 0) {
+                       node_data(top)->visited = 1;
+
+                       /* queue children */
+                       for (p = node_data(top)->children; p; p = p->next) {
+                               if (node_data(p->item)->parents > 1) /* child 
is a merge */
+                                       commit_list_insert_unique(p->item, 
merges);
+                               else {
+                                       node_data(p->item)->weight = 
node_data(top)->weight;
+                                       if (!(p->item->object.flags & TREESAME))
+                                               node_data(p->item)->weight++;
+                                       last = commit_list_append(p->item, 
last);
+                               }
+                       }
+               }
+
+               pop_commit(&queue);
+       }
+}
+
+static int all_parents_are_visited(struct commit *merge)
+{
+       struct commit_list *p;
+       for (p = merge->parents; p; p = p->next) {
+               if (p->item->util && !node_data(p->item)->visited)
+                       return 0;
+       }
+       return 1;
+}
+
+static struct commit *extract_merge_to_queue(struct commit_list **merges)
+{
+       assert(merges);
+
+       struct commit_list *p, *q;
+       struct commit *found;
+
+       /* find a merge that is ready, i.e. all parents have been computed */
+       for (q = NULL, p = *merges; p && !all_parents_are_visited(p->item);
+            q = p, p = p->next);
+       if (!p)
+               return NULL;
+
+       /* remove that commit from the list and return it */
+       if (q) {
+               assert(q->next == p);
+               q->next = p->next;
+       } else /* found first element of list */
+               *merges = p->next;
+       found = p->item;
+       free(p);
+
+       return found;
+}
+
+static int find_new_queue_from_merges(struct commit_list **queue,
+                                     struct commit_list **merges)
+{
+       if (*merges) {
+               struct commit *merge = extract_merge_to_queue(merges);
+               *queue = NULL;
+               if (merge) {
+                       commit_list_insert(merge, queue);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+static void compute_merge_weights(struct commit_list *merges)
+{
+       struct commit_list *p;
+       for (p = merges; p; p = p->next)
+               compute_weight(p->item);
+}
+
+static void modified_bfs(struct commit_list *queue)
+{
+       struct commit_list *merges = NULL;
+       bfs_up_to_merges(queue, &merges);
+       while (find_new_queue_from_merges(&queue, &merges)) {
+               compute_merge_weights(queue);
+               bfs_up_to_merges(queue, &merges);
+       }
+}
+
+/* The idea is to reverse the DAG and perform a modified breadth-first search
+ * on it, starting on all sources of the reversed DAG.
+ * Before each visit of a commit, its weight is induced.
+ * This only works for non-merge commits, so the BFS stops prematurely on
+ * merge commits (that are collected in a list).
+ * Merge commits from that collection are considered for further visits
+ * as soon as all parents have been visited.
+ * Their weights are computed using compute_weight().
+ * Each BFS branch ends when the computed weight is falling or halfway.
+ */
 static void compute_relevant_weights(struct commit_list *list,
                                     struct node_data *weights)
 {
-       int n, counted;
        struct commit_list *p;
+       struct commit_list *sources = build_reversed_dag(list, weights);
 
-       counted = 0;
+       for (p = sources; p; p = p->next)
+               node_data(p->item)->weight = 1;
+       modified_bfs(sources);
 
-       for (n = 0, p = list; p; p = p->next) {
-               struct commit *commit = p->item;
-               unsigned flags = commit->object.flags;
-
-               commit->util = &weights[n++];
-               switch (count_interesting_parents(commit)) {
-               case 0:
-                       if (!(flags & TREESAME)) {
-                               node_data(commit)->weight = 1;
-                               counted++;
-                               show_list("bisection 2 count one",
-                                         counted, list);
-                       }
-                       break;
-               case 1:
-                       node_data(commit)->weight = -1;
-                       break;
-               default:
-                       node_data(commit)->weight = -2;
-                       break;
-               }
-       }
-
-       show_list("bisection 2 initialize", counted, list);
-
-       for (p = list; p; p = p->next) {
-               if (!(p->item->object.flags & UNINTERESTING)
-                && (node_data(p->item)->weight == -2)) {
-                       compute_weight(p->item);
-
-                       /* Does it happen to be at exactly half-way? */
-                       if (halfway(p->item))
-                               return;
-                       counted++;
-               }
-       }
-
-       show_list("bisection 2 compute_weight", counted, list);
-
-       while (counted < total) {
-               for (p = list; p; p = p->next) {
-                       struct commit_list *q;
-                       unsigned flags = p->item->object.flags;
-
-                       if (0 <= node_data(p->item)->weight)
-                               continue;
-                       for (q = p->item->parents; q; q = q->next) {
-                               if (q->item->object.flags & UNINTERESTING)
-                                       continue;
-                               if (0 <= node_data(q->item)->weight)
-                                       break;
-                       }
-                       if (!q)
-                               continue;
-
-                       /*
-                        * weight for p is unknown but q is known.
-                        * add one for p itself if p is to be counted,
-                        * otherwise inherit it from q directly.
-                        */
-                       node_data(p->item)->weight = node_data(q->item)->weight;
-                       if (!(flags & TREESAME)) {
-                               node_data(p->item)->weight++;
-                               counted++;
-                               show_list("bisection 2 count one",
-                                         counted, list);
-                       }
-
-                       /* Does it happen to be at exactly half-way? */
-                       if (halfway(p->item))
-                               return;
-               }
-       }
-       show_list("bisection 2 counted all", counted, list);
+       show_list("bisection 3 result", total, list);
 }
 
 struct commit_list *find_bisection(struct commit_list *list,
-- 
2.7.1.354.gd492730.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to