"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: Landry's firefox repository
To:
Christian Weisgerber <naddy@mips.inka.de>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 26 Mar 2024 14:53:22 +0100

Download raw body.

Thread
On Wed, Jan 31, 2024 at 07:27:10PM +0100, Christian Weisgerber wrote:
> Stefan Sperling:
> 
> > > That merge commit de0fe38 prevents rebasing "master" on "origin/master".
> > 
> > Does "master" above mean commit 9cd4661 (HEAD -> master)?
> 
> Yes.

Below is a series of 3 patches which fix this problem by teaching the
commit graph about topological sort order.
You can now run 'got rebase -t' and will forward the reference instead
of attempting a bogus rebase which causes needless merge conflicts.

However, there is a cost. The overhead I mention in the log message
is quite significant on large repositories:

$ pwd /git/linux.git
$ time got log -l5 -b -s -t
2024-03-13 master  Merge tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux
2024-03-13 f88c3fb mm, slab: remove last vestiges of SLAB_MEM_SPREAD
2024-02-07 7412dc6 dump_stack: Do not get cpu_sync for panic CPU
2024-03-13 0ea680e Merge tag 'slab-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
2024-02-07 d988d9a panic: Flush kernel log buffer at the end
    1m32.27s real     1m03.10s user     0m30.97s system
$ time got log -l5 -b -s
2024-03-13 master  Merge tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux
2024-03-13 f88c3fb mm, slab: remove last vestiges of SLAB_MEM_SPREAD
2024-02-07 7412dc6 dump_stack: Do not get cpu_sync for panic CPU
2024-03-13 0ea680e Merge tag 'slab-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
2024-02-07 d988d9a panic: Flush kernel log buffer at the end
    0m01.03s real     0m00.72s user     0m00.31s system
$

Anyway, having this feature available is better than not having it.
And there are ways to improve performance going forward.

The 3 diffs should all apply with 'got patch' in one go.

ok?

-----------------------------------------------
 
 add support for topological sorting to the commit graph
 
 The algorithm implemented here is based on a description I read
 on github's blog. See code comments for details.
 
diff 78e82c8a2a2cd0fed316492b18264f5d8727f961 849e12c1d2dd36d497ec579cefa88b1b2dc117c8
commit - 78e82c8a2a2cd0fed316492b18264f5d8727f961
commit + 849e12c1d2dd36d497ec579cefa88b1b2dc117c8
blob - 1a1732a2994a8428496e498b70ebb81e222507aa
blob + a776f0d370166a89c45161d249a492830e104ced
--- got/got.c
+++ got/got.c
@@ -4481,8 +4481,13 @@ print_commits(struct got_object_id *root_id, struct go
 	err = got_commit_graph_open(&graph, path, !log_branches);
 	if (err)
 		return err;
-	err = got_commit_graph_iter_start(graph, root_id, repo,
-	    check_cancelled, NULL);
+	if (log_branches) {
+		err = got_commit_graph_toposort(graph, root_id, repo,
+		    check_cancelled, NULL);
+	} else {
+		err = got_commit_graph_iter_start(graph, root_id, repo,
+		    check_cancelled, NULL);
+	}
 	if (err)
 		goto done;
 	for (;;) {
blob - 9f0a8aa211fe8d5dd0defcc6fe994cafada284c3
blob + 35d7c7072f5ce67648f7f2c4a878396746ad3e45
--- include/got_commit_graph.h
+++ include/got_commit_graph.h
@@ -23,6 +23,8 @@ void got_commit_graph_close(struct got_commit_graph *)
 const struct got_error *got_commit_graph_iter_start(
     struct got_commit_graph *, struct got_object_id *, struct got_repository *,
     got_cancel_cb, void *);
+const struct got_error *got_commit_graph_toposort(struct got_commit_graph *,
+    struct got_object_id *, struct got_repository *, got_cancel_cb, void *);
 const struct got_error *got_commit_graph_iter_next(struct got_object_id *,
     struct got_commit_graph *, struct got_repository *, got_cancel_cb, void *);
 const struct got_error *got_commit_graph_intersect(struct got_object_id **,
blob - 514ea5bf403b3a7ac375edcf405ab96bcd94cb20
blob + 976b20aab20831ca6c66bb5793542bf438ec1688
--- lib/commit_graph.c
+++ lib/commit_graph.c
@@ -38,10 +38,21 @@
 #include "got_lib_inflate.h"
 #include "got_lib_object.h"
 #include "got_lib_object_idset.h"
+#include "got_lib_object_qid.h"
 
+#ifndef nitems
+#define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
 struct got_commit_graph_node {
 	struct got_object_id id;
 
+	/* Used for topological sorting. */
+	struct got_commit_graph_node *parents[2];
+	struct got_commit_graph_node **more_parents;
+	int nparents;
+	int indegree;
+
 	/* Used only during iteration. */
 	time_t timestamp;
 	TAILQ_ENTRY(got_commit_graph_node) entry;
@@ -61,6 +72,7 @@ struct got_commit_graph {
 
 	int flags;
 #define GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL		0x01
+#define GOT_COMMIT_GRAPH_TOPOSORT			0x02
 
 	/*
 	 * A set of object IDs of known parent commits which we have not yet
@@ -88,7 +100,8 @@ struct got_commit_graph {
 
 	/*
 	 * Nodes which will be passed to the API user next, sorted by
-	 * commit timestmap.
+	 * commit timestamp. Sorted in topological order only if topological
+	 * sorting was requested.
 	 */
 	struct got_commit_graph_iter_list iter_list;
 };
@@ -181,7 +194,8 @@ add_node(struct got_commit_graph_node **new_node,
 		return got_error_from_errno("calloc");
 
 	memcpy(&node->id, commit_id, sizeof(node->id));
-	err = got_object_idset_add(graph->node_ids, &node->id, NULL);
+	node->nparents = -1;
+	err = got_object_idset_add(graph->node_ids, &node->id, node);
 	if (err)
 		free(node);
 	else
@@ -547,6 +561,7 @@ got_commit_graph_close(struct got_commit_graph *graph)
 
 	while ((node = TAILQ_FIRST(&graph->iter_list))) {
 		TAILQ_REMOVE(&graph->iter_list, node, entry);
+		free(node->more_parents);
 		free(node);
 	}
 
@@ -575,6 +590,9 @@ got_commit_graph_iter_start(struct got_commit_graph *g
 	const struct got_error *err = NULL;
 	struct got_commit_graph_node *node;
 
+	/* First-parent traversal is implicitly topological. */
+	graph->flags &= ~GOT_COMMIT_GRAPH_TOPOSORT;
+
 	/* Clear left-over state from previous iteration attempts. */
 	while ((node = TAILQ_FIRST(&graph->iter_list)))
 		TAILQ_REMOVE(&graph->iter_list, node, entry);
@@ -615,7 +633,8 @@ got_commit_graph_iter_next(struct got_object_id *id,
     got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	struct got_commit_graph_node *node;
+	struct got_commit_graph_node *node, *pnode;
+	int i;
 
 	node = TAILQ_FIRST(&graph->iter_list);
 	if (node == NULL) {
@@ -623,17 +642,33 @@ got_commit_graph_iter_next(struct got_object_id *id,
 		return got_error(GOT_ERR_ITER_COMPLETED);
 	}
 
-	while (TAILQ_NEXT(node, entry) == NULL &&
-	    got_object_idset_num_elements(graph->open_branches) > 0) {
-		err = fetch_commits_from_open_branches(graph, repo,
-		    cancel_cb, cancel_arg);
-		if (err)
-			return err;
+	if (graph->flags & GOT_COMMIT_GRAPH_TOPOSORT) {
+		/* At least one node with in-degree zero must exist. */
+		while (node->indegree != 0)
+			node = TAILQ_NEXT(node, entry);
+	} else {
+		while (TAILQ_NEXT(node, entry) == NULL &&
+		    got_object_idset_num_elements(graph->open_branches) > 0) {
+			err = fetch_commits_from_open_branches(graph, repo,
+			    cancel_cb, cancel_arg);
+			if (err)
+				return err;
+		}
 	}
 
 	memcpy(id, &node->id, sizeof(*id));
 
 	TAILQ_REMOVE(&graph->iter_list, node, entry);
+	if (graph->flags & GOT_COMMIT_GRAPH_TOPOSORT) {
+		/* When visiting a commit decrement in-degree of all parents. */
+		for (i = 0; i < node->nparents; i++) {
+			if (i < nitems(node->parents))
+				pnode = node->parents[i];
+			else
+				pnode = node->more_parents[i];
+			pnode->indegree--;
+		}
+	}
 	free(node);
 	return NULL;
 }
@@ -749,3 +784,164 @@ done:
 		got_commit_graph_close(graph2);
 	return err;
 }
+
+/*
+ * Sort the graph for traversal in topological order.
+ *
+ * This implementation is based on the description of topological sorting
+ * of git commits by Derrick Stolee at
+ * https://github.blog/2022-08-30-gits-database-internals-ii-commit-history-queries/#topological-sorting
+ * which reads as follows:
+ *
+ * The basic algorithm for topological sorting is Kahn’s algorithm which
+ * follows two big steps:
+ * 1. Walk all reachable commits, counting the number of times a commit appears
+ * as a parent of another commit. Call these numbers the in-degree of the
+ * commit, referencing the number of incoming edges.
+ * 2. Walk the reachable commits, but only visit a commit if its in-degree
+ * value is zero. When visiting a commit, decrement the in-degree value of
+ * each parent.
+ * 
+ * This algorithm works because at least one of our starting points will
+ * have in-degree zero, and then decrementing the in-degree value is similar
+ * to deleting the commit from the graph, always having at least one commit
+ * with in-degree zero.
+ */
+const struct got_error *
+got_commit_graph_toposort(struct got_commit_graph *graph,
+    struct got_object_id *id, struct got_repository *repo,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err = NULL;
+	struct got_commit_graph_node *node = NULL, *pnode = NULL;
+	struct got_commit_object *commit = NULL;
+	struct got_object_id_queue commits;
+	const struct got_object_id_queue *parent_ids;
+	struct got_object_qid *qid = NULL, *pid;
+	int i;
+
+	STAILQ_INIT(&commits);
+
+	if (graph->flags & GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL)
+		return got_commit_graph_iter_start(graph, id, repo,
+		    cancel_cb, cancel_arg);
+
+	/* Clear left-over state from previous iteration attempts. */
+	while ((node = TAILQ_FIRST(&graph->iter_list)))
+		TAILQ_REMOVE(&graph->iter_list, node, entry);
+	err = got_object_idset_for_each(graph->open_branches,
+	    remove_branch_tip, graph->open_branches);
+	if (err)
+		return err;
+
+	graph->flags |= GOT_COMMIT_GRAPH_TOPOSORT;
+
+	/*
+	 * Sorting the commit graph in topological order requires visiting
+	 * every reachable commit. This is very expensive but there are
+	 * ways to speed this up significantly in the future:
+	 * 1) Run this loop in got-read-pack if possible.
+	 * 2) Use Git's commit-graph file to compute the result incrementally.
+	 *    See the blog post linked above for details.
+	 */
+	err = got_object_qid_alloc_partial(&qid);
+	if (err)
+		return err;
+	memcpy(&qid->id, id, sizeof(qid->id));
+	STAILQ_INSERT_TAIL(&commits, qid, entry);
+	while (!STAILQ_EMPTY(&commits)) {
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		qid = STAILQ_FIRST(&commits);
+		STAILQ_REMOVE_HEAD(&commits, entry);
+		err = got_object_open_as_commit(&commit, repo, &qid->id);
+		if (err)
+			break;
+
+		node = got_object_idset_get(graph->node_ids, &qid->id);
+		if (node == NULL) {
+			err = add_node(&node, graph, id, repo);
+			if (err)
+				break;
+			TAILQ_INSERT_TAIL(&graph->iter_list, node, entry);
+		}
+
+		got_object_qid_free(qid);
+		qid = NULL;
+
+		if (node->timestamp != 0) /* already traversed once */
+			continue;
+
+		if (node->nparents == -1) {
+			node->nparents = got_object_commit_get_nparents(commit);
+			if (node->nparents > nitems(node->parents)) {
+				node->more_parents = calloc(node->nparents,
+				    sizeof(*node->more_parents));
+				if (node->more_parents == NULL) {
+					err = got_error_from_errno("calloc");
+					break;
+				}
+			}
+
+		}
+
+		node->timestamp = got_object_commit_get_committer_time(commit);
+		parent_ids = got_object_commit_get_parent_ids(commit);
+		i = 0;
+		STAILQ_FOREACH(pid, parent_ids, entry) {
+			if (cancel_cb) {
+				err = (*cancel_cb)(cancel_arg);
+				if (err)
+					goto done;
+			}
+
+			/*
+			 * Increment the in-degree counter every time a given
+			 * commit appears as the parent of another commit.
+			 */
+			pnode = got_object_idset_get(graph->node_ids, &pid->id);
+			if (pnode == NULL) {
+				err = add_node(&pnode, graph, &pid->id, repo);
+				if (err)
+					goto done;
+				TAILQ_INSERT_TAIL(&graph->iter_list, pnode,
+				    entry);
+			}
+			pnode->indegree++;
+
+			/*
+			 * Cache parent pointers on the node to make future
+			 * in-degree updates easier.
+			 */
+			if (node->nparents <= nitems(node->parents)) {
+				node->parents[i] = pnode;
+			} else {
+				node->more_parents[i] = pnode;
+				if (i < nitems(node->parents))
+					node->parents[i] = pnode;
+			}
+			i++;
+
+			/* Keep traversing through all parent commits. */
+			err = got_object_qid_alloc_partial(&qid);
+			if (err)
+				goto done;
+			memcpy(&qid->id, &pid->id, sizeof(qid->id));
+			STAILQ_INSERT_TAIL(&commits, qid, entry);
+			qid = NULL;
+		}
+
+		got_object_commit_close(commit);
+		commit = NULL;
+	}
+done:
+	if (commit)
+		got_object_commit_close(commit);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&commits);
+	return err;
+}
blob - 4a83ec7819b07c9e4a689fc50177a7b979abbbf9
blob + aebdf49c02bb9cd38df6eb3530904c6237c5fb9c
--- regress/cmdline/log.sh
+++ regress/cmdline/log.sh
@@ -1241,6 +1241,68 @@ test_log_commit_keywords() {
 	test_done "$testroot" "$ret"
 }
 
+test_log_toposort() {
+	local testroot=`test_init log_toposort`
+	local commit0=`git_show_head $testroot/repo`
+	local author_time0=`git_show_author_time $testroot/repo`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	echo aaa > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m 'change alpha' >/dev/null)
+	local commit1=`git_show_head $testroot/repo`
+	local author_time1=`git_show_author_time $testroot/repo`
+
+	got branch -r $testroot/repo -c $commit0 newbranch
+	(cd $testroot/wt && got update -b newbranch > /dev/null)
+	echo ddd > $testroot/wt/gamma/delta
+	(cd $testroot/wt && got commit -m 'change delta' >/dev/null)
+	local commit2=`git_show_branch_head $testroot/repo newbranch`
+	local author_time2=`git_show_author_time $testroot/repo newbranch`
+
+	echo zzz > $testroot/wt/epsilon/zeta
+	(cd $testroot/wt && got commit -m 'change zeta' >/dev/null)
+	local commit3=`git_show_head $testroot/repo`
+	local author_time3=`git_show_author_time $testroot/repo newbranch`
+
+	(cd $testroot/wt && got update -b master > /dev/null)
+	(cd $testroot/wt && got merge newbranch > /dev/null)
+	local merge_commit=`git_show_head $testroot/repo`
+	local merge_time=`git_show_author_time $testroot/repo`
+
+	local short_commit0=`trim_obj_id 33 $commit0`
+	local short_commit1=`trim_obj_id 33 $commit1`
+	local short_commit2=`trim_obj_id 33 $commit2`
+	local short_commit3=`trim_obj_id 33 $commit3`
+
+	d_0=`date -u -r $author_time0 +"%G-%m-%d"`
+	d_1=`date -u -r $author_time1 +"%G-%m-%d"`
+	d_2=`date -u -r $author_time2 +"%G-%m-%d"`
+	d_3=`date -u -r $author_time3 +"%G-%m-%d"`
+	d_m=`date -u -r $merge_time +"%G-%m-%d"`
+
+	got log -r $testroot/repo -s -b > $testroot/stdout
+	cat > $testroot/stdout.expected <<EOF
+$d_m master  merge refs/heads/newbranch into refs/heads/master
+$d_1 $short_commit1 change alpha
+$d_3 newbranch change zeta
+$d_2 $short_commit2 change delta
+$d_0 $short_commit0 adding the test tree
+EOF
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done "$testroot" "$ret"
+}
+
+
 test_parseargs "$@"
 run_test test_log_in_repo
 run_test test_log_in_bare_repo
@@ -1259,3 +1321,4 @@ run_test test_log_merge_commit_nonexistent_path
 run_test test_log_submodule
 run_test test_log_diffstat
 run_test test_log_commit_keywords
+run_test test_log_toposort

-----------------------------------------------
 
 add log -t option which enables topological sorting of commits
 
 Because the current implementation of toposort is expensive, add a
 flag which enables it. I would rather not have this option and just
 use toposort by default, however more work is required to achieve
 acceptable performance.
 
diff 849e12c1d2dd36d497ec579cefa88b1b2dc117c8 8bdc66bd9541af47082b26fe87471b965eb0e455
commit - 849e12c1d2dd36d497ec579cefa88b1b2dc117c8
commit + 8bdc66bd9541af47082b26fe87471b965eb0e455
blob - 0b7ff7916ba48b09d48cbc1c37cf75175316b469
blob + ee41732e49ededaf4c54218c0d0b9deb2c1feff9
--- got/got.1
+++ got/got.1
@@ -890,7 +890,7 @@ and gives no special significance to the location of p
 in a pattern.
 .It Xo
 .Cm log
-.Op Fl bdPpRs
+.Op Fl bdPpRst
 .Op Fl C Ar number
 .Op Fl c Ar commit
 .Op Fl l Ar N
@@ -1038,6 +1038,13 @@ Cannot be used together with the
 or
 .Fl P
 option.
+.It Fl t
+Display commits in topological order.
+This option has no effect without the
+.Fl b 
+option because a linear history is sorted in topological order by definition.
+Topological sorting is disabled by default because the present implementation
+requires that commit history is fully traversed before any output can be shown.
 .It Fl x Ar commit
 Stop traversing commit history immediately after the specified
 .Ar commit
blob - a776f0d370166a89c45161d249a492830e104ced
blob + 74e2fb65d552093211aa85ccb3eaaa5d77f81f6f
--- got/got.c
+++ got/got.c
@@ -4459,7 +4459,7 @@ print_commits(struct got_object_id *root_id, struct go
     struct got_repository *repo, const char *path, int show_changed_paths,
     int show_diffstat, int show_patch, const char *search_pattern,
     int diff_context, int limit, int log_branches, int reverse_display_order,
-    struct got_reflist_object_id_map *refs_idmap, int one_line,
+    struct got_reflist_object_id_map *refs_idmap, int one_line, int toposort,
     FILE *tmpfile)
 {
 	const struct got_error *err;
@@ -4481,7 +4481,7 @@ print_commits(struct got_object_id *root_id, struct go
 	err = got_commit_graph_open(&graph, path, !log_branches);
 	if (err)
 		return err;
-	if (log_branches) {
+	if (log_branches && toposort) {
 		err = got_commit_graph_toposort(graph, root_id, repo,
 		    check_cancelled, NULL);
 	} else {
@@ -4615,7 +4615,7 @@ done:
 __dead static void
 usage_log(void)
 {
-	fprintf(stderr, "usage: %s log [-bdPpRs] [-C number] [-c commit] "
+	fprintf(stderr, "usage: %s log [-bdPpRst] [-C number] [-c commit] "
 	    "[-l N] [-r repository-path] [-S search-pattern] [-x commit] "
 	    "[path]\n", getprogname());
 	exit(1);
@@ -4651,6 +4651,7 @@ cmd_log(int argc, char *argv[])
 	int diff_context = -1, ch;
 	int show_changed_paths = 0, show_patch = 0, limit = 0, log_branches = 0;
 	int show_diffstat = 0, reverse_display_order = 0, one_line = 0;
+	int toposort = 0;
 	const char *errstr;
 	struct got_reflist_head refs;
 	struct got_reflist_object_id_map *refs_idmap = NULL;
@@ -4668,7 +4669,7 @@ cmd_log(int argc, char *argv[])
 
 	limit = get_default_log_limit();
 
-	while ((ch = getopt(argc, argv, "bC:c:dl:PpRr:S:sx:")) != -1) {
+	while ((ch = getopt(argc, argv, "bC:c:dl:PpRr:S:stx:")) != -1) {
 		switch (ch) {
 		case 'b':
 			log_branches = 1;
@@ -4714,6 +4715,9 @@ cmd_log(int argc, char *argv[])
 		case 's':
 			one_line = 1;
 			break;
+		case 't':
+			toposort = 1;
+			break;
 		case 'x':
 			end_commit = optarg;
 			break;
@@ -4880,7 +4884,7 @@ cmd_log(int argc, char *argv[])
 	error = print_commits(start_id, end_id, repo, path ? path : "",
 	    show_changed_paths, show_diffstat, show_patch, search_pattern,
 	    diff_context, limit, log_branches, reverse_display_order,
-	    refs_idmap, one_line, tmpfile);
+	    refs_idmap, one_line, toposort, tmpfile);
 done:
 	free(path);
 	free(repo_path);

-----------------------------------------------
 
 add rebase -t option which enables topological sorting of commits
 
 Running rebase with -t fixes a problematic case of spurious conflicts
 encountered by naddy@ on landry's firefox package git repository.
 
 Because the current implementation of toposort is expensive, add a
 flag which enables it. I would rather not have this option and just
 use toposort by default, however more work is required to achieve
 acceptable performance.
 
diff 8bdc66bd9541af47082b26fe87471b965eb0e455 b22cdb3e691c06335203b754af1796a467df2d5e
commit - 8bdc66bd9541af47082b26fe87471b965eb0e455
commit + b22cdb3e691c06335203b754af1796a467df2d5e
blob - 88729baf00e38ec442bf51dbd5f42196934e953a
blob + 93b12cd6d160b9b15d26706c00de377d49219f15
--- cvg/cvg.c
+++ cvg/cvg.c
@@ -2066,7 +2066,7 @@ check_linear_ancestry(struct got_object_id *commit_id,
 	struct got_object_id *yca_id;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    commit_id, base_commit_id, 1, repo, check_cancelled, NULL);
+	    commit_id, base_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		return err;
 
blob - ee41732e49ededaf4c54218c0d0b9deb2c1feff9
blob + 1f45eda9d978cbf5070f843b6e981b5c212c23ce
--- got/got.1
+++ got/got.1
@@ -2711,7 +2711,7 @@ This option cannot be used with
 .Tg rb
 .It Xo
 .Cm rebase
-.Op Fl aCclX
+.Op Fl aCcltX
 .Op Ar branch
 .Xc
 .Dl Pq alias: Cm rb
@@ -2903,6 +2903,16 @@ If this option is used,
 does not require a work tree.
 None of the other options can be used together with
 .Fl l .
+.It Fl t
+Traverse commit history in topological order when seaching for the common
+ancestor commit.
+Topological sorting can avoid spurious merge conflicts in case a merge commit
+exists in the history of the branch which was set with
+.Cm got update -b
+before the rebase operation was started.
+Topological sorting is disabled by default because the present implementation
+requires that commit history is fully traversed before the rebase operation
+can do any work.
 .It Fl X
 Delete backups created by past rebase operations, represented by references
 in the
blob - 74e2fb65d552093211aa85ccb3eaaa5d77f81f6f
blob + 4a610d20dc706aca0b5784cbb7055ba38c6e7234
--- got/got.c
+++ got/got.c
@@ -2891,7 +2891,7 @@ check_linear_ancestry(struct got_object_id *commit_id,
 	struct got_object_id *yca_id;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    commit_id, base_commit_id, 1, repo, check_cancelled, NULL);
+	    commit_id, base_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		return err;
 
@@ -10635,7 +10635,7 @@ done:
 __dead static void
 usage_rebase(void)
 {
-	fprintf(stderr, "usage: %s rebase [-aCclX] [branch]\n", getprogname());
+	fprintf(stderr, "usage: %s rebase [-aCcltX] [branch]\n", getprogname());
 	exit(1);
 }
 
@@ -11020,7 +11020,7 @@ print_backup_ref(const char *branch_name, const char *
 		goto done;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    old_commit_id, new_commit_id, 1, repo, check_cancelled, NULL);
+	    old_commit_id, new_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		goto done;
 
@@ -11268,7 +11268,7 @@ cmd_rebase(int argc, char *argv[])
 	int ch, rebase_in_progress = 0, abort_rebase = 0, continue_rebase = 0;
 	int histedit_in_progress = 0, merge_in_progress = 0;
 	int create_backup = 1, list_backups = 0, delete_backups = 0;
-	int allow_conflict = 0;
+	int allow_conflict = 0, toposort = 0;
 	struct got_object_id_queue commits;
 	struct got_pathlist_head merged_paths;
 	const struct got_object_id_queue *parent_ids;
@@ -11286,7 +11286,7 @@ cmd_rebase(int argc, char *argv[])
 		err(1, "pledge");
 #endif
 
-	while ((ch = getopt(argc, argv, "aCclX")) != -1) {
+	while ((ch = getopt(argc, argv, "aCcltX")) != -1) {
 		switch (ch) {
 		case 'a':
 			abort_rebase = 1;
@@ -11300,6 +11300,9 @@ cmd_rebase(int argc, char *argv[])
 		case 'l':
 			list_backups = 1;
 			break;
+		case 't':
+			toposort = 1;
+			break;
 		case 'X':
 			delete_backups = 1;
 			break;
@@ -11500,8 +11503,8 @@ cmd_rebase(int argc, char *argv[])
 		}
 
 		error = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-		    base_commit_id, branch_head_commit_id, 1, repo,
-		    check_cancelled, NULL);
+		    base_commit_id, branch_head_commit_id, !toposort, toposort,
+		    repo, check_cancelled, NULL);
 		if (error) {
 			if (error->code == GOT_ERR_ANCESTRY) {
 				error = got_error_msg(GOT_ERR_ANCESTRY,
@@ -13510,7 +13513,7 @@ cmd_merge(int argc, char *argv[])
 	}
 
 	error = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    wt_branch_tip, branch_tip, 0, repo,
+	    wt_branch_tip, branch_tip, 0, 0, repo,
 	    check_cancelled, NULL);
 	if (error && error->code != GOT_ERR_ANCESTRY)
 		goto done;
blob - 35d7c7072f5ce67648f7f2c4a878396746ad3e45
blob + a425d8d817ab860cb50f4d849945ba8fe207c0fd
--- include/got_commit_graph.h
+++ include/got_commit_graph.h
@@ -34,4 +34,4 @@ const struct got_error *got_commit_graph_intersect(str
 /* Find the youngest common ancestor of two commits. */
 const struct got_error *got_commit_graph_find_youngest_common_ancestor(
     struct got_object_id **, struct got_object_id *, struct got_object_id *,
-    int, struct got_repository *, got_cancel_cb, void *);
+    int, int, struct got_repository *, got_cancel_cb, void *);
blob - 976b20aab20831ca6c66bb5793542bf438ec1688
blob + 3748c660e7ad27e0b645825b852620a1a041ed71
--- lib/commit_graph.c
+++ lib/commit_graph.c
@@ -702,12 +702,14 @@ find_yca_add_id(struct got_object_id **yca_id, struct 
  * common ancestors.
  *
  * If first_parent_traversal is nonzero, only linear history is considered.
+ * If toposort is set then sort commits in topological order before
+ * traversing them.
  */
 const struct got_error *
 got_commit_graph_find_youngest_common_ancestor(struct got_object_id **yca_id,
     struct got_object_id *commit_id, struct got_object_id *commit_id2,
-    int first_parent_traversal,
-    struct got_repository *repo, got_cancel_cb cancel_cb, void *cancel_arg)
+    int first_parent_traversal, int toposort, struct got_repository *repo,
+    got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
 	struct got_commit_graph *graph = NULL, *graph2 = NULL;
@@ -728,16 +730,28 @@ got_commit_graph_find_youngest_common_ancestor(struct 
 	if (err)
 		goto done;
 
-	err = got_commit_graph_iter_start(graph, commit_id, repo,
-	    cancel_cb, cancel_arg);
-	if (err)
-		goto done;
+	if (toposort) {
+		err = got_commit_graph_toposort(graph, commit_id, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
 
-	err = got_commit_graph_iter_start(graph2, commit_id2, repo,
-	    cancel_cb, cancel_arg);
-	if (err)
-		goto done;
+		err = got_commit_graph_toposort(graph2, commit_id2, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	} else {
+		err = got_commit_graph_iter_start(graph, commit_id, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
 
+		err = got_commit_graph_iter_start(graph2, commit_id2, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	}
+
 	for (;;) {
 		if (cancel_cb) {
 			err = (*cancel_cb)(cancel_arg);
blob - 681905dc26b73d6a41382533ca6a87a306c8a240
blob + b6be87a769c4d7440f941e66aabb66e589e19a77
--- lib/send.c
+++ lib/send.c
@@ -226,7 +226,7 @@ check_common_ancestry(const char *refname, struct got_
 		    "bad object type on server for %s", refname);
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    my_id, their_id, 0, repo, cancel_cb, cancel_arg);
+	    my_id, their_id, 0, 0, repo, cancel_cb, cancel_arg);
 	if (err)
 		return err;
 	if (yca_id == NULL)
blob - 46f11f7c161db17549ba3c6ba4055ffcc81b97ce
blob + 1af0c70b3ab152b32f52bbe7f1d45ecf2b38daba
--- regress/cmdline/rebase.sh
+++ regress/cmdline/rebase.sh
@@ -2236,8 +2236,7 @@ test_rebase_merged_history_traversal() {
 	local commit1=`git_show_branch_head $testroot/repo master`
 
 	(cd $testroot/wt && got branch newbranch2) >/dev/null
-	(cd $testroot/wt && got rebase newbranch1) > $testroot/stdout \
-		2> $testroot/stderr
+	(cd $testroot/wt && got rebase -t newbranch1) > $testroot/stdout
 
 	echo "Forwarding refs/heads/newbranch1 to commit $commit1" > \
 		$testroot/stdout.expected
@@ -2245,8 +2244,9 @@ test_rebase_merged_history_traversal() {
 		>> $testroot/stdout.expected
 
 	if ! cmp -s $testroot/stdout.expected $testroot/stdout; then
-		#diff -u $testroot/stdout.expected $testroot/stdout
-		ret="xfail ($(head -n1 $testroot/stdout))"
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "1"
+		return 1
 	fi
 
 	test_done "$testroot" "$ret"