"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: Landry's firefox repository
To:
Christian Weisgerber <naddy@mips.inka.de>, gameoftrees@openbsd.org
Date:
Wed, 27 Mar 2024 11:17:32 +0100

Download raw body.

Thread
On Tue, Mar 26, 2024 at 03:02:54PM +0100, Stefan Sperling wrote:
> On Tue, Mar 26, 2024 at 02:53:22PM +0100, Stefan Sperling wrote:
> > On Wed, Jan 31, 2024 at 07:27:10PM +0100, Christian Weisgerber wrote:
> > > Stefan Sperling:
> > > 
> > > > > That merge commit de0fe38 prevents rebasing "master" on "origin/master".
> > > > 
> > > > Does "master" above mean commit 9cd4661 (HEAD -> master)?
> > > 
> > > Yes.
> > 
> > Below is a series of 3 patches which fix this problem by teaching the
> > commit graph about topological sort order.
> 
> The changes I sent were not nicely split up, mixing unrelated got.c
> and regression test changes into the first commit.
> 
> This version should be easier to review:

Here is another version which avoids the need to add a new option
to 'got rebase'. Instead 'got rebase' checks whether a merge commit
is present in the new base branch and then searches for a better
youngest common ancestor using toposort, taking the merged commits
into account. This means running 'got rebase' should "just work".

The price we pay is some additional run-time overhead. But I am happy
to let rebase run a bit slower if this means we can avoid needless
conflicts the user will have to figure out what to do with.

The 'got log' command still gets the new -t option however.

ok?

-----------------------------------------------
 add support for topological sorting to the commit graph
 
 The algorithm implemented here is based on a description I read
 on github's blog. See code comments for details.
 
diff 38e11cc05b40eb2d4fe81868dccdf2c59494efa4 d82c03f714cc334d12659765031a962184178f44
commit - 38e11cc05b40eb2d4fe81868dccdf2c59494efa4
commit + d82c03f714cc334d12659765031a962184178f44
blob - 9f0a8aa211fe8d5dd0defcc6fe994cafada284c3
blob + 35d7c7072f5ce67648f7f2c4a878396746ad3e45
--- include/got_commit_graph.h
+++ include/got_commit_graph.h
@@ -23,6 +23,8 @@ void got_commit_graph_close(struct got_commit_graph *)
 const struct got_error *got_commit_graph_iter_start(
     struct got_commit_graph *, struct got_object_id *, struct got_repository *,
     got_cancel_cb, void *);
+const struct got_error *got_commit_graph_toposort(struct got_commit_graph *,
+    struct got_object_id *, struct got_repository *, got_cancel_cb, void *);
 const struct got_error *got_commit_graph_iter_next(struct got_object_id *,
     struct got_commit_graph *, struct got_repository *, got_cancel_cb, void *);
 const struct got_error *got_commit_graph_intersect(struct got_object_id **,
blob - 514ea5bf403b3a7ac375edcf405ab96bcd94cb20
blob + 976b20aab20831ca6c66bb5793542bf438ec1688
--- lib/commit_graph.c
+++ lib/commit_graph.c
@@ -38,10 +38,21 @@
 #include "got_lib_inflate.h"
 #include "got_lib_object.h"
 #include "got_lib_object_idset.h"
+#include "got_lib_object_qid.h"
 
+#ifndef nitems
+#define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
 struct got_commit_graph_node {
 	struct got_object_id id;
 
+	/* Used for topological sorting. */
+	struct got_commit_graph_node *parents[2];
+	struct got_commit_graph_node **more_parents;
+	int nparents;
+	int indegree;
+
 	/* Used only during iteration. */
 	time_t timestamp;
 	TAILQ_ENTRY(got_commit_graph_node) entry;
@@ -61,6 +72,7 @@ struct got_commit_graph {
 
 	int flags;
 #define GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL		0x01
+#define GOT_COMMIT_GRAPH_TOPOSORT			0x02
 
 	/*
 	 * A set of object IDs of known parent commits which we have not yet
@@ -88,7 +100,8 @@ struct got_commit_graph {
 
 	/*
 	 * Nodes which will be passed to the API user next, sorted by
-	 * commit timestmap.
+	 * commit timestamp. Sorted in topological order only if topological
+	 * sorting was requested.
 	 */
 	struct got_commit_graph_iter_list iter_list;
 };
@@ -181,7 +194,8 @@ add_node(struct got_commit_graph_node **new_node,
 		return got_error_from_errno("calloc");
 
 	memcpy(&node->id, commit_id, sizeof(node->id));
-	err = got_object_idset_add(graph->node_ids, &node->id, NULL);
+	node->nparents = -1;
+	err = got_object_idset_add(graph->node_ids, &node->id, node);
 	if (err)
 		free(node);
 	else
@@ -547,6 +561,7 @@ got_commit_graph_close(struct got_commit_graph *graph)
 
 	while ((node = TAILQ_FIRST(&graph->iter_list))) {
 		TAILQ_REMOVE(&graph->iter_list, node, entry);
+		free(node->more_parents);
 		free(node);
 	}
 
@@ -575,6 +590,9 @@ got_commit_graph_iter_start(struct got_commit_graph *g
 	const struct got_error *err = NULL;
 	struct got_commit_graph_node *node;
 
+	/* First-parent traversal is implicitly topological. */
+	graph->flags &= ~GOT_COMMIT_GRAPH_TOPOSORT;
+
 	/* Clear left-over state from previous iteration attempts. */
 	while ((node = TAILQ_FIRST(&graph->iter_list)))
 		TAILQ_REMOVE(&graph->iter_list, node, entry);
@@ -615,7 +633,8 @@ got_commit_graph_iter_next(struct got_object_id *id,
     got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	struct got_commit_graph_node *node;
+	struct got_commit_graph_node *node, *pnode;
+	int i;
 
 	node = TAILQ_FIRST(&graph->iter_list);
 	if (node == NULL) {
@@ -623,17 +642,33 @@ got_commit_graph_iter_next(struct got_object_id *id,
 		return got_error(GOT_ERR_ITER_COMPLETED);
 	}
 
-	while (TAILQ_NEXT(node, entry) == NULL &&
-	    got_object_idset_num_elements(graph->open_branches) > 0) {
-		err = fetch_commits_from_open_branches(graph, repo,
-		    cancel_cb, cancel_arg);
-		if (err)
-			return err;
+	if (graph->flags & GOT_COMMIT_GRAPH_TOPOSORT) {
+		/* At least one node with in-degree zero must exist. */
+		while (node->indegree != 0)
+			node = TAILQ_NEXT(node, entry);
+	} else {
+		while (TAILQ_NEXT(node, entry) == NULL &&
+		    got_object_idset_num_elements(graph->open_branches) > 0) {
+			err = fetch_commits_from_open_branches(graph, repo,
+			    cancel_cb, cancel_arg);
+			if (err)
+				return err;
+		}
 	}
 
 	memcpy(id, &node->id, sizeof(*id));
 
 	TAILQ_REMOVE(&graph->iter_list, node, entry);
+	if (graph->flags & GOT_COMMIT_GRAPH_TOPOSORT) {
+		/* When visiting a commit decrement in-degree of all parents. */
+		for (i = 0; i < node->nparents; i++) {
+			if (i < nitems(node->parents))
+				pnode = node->parents[i];
+			else
+				pnode = node->more_parents[i];
+			pnode->indegree--;
+		}
+	}
 	free(node);
 	return NULL;
 }
@@ -749,3 +784,164 @@ done:
 		got_commit_graph_close(graph2);
 	return err;
 }
+
+/*
+ * Sort the graph for traversal in topological order.
+ *
+ * This implementation is based on the description of topological sorting
+ * of git commits by Derrick Stolee at
+ * https://github.blog/2022-08-30-gits-database-internals-ii-commit-history-queries/#topological-sorting
+ * which reads as follows:
+ *
+ * The basic algorithm for topological sorting is Kahn’s algorithm which
+ * follows two big steps:
+ * 1. Walk all reachable commits, counting the number of times a commit appears
+ * as a parent of another commit. Call these numbers the in-degree of the
+ * commit, referencing the number of incoming edges.
+ * 2. Walk the reachable commits, but only visit a commit if its in-degree
+ * value is zero. When visiting a commit, decrement the in-degree value of
+ * each parent.
+ * 
+ * This algorithm works because at least one of our starting points will
+ * have in-degree zero, and then decrementing the in-degree value is similar
+ * to deleting the commit from the graph, always having at least one commit
+ * with in-degree zero.
+ */
+const struct got_error *
+got_commit_graph_toposort(struct got_commit_graph *graph,
+    struct got_object_id *id, struct got_repository *repo,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err = NULL;
+	struct got_commit_graph_node *node = NULL, *pnode = NULL;
+	struct got_commit_object *commit = NULL;
+	struct got_object_id_queue commits;
+	const struct got_object_id_queue *parent_ids;
+	struct got_object_qid *qid = NULL, *pid;
+	int i;
+
+	STAILQ_INIT(&commits);
+
+	if (graph->flags & GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL)
+		return got_commit_graph_iter_start(graph, id, repo,
+		    cancel_cb, cancel_arg);
+
+	/* Clear left-over state from previous iteration attempts. */
+	while ((node = TAILQ_FIRST(&graph->iter_list)))
+		TAILQ_REMOVE(&graph->iter_list, node, entry);
+	err = got_object_idset_for_each(graph->open_branches,
+	    remove_branch_tip, graph->open_branches);
+	if (err)
+		return err;
+
+	graph->flags |= GOT_COMMIT_GRAPH_TOPOSORT;
+
+	/*
+	 * Sorting the commit graph in topological order requires visiting
+	 * every reachable commit. This is very expensive but there are
+	 * ways to speed this up significantly in the future:
+	 * 1) Run this loop in got-read-pack if possible.
+	 * 2) Use Git's commit-graph file to compute the result incrementally.
+	 *    See the blog post linked above for details.
+	 */
+	err = got_object_qid_alloc_partial(&qid);
+	if (err)
+		return err;
+	memcpy(&qid->id, id, sizeof(qid->id));
+	STAILQ_INSERT_TAIL(&commits, qid, entry);
+	while (!STAILQ_EMPTY(&commits)) {
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		qid = STAILQ_FIRST(&commits);
+		STAILQ_REMOVE_HEAD(&commits, entry);
+		err = got_object_open_as_commit(&commit, repo, &qid->id);
+		if (err)
+			break;
+
+		node = got_object_idset_get(graph->node_ids, &qid->id);
+		if (node == NULL) {
+			err = add_node(&node, graph, id, repo);
+			if (err)
+				break;
+			TAILQ_INSERT_TAIL(&graph->iter_list, node, entry);
+		}
+
+		got_object_qid_free(qid);
+		qid = NULL;
+
+		if (node->timestamp != 0) /* already traversed once */
+			continue;
+
+		if (node->nparents == -1) {
+			node->nparents = got_object_commit_get_nparents(commit);
+			if (node->nparents > nitems(node->parents)) {
+				node->more_parents = calloc(node->nparents,
+				    sizeof(*node->more_parents));
+				if (node->more_parents == NULL) {
+					err = got_error_from_errno("calloc");
+					break;
+				}
+			}
+
+		}
+
+		node->timestamp = got_object_commit_get_committer_time(commit);
+		parent_ids = got_object_commit_get_parent_ids(commit);
+		i = 0;
+		STAILQ_FOREACH(pid, parent_ids, entry) {
+			if (cancel_cb) {
+				err = (*cancel_cb)(cancel_arg);
+				if (err)
+					goto done;
+			}
+
+			/*
+			 * Increment the in-degree counter every time a given
+			 * commit appears as the parent of another commit.
+			 */
+			pnode = got_object_idset_get(graph->node_ids, &pid->id);
+			if (pnode == NULL) {
+				err = add_node(&pnode, graph, &pid->id, repo);
+				if (err)
+					goto done;
+				TAILQ_INSERT_TAIL(&graph->iter_list, pnode,
+				    entry);
+			}
+			pnode->indegree++;
+
+			/*
+			 * Cache parent pointers on the node to make future
+			 * in-degree updates easier.
+			 */
+			if (node->nparents <= nitems(node->parents)) {
+				node->parents[i] = pnode;
+			} else {
+				node->more_parents[i] = pnode;
+				if (i < nitems(node->parents))
+					node->parents[i] = pnode;
+			}
+			i++;
+
+			/* Keep traversing through all parent commits. */
+			err = got_object_qid_alloc_partial(&qid);
+			if (err)
+				goto done;
+			memcpy(&qid->id, &pid->id, sizeof(qid->id));
+			STAILQ_INSERT_TAIL(&commits, qid, entry);
+			qid = NULL;
+		}
+
+		got_object_commit_close(commit);
+		commit = NULL;
+	}
+done:
+	if (commit)
+		got_object_commit_close(commit);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&commits);
+	return err;
+}

-----------------------------------------------
 
 add log -t option which enables topological sorting of commits
 
 Because the current implementation of toposort is expensive, add a
 flag which enables it. I would rather not have this option and just
 use toposort by default, however more work is required to achieve
 acceptable performance.
 
diff d82c03f714cc334d12659765031a962184178f44 f8f9d1dba842acdb1157b10523896ac85f0c0da4
commit - d82c03f714cc334d12659765031a962184178f44
commit + f8f9d1dba842acdb1157b10523896ac85f0c0da4
blob - 0b7ff7916ba48b09d48cbc1c37cf75175316b469
blob + ee41732e49ededaf4c54218c0d0b9deb2c1feff9
--- got/got.1
+++ got/got.1
@@ -890,7 +890,7 @@ and gives no special significance to the location of p
 in a pattern.
 .It Xo
 .Cm log
-.Op Fl bdPpRs
+.Op Fl bdPpRst
 .Op Fl C Ar number
 .Op Fl c Ar commit
 .Op Fl l Ar N
@@ -1038,6 +1038,13 @@ Cannot be used together with the
 or
 .Fl P
 option.
+.It Fl t
+Display commits in topological order.
+This option has no effect without the
+.Fl b 
+option because a linear history is sorted in topological order by definition.
+Topological sorting is disabled by default because the present implementation
+requires that commit history is fully traversed before any output can be shown.
 .It Fl x Ar commit
 Stop traversing commit history immediately after the specified
 .Ar commit
blob - 1a1732a2994a8428496e498b70ebb81e222507aa
blob + 74e2fb65d552093211aa85ccb3eaaa5d77f81f6f
--- got/got.c
+++ got/got.c
@@ -4459,7 +4459,7 @@ print_commits(struct got_object_id *root_id, struct go
     struct got_repository *repo, const char *path, int show_changed_paths,
     int show_diffstat, int show_patch, const char *search_pattern,
     int diff_context, int limit, int log_branches, int reverse_display_order,
-    struct got_reflist_object_id_map *refs_idmap, int one_line,
+    struct got_reflist_object_id_map *refs_idmap, int one_line, int toposort,
     FILE *tmpfile)
 {
 	const struct got_error *err;
@@ -4481,8 +4481,13 @@ print_commits(struct got_object_id *root_id, struct go
 	err = got_commit_graph_open(&graph, path, !log_branches);
 	if (err)
 		return err;
-	err = got_commit_graph_iter_start(graph, root_id, repo,
-	    check_cancelled, NULL);
+	if (log_branches && toposort) {
+		err = got_commit_graph_toposort(graph, root_id, repo,
+		    check_cancelled, NULL);
+	} else {
+		err = got_commit_graph_iter_start(graph, root_id, repo,
+		    check_cancelled, NULL);
+	}
 	if (err)
 		goto done;
 	for (;;) {
@@ -4610,7 +4615,7 @@ done:
 __dead static void
 usage_log(void)
 {
-	fprintf(stderr, "usage: %s log [-bdPpRs] [-C number] [-c commit] "
+	fprintf(stderr, "usage: %s log [-bdPpRst] [-C number] [-c commit] "
 	    "[-l N] [-r repository-path] [-S search-pattern] [-x commit] "
 	    "[path]\n", getprogname());
 	exit(1);
@@ -4646,6 +4651,7 @@ cmd_log(int argc, char *argv[])
 	int diff_context = -1, ch;
 	int show_changed_paths = 0, show_patch = 0, limit = 0, log_branches = 0;
 	int show_diffstat = 0, reverse_display_order = 0, one_line = 0;
+	int toposort = 0;
 	const char *errstr;
 	struct got_reflist_head refs;
 	struct got_reflist_object_id_map *refs_idmap = NULL;
@@ -4663,7 +4669,7 @@ cmd_log(int argc, char *argv[])
 
 	limit = get_default_log_limit();
 
-	while ((ch = getopt(argc, argv, "bC:c:dl:PpRr:S:sx:")) != -1) {
+	while ((ch = getopt(argc, argv, "bC:c:dl:PpRr:S:stx:")) != -1) {
 		switch (ch) {
 		case 'b':
 			log_branches = 1;
@@ -4709,6 +4715,9 @@ cmd_log(int argc, char *argv[])
 		case 's':
 			one_line = 1;
 			break;
+		case 't':
+			toposort = 1;
+			break;
 		case 'x':
 			end_commit = optarg;
 			break;
@@ -4875,7 +4884,7 @@ cmd_log(int argc, char *argv[])
 	error = print_commits(start_id, end_id, repo, path ? path : "",
 	    show_changed_paths, show_diffstat, show_patch, search_pattern,
 	    diff_context, limit, log_branches, reverse_display_order,
-	    refs_idmap, one_line, tmpfile);
+	    refs_idmap, one_line, toposort, tmpfile);
 done:
 	free(path);
 	free(repo_path);
blob - 4a83ec7819b07c9e4a689fc50177a7b979abbbf9
blob + c3e22d2178b4eebd38723079274f19c9e8b9d2d5
--- regress/cmdline/log.sh
+++ regress/cmdline/log.sh
@@ -1241,6 +1241,68 @@ test_log_commit_keywords() {
 	test_done "$testroot" "$ret"
 }
 
+test_log_toposort() {
+	local testroot=`test_init log_toposort`
+	local commit0=`git_show_head $testroot/repo`
+	local author_time0=`git_show_author_time $testroot/repo`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	echo aaa > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m 'change alpha' >/dev/null)
+	local commit1=`git_show_head $testroot/repo`
+	local author_time1=`git_show_author_time $testroot/repo`
+
+	got branch -r $testroot/repo -c $commit0 newbranch
+	(cd $testroot/wt && got update -b newbranch > /dev/null)
+	echo ddd > $testroot/wt/gamma/delta
+	(cd $testroot/wt && got commit -m 'change delta' >/dev/null)
+	local commit2=`git_show_branch_head $testroot/repo newbranch`
+	local author_time2=`git_show_author_time $testroot/repo newbranch`
+
+	echo zzz > $testroot/wt/epsilon/zeta
+	(cd $testroot/wt && got commit -m 'change zeta' >/dev/null)
+	local commit3=`git_show_head $testroot/repo`
+	local author_time3=`git_show_author_time $testroot/repo newbranch`
+
+	(cd $testroot/wt && got update -b master > /dev/null)
+	(cd $testroot/wt && got merge newbranch > /dev/null)
+	local merge_commit=`git_show_head $testroot/repo`
+	local merge_time=`git_show_author_time $testroot/repo`
+
+	local short_commit0=`trim_obj_id 33 $commit0`
+	local short_commit1=`trim_obj_id 33 $commit1`
+	local short_commit2=`trim_obj_id 33 $commit2`
+	local short_commit3=`trim_obj_id 33 $commit3`
+
+	d_0=`date -u -r $author_time0 +"%G-%m-%d"`
+	d_1=`date -u -r $author_time1 +"%G-%m-%d"`
+	d_2=`date -u -r $author_time2 +"%G-%m-%d"`
+	d_3=`date -u -r $author_time3 +"%G-%m-%d"`
+	d_m=`date -u -r $merge_time +"%G-%m-%d"`
+
+	got log -r $testroot/repo -s -b -t > $testroot/stdout
+	cat > $testroot/stdout.expected <<EOF
+$d_m master  merge refs/heads/newbranch into refs/heads/master
+$d_1 $short_commit1 change alpha
+$d_3 newbranch change zeta
+$d_2 $short_commit2 change delta
+$d_0 $short_commit0 adding the test tree
+EOF
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done "$testroot" "$ret"
+}
+
+
 test_parseargs "$@"
 run_test test_log_in_repo
 run_test test_log_in_bare_repo
@@ -1259,3 +1321,4 @@ run_test test_log_merge_commit_nonexistent_path
 run_test test_log_submodule
 run_test test_log_diffstat
 run_test test_log_commit_keywords
+run_test test_log_toposort

-----------------------------------------------
 
 make 'got rebase' find a merge base with topological sorting if needed
 
 Fixes a problematic case of spurious conflicts encountered by
 naddy@ on landry's firefox package git repository.
 
 The current implementation of toposort is expensive, so this might
 make rebase appear to run slowly on large repositories. However,
 this is better than letting users deal with spurious conflitcs.
 
diff f8f9d1dba842acdb1157b10523896ac85f0c0da4 63511d623e317835bb2bbace5e6fbfdf29e6d237
commit - f8f9d1dba842acdb1157b10523896ac85f0c0da4
commit + 63511d623e317835bb2bbace5e6fbfdf29e6d237
blob - 88729baf00e38ec442bf51dbd5f42196934e953a
blob + 93b12cd6d160b9b15d26706c00de377d49219f15
--- cvg/cvg.c
+++ cvg/cvg.c
@@ -2066,7 +2066,7 @@ check_linear_ancestry(struct got_object_id *commit_id,
 	struct got_object_id *yca_id;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    commit_id, base_commit_id, 1, repo, check_cancelled, NULL);
+	    commit_id, base_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		return err;
 
blob - 74e2fb65d552093211aa85ccb3eaaa5d77f81f6f
blob + 3ccb50b750cbf5628d56d7f361fdd88e54426fd6
--- got/got.c
+++ got/got.c
@@ -2891,7 +2891,7 @@ check_linear_ancestry(struct got_object_id *commit_id,
 	struct got_object_id *yca_id;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    commit_id, base_commit_id, 1, repo, check_cancelled, NULL);
+	    commit_id, base_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		return err;
 
@@ -11020,7 +11020,7 @@ print_backup_ref(const char *branch_name, const char *
 		goto done;
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    old_commit_id, new_commit_id, 1, repo, check_cancelled, NULL);
+	    old_commit_id, new_commit_id, 1, 0, repo, check_cancelled, NULL);
 	if (err)
 		goto done;
 
@@ -11250,6 +11250,63 @@ abort_progress(void *arg, unsigned char status, const 
 }
 
 static const struct got_error *
+find_merge_commit_yca(struct got_object_id **new_yca_id,
+    struct got_object_id *branch_head_commit_id,
+    struct got_object_id *yca_id,
+    struct got_object_id *base_commit_id,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_commit_graph *graph = NULL;
+	struct got_commit_object *commit = NULL;
+
+	*new_yca_id = NULL;
+
+	err = got_commit_graph_open(&graph, "/", 1);
+	if (err)
+		return err;
+
+	err = got_commit_graph_iter_start(graph, base_commit_id,
+	    repo, check_cancelled, NULL);
+	if (err)
+		goto done;
+
+	for (;;) {
+		struct got_object_id id;
+
+		err = got_commit_graph_iter_next(&id, graph, repo,
+		    check_cancelled, NULL);
+		if (err) {
+			if (err->code == GOT_ERR_ITER_COMPLETED)
+				err = NULL;
+			break;
+		}
+
+		err = got_object_open_as_commit(&commit, repo, &id);
+		if (err)
+			break;
+
+		if (got_object_commit_get_nparents(commit) > 1) {
+			/* Search for a better YCA using toposort. */
+			err = got_commit_graph_find_youngest_common_ancestor(
+			    new_yca_id, base_commit_id, branch_head_commit_id,
+			    0, 1, repo, check_cancelled, NULL);
+			break;
+		}
+
+		if (got_object_id_cmp(&id, yca_id) == 0)
+			break;
+		got_object_commit_close(commit);
+		commit = NULL;
+	}
+done:
+	got_commit_graph_close(graph);
+	if (commit)
+		got_object_commit_close(commit);
+	return err;
+}
+
+static const struct got_error *
 cmd_rebase(int argc, char *argv[])
 {
 	const struct got_error *error = NULL;
@@ -11500,8 +11557,8 @@ cmd_rebase(int argc, char *argv[])
 		}
 
 		error = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-		    base_commit_id, branch_head_commit_id, 1, repo,
-		    check_cancelled, NULL);
+		    base_commit_id, branch_head_commit_id, 1, 0,
+		    repo, check_cancelled, NULL);
 		if (error) {
 			if (error->code == GOT_ERR_ANCESTRY) {
 				error = got_error_msg(GOT_ERR_ANCESTRY,
@@ -11511,6 +11568,24 @@ cmd_rebase(int argc, char *argv[])
 			goto done;
 		}
 
+		/*
+		 * If a merge commit appears between the new base branch tip
+		 * and a YCA found via first-parent traversal then we might
+		 * find a better YCA using topologically sorted commits.
+		 */
+		if (got_object_id_cmp(base_commit_id, yca_id) != 0) {
+			struct got_object_id *better_yca_id;
+			error = find_merge_commit_yca(&better_yca_id,
+			    branch_head_commit_id, yca_id,
+			    base_commit_id, repo);
+			if (error)
+				goto done;
+			if (better_yca_id) {
+				free(yca_id);
+				yca_id = better_yca_id;
+			}
+		}
+
 		if (got_object_id_cmp(base_commit_id, yca_id) == 0) {
 			struct got_pathlist_head paths;
 			printf("%s is already based on %s\n",
@@ -13510,7 +13585,7 @@ cmd_merge(int argc, char *argv[])
 	}
 
 	error = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    wt_branch_tip, branch_tip, 0, repo,
+	    wt_branch_tip, branch_tip, 0, 0, repo,
 	    check_cancelled, NULL);
 	if (error && error->code != GOT_ERR_ANCESTRY)
 		goto done;
blob - 35d7c7072f5ce67648f7f2c4a878396746ad3e45
blob + a425d8d817ab860cb50f4d849945ba8fe207c0fd
--- include/got_commit_graph.h
+++ include/got_commit_graph.h
@@ -34,4 +34,4 @@ const struct got_error *got_commit_graph_intersect(str
 /* Find the youngest common ancestor of two commits. */
 const struct got_error *got_commit_graph_find_youngest_common_ancestor(
     struct got_object_id **, struct got_object_id *, struct got_object_id *,
-    int, struct got_repository *, got_cancel_cb, void *);
+    int, int, struct got_repository *, got_cancel_cb, void *);
blob - 976b20aab20831ca6c66bb5793542bf438ec1688
blob + 3748c660e7ad27e0b645825b852620a1a041ed71
--- lib/commit_graph.c
+++ lib/commit_graph.c
@@ -702,12 +702,14 @@ find_yca_add_id(struct got_object_id **yca_id, struct 
  * common ancestors.
  *
  * If first_parent_traversal is nonzero, only linear history is considered.
+ * If toposort is set then sort commits in topological order before
+ * traversing them.
  */
 const struct got_error *
 got_commit_graph_find_youngest_common_ancestor(struct got_object_id **yca_id,
     struct got_object_id *commit_id, struct got_object_id *commit_id2,
-    int first_parent_traversal,
-    struct got_repository *repo, got_cancel_cb cancel_cb, void *cancel_arg)
+    int first_parent_traversal, int toposort, struct got_repository *repo,
+    got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
 	struct got_commit_graph *graph = NULL, *graph2 = NULL;
@@ -728,16 +730,28 @@ got_commit_graph_find_youngest_common_ancestor(struct 
 	if (err)
 		goto done;
 
-	err = got_commit_graph_iter_start(graph, commit_id, repo,
-	    cancel_cb, cancel_arg);
-	if (err)
-		goto done;
+	if (toposort) {
+		err = got_commit_graph_toposort(graph, commit_id, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
 
-	err = got_commit_graph_iter_start(graph2, commit_id2, repo,
-	    cancel_cb, cancel_arg);
-	if (err)
-		goto done;
+		err = got_commit_graph_toposort(graph2, commit_id2, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	} else {
+		err = got_commit_graph_iter_start(graph, commit_id, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
 
+		err = got_commit_graph_iter_start(graph2, commit_id2, repo,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	}
+
 	for (;;) {
 		if (cancel_cb) {
 			err = (*cancel_cb)(cancel_arg);
blob - 681905dc26b73d6a41382533ca6a87a306c8a240
blob + b6be87a769c4d7440f941e66aabb66e589e19a77
--- lib/send.c
+++ lib/send.c
@@ -226,7 +226,7 @@ check_common_ancestry(const char *refname, struct got_
 		    "bad object type on server for %s", refname);
 
 	err = got_commit_graph_find_youngest_common_ancestor(&yca_id,
-	    my_id, their_id, 0, repo, cancel_cb, cancel_arg);
+	    my_id, their_id, 0, 0, repo, cancel_cb, cancel_arg);
 	if (err)
 		return err;
 	if (yca_id == NULL)
blob - 46f11f7c161db17549ba3c6ba4055ffcc81b97ce
blob + 0239e75d3e0b84aa67d95700219ecda8782265ef
--- regress/cmdline/rebase.sh
+++ regress/cmdline/rebase.sh
@@ -2236,8 +2236,7 @@ test_rebase_merged_history_traversal() {
 	local commit1=`git_show_branch_head $testroot/repo master`
 
 	(cd $testroot/wt && got branch newbranch2) >/dev/null
-	(cd $testroot/wt && got rebase newbranch1) > $testroot/stdout \
-		2> $testroot/stderr
+	(cd $testroot/wt && got rebase newbranch1) > $testroot/stdout
 
 	echo "Forwarding refs/heads/newbranch1 to commit $commit1" > \
 		$testroot/stdout.expected
@@ -2245,8 +2244,9 @@ test_rebase_merged_history_traversal() {
 		>> $testroot/stdout.expected
 
 	if ! cmp -s $testroot/stdout.expected $testroot/stdout; then
-		#diff -u $testroot/stdout.expected $testroot/stdout
-		ret="xfail ($(head -n1 $testroot/stdout))"
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "1"
+		return 1
 	fi
 
 	test_done "$testroot" "$ret"