"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: Some random whining
To:
Christian Weisgerber <naddy@mips.inka.de>, gameoftrees@openbsd.org
Date:
Thu, 2 Sep 2021 22:54:01 +0200

Download raw body.

Thread
On Thu, Sep 02, 2021 at 10:24:15PM +0200, Stefan Sperling wrote:
> On Thu, Sep 02, 2021 at 09:11:29PM +0200, Stefan Sperling wrote:
> > On Thu, Sep 02, 2021 at 07:16:43PM +0200, Christian Weisgerber wrote:
> > > It's still fairly slow, though.
> > 
> > Needs profiling to dig deeper.
> 
> I found a way to speed it up a bit further.
> 
> The current code checks merge pre-conditions for all files in the work tree.
> We can instead only check pre-conditions on files which will be affected
> by the merge. This can be done by running the tree diff twice, once for
> checking pre-conditions, and then again for merging.
> In general, this will be faster than iterating over all files in the work
> tree and then diffing trees to do the merge. We use less stat(2) calls.
> And thanks to the object cache the second tree diff run will have less
> overhead than the first run.
> 
> As a side-effect this relaxes pre-condition checks a bit:
> Conflicts on files which will not be affected by 'got cherrypick' will
> now be ignored. I guess some would say that this is an improvement over
> the current behaviour where cherrypick is disallowed if _any_ file in
> the work tree contains conflicts. I would expect lingering merge conflicts
> to be common in large disparate trees such as a ports tree checkout.
> 
> This patch doesn't touch the test suite, and our tests are still passing.
> Ideally, when relaxing pre-conditions we should try to expand test coverage
> to cover cases which will no longer be caught upfront and might result in
> unexpected behaviour. Any ideas what we could test for? I have some ideas
> but would like to collect other people's ideas, too.

Here is an updated version of this patch.
The previous version had a bug where I reversed the mixed-revision check.
I am now including a new test case in the patch which checks for this issue.

Also, we can check the base commit ID on every file in the work tree.
This is cheap since we are loading the entire file index anyway and it
doesn't require any stat calls.
Checking the entire work tree for mixed-commits is also much safer:
Imagine an entire subdirectory having moved around in the repository.
A mixed-commit work tree might then contain the deleted side of the move,
but miss the added side; or vice versa. Should this move involve any of
the files being cherry-picked then the merge result could be quite messy.

diff 5546d4669ccce281a6f6f89ccf204e671ed0d3a9 /home/stsp/src/got
blob - 15441d50109add43dd64b4368c57680ec2884d50
file + got/got.1
--- got/got.1
+++ got/got.1
@@ -1615,7 +1615,7 @@ will refuse to run if certain preconditions are not me
 If the work tree contains multiple base commits it must first be updated
 to a single base commit with
 .Cm got update .
-If the work tree already contains files with merge conflicts, these
+If any relevant files already contain merge conflicts, these
 conflicts must be resolved first.
 .It Cm cy
 Short alias for
@@ -1659,7 +1659,7 @@ will refuse to run if certain preconditions are not me
 If the work tree contains multiple base commits it must first be updated
 to a single base commit with
 .Cm got update .
-If the work tree already contains files with merge conflicts, these
+If any relevant files already contain merge conflicts, these
 conflicts must be resolved first.
 .It Cm bo
 Short alias for
blob - 821074bee10bd2aec44fc5e2732eb33c5b157f6f
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -3118,26 +3118,48 @@ done:
 	return err;
 }
 
-struct check_merge_ok_arg {
+static const struct got_error *
+check_mixed_commits(void *arg, struct got_fileindex_entry *ie)
+{
+	struct got_worktree *worktree = arg;
+
+ 	/* Reject merges into a work tree with mixed base commits. */
+ 	if (got_fileindex_entry_has_commit(ie) &&
+	    memcmp(ie->commit_sha1, worktree->base_commit_id->sha1,
+	    SHA1_DIGEST_LENGTH) != 0)
+ 		return got_error(GOT_ERR_MIXED_COMMITS);
+
+	return NULL;
+}
+
+struct check_merge_conflicts_arg {
 	struct got_worktree *worktree;
+	struct got_fileindex *fileindex;
 	struct got_repository *repo;
 };
 
 static const struct got_error *
-check_merge_ok(void *arg, struct got_fileindex_entry *ie)
+check_merge_conflicts(void *arg, struct got_blob_object *blob1,
+    struct got_blob_object *blob2, struct got_object_id *id1,
+    struct got_object_id *id2, const char *path1, const char *path2,
+    mode_t mode1, mode_t mode2, struct got_repository *repo)
 {
 	const struct got_error *err = NULL;
-	struct check_merge_ok_arg *a = arg;
+	struct check_merge_conflicts_arg *a = arg;
 	unsigned char status;
 	struct stat sb;
+	struct got_fileindex_entry *ie;
+	const char *path = path2 ? path2 : path1;
+	struct got_object_id *id = id2 ? id2 : id1;
 	char *ondisk_path;
 
-	/* Reject merges into a work tree with mixed base commits. */
-	if (got_fileindex_entry_has_commit(ie) &&
-	    memcmp(ie->commit_sha1, a->worktree->base_commit_id->sha1,
-	    SHA1_DIGEST_LENGTH))
-		return got_error(GOT_ERR_MIXED_COMMITS);
+	if (id == NULL)
+		return NULL;
 
+	ie = got_fileindex_entry_get(a->fileindex, path, strlen(path));
+	if (ie == NULL)
+		return NULL;
+
 	if (asprintf(&ondisk_path, "%s/%s", a->worktree->root_path, ie->path)
 	    == -1)
 		return got_error_from_errno("asprintf");
@@ -3163,6 +3185,7 @@ merge_files(struct got_worktree *worktree, struct got_
 	const struct got_error *err = NULL, *sync_err;
 	struct got_object_id *tree_id1 = NULL, *tree_id2 = NULL;
 	struct got_tree_object *tree1 = NULL, *tree2 = NULL;
+	struct check_merge_conflicts_arg cmc_arg;
 	struct merge_file_cb_arg arg;
 	char *label_orig = NULL;
 
@@ -3201,6 +3224,14 @@ merge_files(struct got_worktree *worktree, struct got_
 	if (err)
 		goto done;
 
+	cmc_arg.worktree = worktree;
+	cmc_arg.fileindex = fileindex;
+	cmc_arg.repo = repo;
+	err = got_diff_tree(tree1, tree2, "", "", repo,
+	    check_merge_conflicts, &cmc_arg, 0);
+	if (err)
+		goto done;
+
 	arg.worktree = worktree;
 	arg.fileindex = fileindex;
 	arg.progress_cb = progress_cb;
@@ -3231,7 +3262,6 @@ got_worktree_merge_files(struct got_worktree *worktree
 	const struct got_error *err, *unlockerr;
 	char *fileindex_path = NULL;
 	struct got_fileindex *fileindex = NULL;
-	struct check_merge_ok_arg mok_arg;
 
 	err = lock_worktree(worktree, LOCK_EX);
 	if (err)
@@ -3241,10 +3271,8 @@ got_worktree_merge_files(struct got_worktree *worktree
 	if (err)
 		goto done;
 
-	mok_arg.worktree = worktree;
-	mok_arg.repo = repo;
-	err = got_fileindex_for_each_entry_safe(fileindex, check_merge_ok,
-	    &mok_arg);
+	err = got_fileindex_for_each_entry_safe(fileindex, check_mixed_commits,
+	    worktree);
 	if (err)
 		goto done;
 
blob - e38f0c962fdc4cd3c0ee01dba970d153b8fe6d2a
file + regress/cmdline/cherrypick.sh
--- regress/cmdline/cherrypick.sh
+++ regress/cmdline/cherrypick.sh
@@ -265,6 +265,66 @@ test_cherrypick_into_work_tree_with_conflicts() {
 	test_done "$testroot" "$ret"
 }
 
+test_cherrypick_into_work_tree_with_mixed_commits() {
+	local testroot=`test_init cherrypick_into_work_tree_with_mixed_commits`
+	local first_rev=`git_show_head $testroot/repo`
+
+	echo "modified alpha" > $testroot/repo/alpha
+	git_commit $testroot/repo -m "committing to alpha" 
+	local second_rev=`git_show_head $testroot/repo`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	(cd $testroot/repo && git checkout -q -b newbranch)
+	echo "modified delta on branch" > $testroot/repo/gamma/delta
+	git_commit $testroot/repo -m "committing to delta on newbranch"
+
+	(cd $testroot/repo && git rm -q beta)
+	echo "new file on branch" > $testroot/repo/epsilon/new
+	(cd $testroot/repo && git add epsilon/new)
+	git_commit $testroot/repo -m "committing more changes on newbranch"
+
+	local branch_rev=`git_show_head $testroot/repo`
+
+	(cd $testroot/wt && got update -c $first_rev alpha >/dev/null)
+
+	(cd $testroot/wt && got cherrypick $branch_rev \
+		> $testroot/stdout 2> $testroot/stderr)
+	ret="$?"
+	if [ "$ret" = "0" ]; then
+		echo "cherrypick succeeded unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	echo -n > $testroot/stdout.expected
+	echo -n "got: work tree contains files from multiple base commits; " \
+		> $testroot/stderr.expected
+	echo "the entire work tree must be updated first" \
+		>> $testroot/stderr.expected
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+	fi
+	test_done "$testroot" "$ret"
+
+}
+
 test_cherrypick_modified_submodule() {
 	local testroot=`test_init cherrypick_modified_submodules`
 
@@ -1229,6 +1289,7 @@ test_parseargs "$@"
 run_test test_cherrypick_basic
 run_test test_cherrypick_root_commit
 run_test test_cherrypick_into_work_tree_with_conflicts
+run_test test_cherrypick_into_work_tree_with_mixed_commits
 run_test test_cherrypick_modified_submodule
 run_test test_cherrypick_added_submodule
 run_test test_cherrypick_conflict_wt_file_vs_repo_submodule