"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
got patch: merge patches with diff3
To:
gameoftrees@openbsd.org
Date:
Thu, 16 Jun 2022 00:35:31 +0200

Download raw body.

Thread
Turns out that plain diffs are not without their own shortcomings.  For
example, when multiple people are working on the same codebase often
happens that changes done by one person conflicts with changes done in
parallel by other people.

(I know this well since i'm making an habit of breaking stsp@ diffs
before he can even send them to the mailing list ;-)

Plain patches also have other issues and can produce wrong results in
some circumstances.  Long story short, a patch implementation can
guarantee to produce correct results only when the patch is applied
exactly to the same version of the file that the patch was generated
from.  (The CAVEATS and BUGS section of patch(1) gives good hints.)

Fortunately, in the scope of a version control system we can do better :)

`got diff' output includes some metadata about the blob id from which
the diff was generated.  The blob id indicates the exact version of the
file, so by using that as input of the patch machinery instead of the
current working revision we can guarantee a correct result.  Then we can
reuse the same mechanism used by 'got cherrypick' and 'got rebase' to
merge the files: diff3.  We have the file indicated by the blob id in
the patch text as the ancestor, the patched file as one of the
derivative and the current working revision as other derivative.

Pragmatically speaking, this reduces the times 'got patch' fails to
apply a diff turning many patch-incompatibilities either into a success
or a diff3 conflict.  QoL stuff.

(diff3 conflicts are also more correct than rej files since they should
correctly track the piece of the file with the conflict.)

Diff below is a first and simple implementation of this.  It modifies
got-read-patch to recognize 'got diff' blob ids and the patch machinery
to use the file indicated by the blob id as base, and then running the
diff3 merge.

There are various things that can be improved/added on top of this, but
I think they can be done later in-tree.  (I'm not trying to get this in
the 0.71 release by the way.)

Some possible improvements are:

 - more test coverage: the added tests are very basic;
 - requiring a perfect application (i.e. not even 'hunk applied with
   offset') when patching the blob;
 - requiring that a previous revision of the file effectively had
   associated that blob id (i.e. not trusting the blob id metadata)
 - extracting the same metadata from the output of git diff;
 - inspecting the file history: at least in theory, we should be able to
   apply a diff even after a file has been renamed; also, by doing a
   recursive merge we may be able to reduce the conflicts further maybe
   (i need to investigate on this)
 - providing a flag to use a specified commit tree as base when applying
   the diff and then merging the result with the current version of the
   files: this should allow to easily apply diffs generated by other
   means without conflicts;
 - we could even try to take the previous point to the extreme and
   traversing the history until we find a point where the patch applies
   cleanly.  probably not useful if not in extreme cases, but it's a fun
   thought experiment at least.

diff 1b09451fde509822695bf0c397fabe72e806d0bf /home/op/w/got-pd3
blob - 8ae16da40c07f4dcb3133f9ff08328a10605722b
file + include/got_error.h
--- include/got_error.h
+++ include/got_error.h
@@ -168,6 +168,7 @@
 #define GOT_ERR_HUNK_FAILED	150
 #define GOT_ERR_PATCH_FAILED	151
 #define GOT_ERR_FILEIDX_DUP_ENTRY 152
+#define GOT_ERR_PATCH_CONFLICT	153
 
 struct got_error {
         int code;
blob - 29541c0234b7f96a4638157fdd9017888571a76c
file + lib/error.c
--- lib/error.c
+++ lib/error.c
@@ -216,6 +216,7 @@ static const struct got_error got_errors[] = {
 	{ GOT_ERR_HUNK_FAILED, "hunk failed to apply" },
 	{ GOT_ERR_PATCH_FAILED, "patch failed to apply" },
 	{ GOT_ERR_FILEIDX_DUP_ENTRY, "duplicate file index entry" },
+	{ GOT_ERR_PATCH_CONFLICT, "got conflict applying the patch" },
 };
 
 static struct got_custom_error {
blob - af9557d85c3c38603367d54310b5f2a59d7b3b11
file + lib/got_lib_privsep.h
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -613,6 +613,7 @@ struct got_imsg_patch {
 	int	git;
 	char	old[PATH_MAX];
 	char	new[PATH_MAX];
+	char	blob[41];
 };
 
 /*
blob - 265446beb14fbf9c793d2c81f1c81d92f1d05fe3
file + lib/patch.c
--- lib/patch.c
+++ lib/patch.c
@@ -42,10 +42,12 @@
 #include "got_reference.h"
 #include "got_cancel.h"
 #include "got_worktree.h"
+#include "got_repository.h"
 #include "got_opentemp.h"
 #include "got_patch.h"
 
 #include "got_lib_delta.h"
+#include "got_lib_diff.h"
 #include "got_lib_object.h"
 #include "got_lib_privsep.h"
 
@@ -70,6 +72,7 @@ STAILQ_HEAD(got_patch_hunk_head, got_patch_hunk);
 struct got_patch {
 	char	*old;
 	char	*new;
+	char	 blob[41];
 	struct got_patch_hunk_head head;
 };
 
@@ -180,11 +183,14 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got
 	memcpy(&patch, imsg.data, sizeof(patch));
 
 	if (patch.old[sizeof(patch.old)-1] != '\0' ||
-	    patch.new[sizeof(patch.new)-1] != '\0') {
+	    patch.new[sizeof(patch.new)-1] != '\0' ||
+	    patch.blob[sizeof(patch.blob)-1] != '\0') {
 		err = got_error(GOT_ERR_PRIVSEP_LEN);
 		goto done;
 	}
 
+	strlcpy(p->blob, patch.blob, sizeof(p->blob));
+
 	/* automatically set strip=1 for git-style diffs */
 	if (strip == -1 && patch.git &&
 	    (*patch.old == '\0' || !strncmp(patch.old, "a/", 2)) &&
@@ -570,18 +576,75 @@ patch_add(void *arg, unsigned char status, const char 
 }
 
 static const struct got_error *
+open_blob(char **path, FILE **fp, const char *blobid,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id *id = NULL;
+	struct got_blob_object *blob = NULL;
+
+	*fp = NULL;
+	*path = NULL;
+
+	err = got_repo_match_object_id(&id, NULL, blobid,
+	    GOT_OBJ_TYPE_BLOB, NULL /* do not resolve tags */,
+	    repo);
+	if (err)
+		return err;
+
+	err = got_object_open_as_blob(&blob, repo, id, 8192);
+	if (err)
+		goto done;
+
+	err = got_opentemp_named(path, fp, GOT_TMPDIR_STR "/got-patch-blob");
+	if (err)
+		goto done;
+
+	err = got_object_blob_dump_to_file(NULL, NULL, NULL, *fp, blob);
+	if (err)
+		goto done;
+
+done:
+	if (id)
+		free(id);
+	if (blob)
+		got_object_blob_close(blob);
+	if (err) {
+		if (*fp != NULL)
+			fclose(*fp);
+		if (*path != NULL)
+			unlink(*path);
+		free(*path);
+		*fp = NULL;
+		*path = NULL;
+	}
+	return err;
+}
+
+static const struct got_error *
 apply_patch(struct got_worktree *worktree, struct got_repository *repo,
     struct got_fileindex *fileindex, const char *old, const char *new,
     struct got_patch *p, int nop, struct patch_args *pa,
     got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	int file_renamed = 0;
+	int do_merge = 0, overlapcnt = 0, file_renamed = 0;
 	char *oldpath = NULL, *newpath = NULL;
 	char *tmppath = NULL, *template = NULL, *parent = NULL;
-	FILE *oldfile = NULL, *tmp = NULL;
+	char *apath = NULL, *mergepath = NULL;
+	FILE *oldfile = NULL, *tmpfile = NULL, *afile = NULL, *mergefile = NULL;
+	int outfd;
+	const char *outpath;
 	mode_t mode = GOT_DEFAULT_FILE_MODE;
 
+	/* don't run the diff3 merge on creations/deletions */
+	if (*p->blob != '\0' && p->old != NULL && p->new != NULL) {
+		do_merge = 1;
+		err = open_blob(&apath, &afile, p->blob, repo);
+		if (err)
+			return err;
+	}
+
 	if (asprintf(&oldpath, "%s/%s", got_worktree_get_root_path(worktree),
 	    old) == -1) {
 		err = got_error_from_errno("asprintf");
@@ -607,13 +670,36 @@ apply_patch(struct got_worktree *worktree, struct got_
 		goto done;
 	}
 
-	err = got_opentemp_named(&tmppath, &tmp, template);
+	err = got_opentemp_named(&tmppath, &tmpfile, template);
 	if (err)
 		goto done;
-	err = patch_file(p, oldfile, tmp, &mode);
+	outpath = tmppath;
+	outfd = fileno(tmpfile);
+	err = patch_file(p, afile != NULL ? afile : oldfile, tmpfile, &mode);
 	if (err)
 		goto done;
 
+	if (do_merge) {
+		if (fseeko(afile, 0, SEEK_SET) == -1 ||
+		    fseeko(oldfile, 0, SEEK_SET) == -1 ||
+		    fseeko(tmpfile, 0, SEEK_SET)) {
+			err = got_error_from_errno("fseeko");
+			goto done;
+		}
+
+		err = got_opentemp_named(&mergepath, &mergefile, template);
+		if (err)
+			goto done;
+		outpath = mergepath;
+		outfd = fileno(mergefile);
+
+		err = got_merge_diff3(&overlapcnt, outfd, tmpfile, afile,
+		    oldfile, "old file", "ancestor", "work file",
+		    GOT_DIFF_ALGORITHM_PATIENCE);
+		if (err)
+			goto done;
+	}
+
 	if (nop)
 		goto done;
 
@@ -623,14 +709,14 @@ apply_patch(struct got_worktree *worktree, struct got_
 		goto done;
 	}
 
-	if (fchmod(fileno(tmp), mode) == -1) {
+	if (fchmod(outfd, mode) == -1) {
 		err = got_error_from_errno2("chmod", tmppath);
 		goto done;
 	}
 
-	if (rename(tmppath, newpath) == -1) {
+	if (rename(outpath, newpath) == -1) {
 		if (errno != ENOENT) {
-			err = got_error_from_errno3("rename", tmppath,
+			err = got_error_from_errno3("rename", outpath,
 			    newpath);
 			goto done;
 		}
@@ -641,8 +727,8 @@ apply_patch(struct got_worktree *worktree, struct got_
 		err = got_path_mkdir(parent);
 		if (err != NULL)
 			goto done;
-		if (rename(tmppath, newpath) == -1) {
-			err = got_error_from_errno3("rename", tmppath,
+		if (rename(outpath, newpath) == -1) {
+			err = got_error_from_errno3("rename", outpath,
 			    newpath);
 			goto done;
 		}
@@ -662,20 +748,40 @@ apply_patch(struct got_worktree *worktree, struct got_
 		    fileindex, patch_add, pa);
 		if (err)
 			unlink(newpath);
-	} else
+	} else if (overlapcnt > 0)
+		err = report_progress(pa, old, new, GOT_STATUS_CONFLICT, NULL);
+	else
 		err = report_progress(pa, old, new, GOT_STATUS_MODIFY, NULL);
 
 done:
+	if (err == NULL && overlapcnt != 0)
+		err = got_error(GOT_ERR_PATCH_CONFLICT);
+
 	free(parent);
 	free(template);
+
 	if (tmppath != NULL)
 		unlink(tmppath);
-	if (tmp != NULL && fclose(tmp) == EOF && err == NULL)
+	if (tmpfile != NULL && fclose(tmpfile) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
 	free(tmppath);
+
 	free(oldpath);
 	if (oldfile != NULL && fclose(oldfile) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
+
+	if (apath != NULL)
+		unlink(apath);
+	if (afile != NULL && fclose(afile) == EOF && err == NULL)
+		err = got_error_from_errno("fclose");
+	free(apath);
+
+	if (mergepath != NULL)
+		unlink(mergepath);
+	if (mergefile != NULL && fclose(mergefile) == EOF && err == NULL)
+		err = got_error_from_errno("fclose");
+	free(mergepath);
+
 	free(newpath);
 	return err;
 }
blob - 23212abe02328d5837822225fecbf174e677f2bb
file + libexec/got-read-patch/got-read-patch.c
--- libexec/got-read-patch/got-read-patch.c
+++ libexec/got-read-patch/got-read-patch.c
@@ -60,7 +60,7 @@
 struct imsgbuf ibuf;
 
 static const struct got_error *
-send_patch(const char *oldname, const char *newname, int git)
+send_patch(const char *oldname, const char *newname, const char *blob, int git)
 {
 	struct got_imsg_patch p;
 
@@ -72,9 +72,11 @@ send_patch(const char *oldname, const char *newname, i
 	if (newname != NULL)
 		strlcpy(p.new, newname, sizeof(p.new));
 
+	if (blob != NULL)
+		strlcpy(p.blob, blob, sizeof(p.blob));
+
 	p.git = git;
-	if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1,
-	    &p, sizeof(p)) == -1)
+	if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1, &p, sizeof(p)) == -1)
 		return got_error_from_errno("imsg_compose GOT_IMSG_PATCH");
 	return NULL;
 }
@@ -127,6 +129,7 @@ find_patch(int *done, FILE *fp)
 {
 	const struct got_error *err = NULL;
 	char	*old = NULL, *new = NULL;
+	char	*blob = NULL;
 	char	*line = NULL;
 	size_t	 linesize = 0;
 	ssize_t	 linelen;
@@ -150,9 +153,12 @@ find_patch(int *done, FILE *fp)
 		} else if (rename && !strncmp(line, "rename to ", 10)) {
 			free(new);
 			err = filename(line + 10, &new);
-		} else if (git && !strncmp(line, "similarity index 100%", 21))
+		} else if (git && !strncmp(line, "similarity index 100%", 21)) {
 			rename = 1;
-		else if (!strncmp(line, "diff --git a/", 13))
+		} else if (!git && !strncmp(line, "blob - ", 7)) {
+			free(blob);
+			err = filename(line + 7, &blob);
+		}else if (!strncmp(line, "diff --git a/", 13))
 			git = 1;
 
 		if (err)
@@ -165,7 +171,7 @@ find_patch(int *done, FILE *fp)
 		 */
 		if (rename && old != NULL && new != NULL) {
 			*done = 1;
-			err = send_patch(old, new, git);
+			err = send_patch(old, new, blob, git);
 			break;
 		}
 
@@ -175,7 +181,7 @@ find_patch(int *done, FILE *fp)
 			    (!create && old == NULL))
 				err = got_error(GOT_ERR_PATCH_MALFORMED);
 			else
-				err = send_patch(old, new, git);
+				err = send_patch(old, new, blob, git);
 
 			if (err)
 				break;
@@ -189,6 +195,7 @@ find_patch(int *done, FILE *fp)
 
 	free(old);
 	free(new);
+	free(blob);
 	free(line);
 	if (ferror(fp) && err == NULL)
 		err = got_error_from_errno("getline");
blob - c1e703a5e36a9140b460df532dc84b36583e5884
file + regress/cmdline/patch.sh
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1442,7 +1442,137 @@ EOF
 	fi
 	test_done $testroot $ret
 }
+test_patch_merge_simple() {
+	local testroot=`test_init patch_merge_simple`
 
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 > $testroot/wt/numbers
+	(cd $testroot/wt && got add numbers && got commit -m +numbers) \
+		> /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 | sed 's/4/four/g' > $testroot/wt/numbers
+
+	(cd $testroot/wt && got diff > $testroot/old.diff \
+		&& got revert numbers) >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 | sed 's/6/six/g' > $testroot/wt/numbers
+	(cd $testroot/wt && got commit -m 'edit numbers') \
+		> /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	(cd $testroot/wt && got patch $testroot/old.diff) \
+		2>&1 > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 | sed -e s/4/four/ -e s/6/six/ > $testroot/wt/numbers.expected
+	cmp -s $testroot/wt/numbers $testroot/wt/numbers.expected
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/wt/numbers $testroot/wt/numbers.expected
+	fi
+	test_done $testroot $ret
+}
+
+test_patch_merge_conflict() {
+	local testroot=`test_init patch_merge_conflict`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 > $testroot/wt/numbers
+	(cd $testroot/wt && got add numbers && got commit -m +numbers) \
+		> /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 | sed 's/6/six/g' > $testroot/wt/numbers
+
+	(cd $testroot/wt && got diff > $testroot/old.diff \
+		&& got revert numbers) >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	jot 10 | sed 's/6/3+3/g' > $testroot/wt/numbers
+	(cd $testroot/wt && got commit -m 'edit numbers') \
+		> /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	(cd $testroot/wt && got patch $testroot/old.diff) \
+		>/dev/null 2>&1
+	ret=$?
+	if [ $ret -eq 0 ]; then
+		echo "got patch merged a diff that should conflic" >&2
+		test_done $testroot 0
+		return 1
+	fi
+
+	# XXX: prefixing every line with a tab otherwise got thinks
+	# the file has conflicts in it.
+	cat <<-EOF > $testroot/wt/numbers.expected
+	1
+	2
+	3
+	4
+	5
+	<<<<<<< old file
+	six
+	||||||| ancestor
+	6
+	=======
+	3+3
+	>>>>>>> work file
+	7
+	8
+	9
+	10
+EOF
+
+	cmp -s $testroot/wt/numbers $testroot/wt/numbers.expected
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/wt/numbers $testroot/wt/numbers.expected
+	fi
+	test_done $testroot $ret
+}
+
 test_parseargs "$@"
 run_test test_patch_simple_add_file
 run_test test_patch_simple_rm_file
@@ -1468,3 +1598,5 @@ run_test test_patch_relative_paths
 run_test test_patch_with_path_prefix
 run_test test_patch_relpath_with_path_prefix
 run_test test_patch_reverse
+run_test test_patch_merge_simple
+run_test test_patch_merge_conflict