From: Omar Polo Subject: got patch: merge patches with diff3 To: gameoftrees@openbsd.org Date: Thu, 16 Jun 2022 00:35:31 +0200 Turns out that plain diffs are not without their own shortcomings. For example, when multiple people are working on the same codebase often happens that changes done by one person conflicts with changes done in parallel by other people. (I know this well since i'm making an habit of breaking stsp@ diffs before he can even send them to the mailing list ;-) Plain patches also have other issues and can produce wrong results in some circumstances. Long story short, a patch implementation can guarantee to produce correct results only when the patch is applied exactly to the same version of the file that the patch was generated from. (The CAVEATS and BUGS section of patch(1) gives good hints.) Fortunately, in the scope of a version control system we can do better :) `got diff' output includes some metadata about the blob id from which the diff was generated. The blob id indicates the exact version of the file, so by using that as input of the patch machinery instead of the current working revision we can guarantee a correct result. Then we can reuse the same mechanism used by 'got cherrypick' and 'got rebase' to merge the files: diff3. We have the file indicated by the blob id in the patch text as the ancestor, the patched file as one of the derivative and the current working revision as other derivative. Pragmatically speaking, this reduces the times 'got patch' fails to apply a diff turning many patch-incompatibilities either into a success or a diff3 conflict. QoL stuff. (diff3 conflicts are also more correct than rej files since they should correctly track the piece of the file with the conflict.) Diff below is a first and simple implementation of this. It modifies got-read-patch to recognize 'got diff' blob ids and the patch machinery to use the file indicated by the blob id as base, and then running the diff3 merge. There are various things that can be improved/added on top of this, but I think they can be done later in-tree. (I'm not trying to get this in the 0.71 release by the way.) Some possible improvements are: - more test coverage: the added tests are very basic; - requiring a perfect application (i.e. not even 'hunk applied with offset') when patching the blob; - requiring that a previous revision of the file effectively had associated that blob id (i.e. not trusting the blob id metadata) - extracting the same metadata from the output of git diff; - inspecting the file history: at least in theory, we should be able to apply a diff even after a file has been renamed; also, by doing a recursive merge we may be able to reduce the conflicts further maybe (i need to investigate on this) - providing a flag to use a specified commit tree as base when applying the diff and then merging the result with the current version of the files: this should allow to easily apply diffs generated by other means without conflicts; - we could even try to take the previous point to the extreme and traversing the history until we find a point where the patch applies cleanly. probably not useful if not in extreme cases, but it's a fun thought experiment at least. diff 1b09451fde509822695bf0c397fabe72e806d0bf /home/op/w/got-pd3 blob - 8ae16da40c07f4dcb3133f9ff08328a10605722b file + include/got_error.h --- include/got_error.h +++ include/got_error.h @@ -168,6 +168,7 @@ #define GOT_ERR_HUNK_FAILED 150 #define GOT_ERR_PATCH_FAILED 151 #define GOT_ERR_FILEIDX_DUP_ENTRY 152 +#define GOT_ERR_PATCH_CONFLICT 153 struct got_error { int code; blob - 29541c0234b7f96a4638157fdd9017888571a76c file + lib/error.c --- lib/error.c +++ lib/error.c @@ -216,6 +216,7 @@ static const struct got_error got_errors[] = { { GOT_ERR_HUNK_FAILED, "hunk failed to apply" }, { GOT_ERR_PATCH_FAILED, "patch failed to apply" }, { GOT_ERR_FILEIDX_DUP_ENTRY, "duplicate file index entry" }, + { GOT_ERR_PATCH_CONFLICT, "got conflict applying the patch" }, }; static struct got_custom_error { blob - af9557d85c3c38603367d54310b5f2a59d7b3b11 file + lib/got_lib_privsep.h --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -613,6 +613,7 @@ struct got_imsg_patch { int git; char old[PATH_MAX]; char new[PATH_MAX]; + char blob[41]; }; /* blob - 265446beb14fbf9c793d2c81f1c81d92f1d05fe3 file + lib/patch.c --- lib/patch.c +++ lib/patch.c @@ -42,10 +42,12 @@ #include "got_reference.h" #include "got_cancel.h" #include "got_worktree.h" +#include "got_repository.h" #include "got_opentemp.h" #include "got_patch.h" #include "got_lib_delta.h" +#include "got_lib_diff.h" #include "got_lib_object.h" #include "got_lib_privsep.h" @@ -70,6 +72,7 @@ STAILQ_HEAD(got_patch_hunk_head, got_patch_hunk); struct got_patch { char *old; char *new; + char blob[41]; struct got_patch_hunk_head head; }; @@ -180,11 +183,14 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got memcpy(&patch, imsg.data, sizeof(patch)); if (patch.old[sizeof(patch.old)-1] != '\0' || - patch.new[sizeof(patch.new)-1] != '\0') { + patch.new[sizeof(patch.new)-1] != '\0' || + patch.blob[sizeof(patch.blob)-1] != '\0') { err = got_error(GOT_ERR_PRIVSEP_LEN); goto done; } + strlcpy(p->blob, patch.blob, sizeof(p->blob)); + /* automatically set strip=1 for git-style diffs */ if (strip == -1 && patch.git && (*patch.old == '\0' || !strncmp(patch.old, "a/", 2)) && @@ -570,18 +576,75 @@ patch_add(void *arg, unsigned char status, const char } static const struct got_error * +open_blob(char **path, FILE **fp, const char *blobid, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object_id *id = NULL; + struct got_blob_object *blob = NULL; + + *fp = NULL; + *path = NULL; + + err = got_repo_match_object_id(&id, NULL, blobid, + GOT_OBJ_TYPE_BLOB, NULL /* do not resolve tags */, + repo); + if (err) + return err; + + err = got_object_open_as_blob(&blob, repo, id, 8192); + if (err) + goto done; + + err = got_opentemp_named(path, fp, GOT_TMPDIR_STR "/got-patch-blob"); + if (err) + goto done; + + err = got_object_blob_dump_to_file(NULL, NULL, NULL, *fp, blob); + if (err) + goto done; + +done: + if (id) + free(id); + if (blob) + got_object_blob_close(blob); + if (err) { + if (*fp != NULL) + fclose(*fp); + if (*path != NULL) + unlink(*path); + free(*path); + *fp = NULL; + *path = NULL; + } + return err; +} + +static const struct got_error * apply_patch(struct got_worktree *worktree, struct got_repository *repo, struct got_fileindex *fileindex, const char *old, const char *new, struct got_patch *p, int nop, struct patch_args *pa, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; - int file_renamed = 0; + int do_merge = 0, overlapcnt = 0, file_renamed = 0; char *oldpath = NULL, *newpath = NULL; char *tmppath = NULL, *template = NULL, *parent = NULL; - FILE *oldfile = NULL, *tmp = NULL; + char *apath = NULL, *mergepath = NULL; + FILE *oldfile = NULL, *tmpfile = NULL, *afile = NULL, *mergefile = NULL; + int outfd; + const char *outpath; mode_t mode = GOT_DEFAULT_FILE_MODE; + /* don't run the diff3 merge on creations/deletions */ + if (*p->blob != '\0' && p->old != NULL && p->new != NULL) { + do_merge = 1; + err = open_blob(&apath, &afile, p->blob, repo); + if (err) + return err; + } + if (asprintf(&oldpath, "%s/%s", got_worktree_get_root_path(worktree), old) == -1) { err = got_error_from_errno("asprintf"); @@ -607,13 +670,36 @@ apply_patch(struct got_worktree *worktree, struct got_ goto done; } - err = got_opentemp_named(&tmppath, &tmp, template); + err = got_opentemp_named(&tmppath, &tmpfile, template); if (err) goto done; - err = patch_file(p, oldfile, tmp, &mode); + outpath = tmppath; + outfd = fileno(tmpfile); + err = patch_file(p, afile != NULL ? afile : oldfile, tmpfile, &mode); if (err) goto done; + if (do_merge) { + if (fseeko(afile, 0, SEEK_SET) == -1 || + fseeko(oldfile, 0, SEEK_SET) == -1 || + fseeko(tmpfile, 0, SEEK_SET)) { + err = got_error_from_errno("fseeko"); + goto done; + } + + err = got_opentemp_named(&mergepath, &mergefile, template); + if (err) + goto done; + outpath = mergepath; + outfd = fileno(mergefile); + + err = got_merge_diff3(&overlapcnt, outfd, tmpfile, afile, + oldfile, "old file", "ancestor", "work file", + GOT_DIFF_ALGORITHM_PATIENCE); + if (err) + goto done; + } + if (nop) goto done; @@ -623,14 +709,14 @@ apply_patch(struct got_worktree *worktree, struct got_ goto done; } - if (fchmod(fileno(tmp), mode) == -1) { + if (fchmod(outfd, mode) == -1) { err = got_error_from_errno2("chmod", tmppath); goto done; } - if (rename(tmppath, newpath) == -1) { + if (rename(outpath, newpath) == -1) { if (errno != ENOENT) { - err = got_error_from_errno3("rename", tmppath, + err = got_error_from_errno3("rename", outpath, newpath); goto done; } @@ -641,8 +727,8 @@ apply_patch(struct got_worktree *worktree, struct got_ err = got_path_mkdir(parent); if (err != NULL) goto done; - if (rename(tmppath, newpath) == -1) { - err = got_error_from_errno3("rename", tmppath, + if (rename(outpath, newpath) == -1) { + err = got_error_from_errno3("rename", outpath, newpath); goto done; } @@ -662,20 +748,40 @@ apply_patch(struct got_worktree *worktree, struct got_ fileindex, patch_add, pa); if (err) unlink(newpath); - } else + } else if (overlapcnt > 0) + err = report_progress(pa, old, new, GOT_STATUS_CONFLICT, NULL); + else err = report_progress(pa, old, new, GOT_STATUS_MODIFY, NULL); done: + if (err == NULL && overlapcnt != 0) + err = got_error(GOT_ERR_PATCH_CONFLICT); + free(parent); free(template); + if (tmppath != NULL) unlink(tmppath); - if (tmp != NULL && fclose(tmp) == EOF && err == NULL) + if (tmpfile != NULL && fclose(tmpfile) == EOF && err == NULL) err = got_error_from_errno("fclose"); free(tmppath); + free(oldpath); if (oldfile != NULL && fclose(oldfile) == EOF && err == NULL) err = got_error_from_errno("fclose"); + + if (apath != NULL) + unlink(apath); + if (afile != NULL && fclose(afile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + free(apath); + + if (mergepath != NULL) + unlink(mergepath); + if (mergefile != NULL && fclose(mergefile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + free(mergepath); + free(newpath); return err; } blob - 23212abe02328d5837822225fecbf174e677f2bb file + libexec/got-read-patch/got-read-patch.c --- libexec/got-read-patch/got-read-patch.c +++ libexec/got-read-patch/got-read-patch.c @@ -60,7 +60,7 @@ struct imsgbuf ibuf; static const struct got_error * -send_patch(const char *oldname, const char *newname, int git) +send_patch(const char *oldname, const char *newname, const char *blob, int git) { struct got_imsg_patch p; @@ -72,9 +72,11 @@ send_patch(const char *oldname, const char *newname, i if (newname != NULL) strlcpy(p.new, newname, sizeof(p.new)); + if (blob != NULL) + strlcpy(p.blob, blob, sizeof(p.blob)); + p.git = git; - if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1, - &p, sizeof(p)) == -1) + if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1, &p, sizeof(p)) == -1) return got_error_from_errno("imsg_compose GOT_IMSG_PATCH"); return NULL; } @@ -127,6 +129,7 @@ find_patch(int *done, FILE *fp) { const struct got_error *err = NULL; char *old = NULL, *new = NULL; + char *blob = NULL; char *line = NULL; size_t linesize = 0; ssize_t linelen; @@ -150,9 +153,12 @@ find_patch(int *done, FILE *fp) } else if (rename && !strncmp(line, "rename to ", 10)) { free(new); err = filename(line + 10, &new); - } else if (git && !strncmp(line, "similarity index 100%", 21)) + } else if (git && !strncmp(line, "similarity index 100%", 21)) { rename = 1; - else if (!strncmp(line, "diff --git a/", 13)) + } else if (!git && !strncmp(line, "blob - ", 7)) { + free(blob); + err = filename(line + 7, &blob); + }else if (!strncmp(line, "diff --git a/", 13)) git = 1; if (err) @@ -165,7 +171,7 @@ find_patch(int *done, FILE *fp) */ if (rename && old != NULL && new != NULL) { *done = 1; - err = send_patch(old, new, git); + err = send_patch(old, new, blob, git); break; } @@ -175,7 +181,7 @@ find_patch(int *done, FILE *fp) (!create && old == NULL)) err = got_error(GOT_ERR_PATCH_MALFORMED); else - err = send_patch(old, new, git); + err = send_patch(old, new, blob, git); if (err) break; @@ -189,6 +195,7 @@ find_patch(int *done, FILE *fp) free(old); free(new); + free(blob); free(line); if (ferror(fp) && err == NULL) err = got_error_from_errno("getline"); blob - c1e703a5e36a9140b460df532dc84b36583e5884 file + regress/cmdline/patch.sh --- regress/cmdline/patch.sh +++ regress/cmdline/patch.sh @@ -1442,7 +1442,137 @@ EOF fi test_done $testroot $ret } +test_patch_merge_simple() { + local testroot=`test_init patch_merge_simple` + got checkout $testroot/repo $testroot/wt > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 > $testroot/wt/numbers + (cd $testroot/wt && got add numbers && got commit -m +numbers) \ + > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 | sed 's/4/four/g' > $testroot/wt/numbers + + (cd $testroot/wt && got diff > $testroot/old.diff \ + && got revert numbers) >/dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 | sed 's/6/six/g' > $testroot/wt/numbers + (cd $testroot/wt && got commit -m 'edit numbers') \ + > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + (cd $testroot/wt && got patch $testroot/old.diff) \ + 2>&1 > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 | sed -e s/4/four/ -e s/6/six/ > $testroot/wt/numbers.expected + cmp -s $testroot/wt/numbers $testroot/wt/numbers.expected + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/wt/numbers $testroot/wt/numbers.expected + fi + test_done $testroot $ret +} + +test_patch_merge_conflict() { + local testroot=`test_init patch_merge_conflict` + + got checkout $testroot/repo $testroot/wt > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 > $testroot/wt/numbers + (cd $testroot/wt && got add numbers && got commit -m +numbers) \ + > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 | sed 's/6/six/g' > $testroot/wt/numbers + + (cd $testroot/wt && got diff > $testroot/old.diff \ + && got revert numbers) >/dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + jot 10 | sed 's/6/3+3/g' > $testroot/wt/numbers + (cd $testroot/wt && got commit -m 'edit numbers') \ + > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + (cd $testroot/wt && got patch $testroot/old.diff) \ + >/dev/null 2>&1 + ret=$? + if [ $ret -eq 0 ]; then + echo "got patch merged a diff that should conflic" >&2 + test_done $testroot 0 + return 1 + fi + + # XXX: prefixing every line with a tab otherwise got thinks + # the file has conflicts in it. + cat <<-EOF > $testroot/wt/numbers.expected + 1 + 2 + 3 + 4 + 5 + <<<<<<< old file + six + ||||||| ancestor + 6 + ======= + 3+3 + >>>>>>> work file + 7 + 8 + 9 + 10 +EOF + + cmp -s $testroot/wt/numbers $testroot/wt/numbers.expected + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/wt/numbers $testroot/wt/numbers.expected + fi + test_done $testroot $ret +} + test_parseargs "$@" run_test test_patch_simple_add_file run_test test_patch_simple_rm_file @@ -1468,3 +1598,5 @@ run_test test_patch_relative_paths run_test test_patch_with_path_prefix run_test test_patch_relpath_with_path_prefix run_test test_patch_reverse +run_test test_patch_merge_simple +run_test test_patch_merge_conflict