Download raw body.
use patience diff for some diff3 merges
While using 'got histedit' I have encountered another mis-merge where a change was dropped from history which shouldn't have been dropped. I could reproduce this in 'got cherrypick' as well, by cherry-picking the affected commit from my local 'packcreate' branch to 'main'. This particular problem disappears when diff3 uses the Patience diff algorithm instead of the Myers algorithm to perform the merge. Note that Git's rebase command offers an option to force Patience diff instead of Myers. From the git-rebase(1) man page: [[[ patience With this option, merge-recursive spends a little extra time to avoid mismerges that sometimes occur due to unimportant matching lines (e.g., braces from distinct functions). ]]] Rather than asking the user to configure a diff algorithm I would like to use sane defaults, which my patch below sets as follows: - use Patience diff during cherrypick/backout/histedit/rebase - use Myers diff during update/unstage During cherrypick/backout/rebase/histedit commits may be applied out of order, and generally the 3-way merge base may not 100% match the actual file which both derived versions were based on. Such cases are more likely to result in conflicts and mis-merges. I'd like to rely on patience diff here to hopefully make mis-merges less likely. Note that we already used diff3 with patience diff before, since this commit: https://git.gameoftrees.org/gitweb/?p=got.git;a=commit;h=fe621944e83fe6367f7bff97128b4240a9cdc7c5 And we switched back to Myers here: https://git.gameoftrees.org/gitweb/?p=got.git;a=commit;h=bc62ede807f0ad3a920fa1e8b05dd90cc8f5f289 As noted in the commit message linked above, the advantage of using Myers is that conflict chunks become smaller. Conflicts will typically cover less lines with Myers putting more effort into finding minimal diffs. I would like to keep using Myers in cases where 3-way merges are run with a common ancestor version that is known to be sane. This is the case during 'update' and 'unstage' because these commands apply changes in order and the common ancestor is known to be an actual common ancestor. To illustrate my problematic case, the patch being merged was this one: [[[ --- lib/deltify.c +++ lib/deltify.c @@ -314,8 +314,6 @@ stretchblk(FILE *basefile, struct got_delta_block *blo { uint8_t basebuf[GOT_DELTIFY_MAXCHUNK], buf[GOT_DELTIFY_MAXCHUNK]; size_t base_r, r, i; - off_t orig_blocklen = *blocklen; - off_t pos = ftello(f); int buf_equal = 1; if (fseeko(basefile, block->offset, SEEK_SET) == -1) @@ -343,9 +341,6 @@ stretchblk(FILE *basefile, struct got_delta_block *blo } } - if (fseeko(f, pos + *blocklen - orig_blocklen, SEEK_SET) == -1) - return got_error_from_errno("fseeko"); - return NULL; } @@ -402,6 +397,9 @@ got_deltify(struct got_delta_instruction **deltas, int blocklen); } fileoffset += blocklen; + if (fseeko(f, fileoffset, SEEK_SET) == -1) + return got_error_from_errno("fseeko"); + } if (err) { ]]] The wrong merge result was missing the second chunk which deletes the fseeko() call in stretchblk(): [[[ --- lib/deltify.c +++ lib/deltify.c @@ -312,8 +312,6 @@ stretchblk(FILE *basefile, struct got_delta_block *blo { uint8_t basebuf[GOT_DELTIFY_MAXCHUNK], buf[GOT_DELTIFY_MAXCHUNK]; size_t base_r, r, i; - off_t orig_blocklen = *blocklen; - off_t pos = ftello(f); int buf_equal = 1; if (fseeko(basefile, block->offset, SEEK_SET) == -1) @@ -399,6 +397,9 @@ got_deltify(struct got_delta_instruction **deltas, int blocklen); } fileoffset += blocklen; + if (fseeko(f, fileoffset, SEEK_SET) == -1) + return got_error_from_errno("fseeko"); + } if (err) { ]]] Lines which used the removed 'pos' variable weren't deleted, so this mis-merge resulted in a compilation error and was detected. However, such mis-merges could potentially introducing bugs which go unnoticed. Ideally, I would like to add a test case. But since again the bad behaviour only triggers on actual source code files, building a small test case is difficult. I think it makes sense to change the behaviour and listen to problems people will be reporting. Most of this patch deals with passing the diff algorithm as a parameter so callers of merge_file() can decide which algorithm to use. diff 282f42e5d1095015379b49280429e558b3bbc4fe /home/stsp/src/got blob - b8c328a2f9b853caabcc322afe5c68eefd3b4ccb file + lib/diff3.c --- lib/diff3.c +++ lib/diff3.c @@ -197,7 +197,8 @@ diff_output(BUF *diffbuf, const char *fmt, ...) } static const struct got_error* -diffreg(BUF **d, const char *path1, const char *path2) +diffreg(BUF **d, const char *path1, const char *path2, + enum got_diff_algorithm diff_algo) { const struct got_error *err = NULL; FILE *f1 = NULL, *f2 = NULL, *outfile = NULL; @@ -222,8 +223,7 @@ diffreg(BUF **d, const char *path1, const char *path2) if (err) goto done; - err = got_diffreg(&diffreg_result, f1, f2, - GOT_DIFF_ALGORITHM_MYERS, 0, 1); + err = got_diffreg(&diffreg_result, f1, f2, diff_algo, 0, 1); if (err) goto done; @@ -262,7 +262,8 @@ done: */ const struct got_error * got_merge_diff3(int *overlapcnt, int outfd, FILE *f1, FILE *f2, - FILE *f3, const char *label1, const char *label2, const char *label3) + FILE *f3, const char *label1, const char *label2, const char *label3, + enum got_diff_algorithm diff_algo) { const struct got_error *err = NULL; char *dp13, *dp23, *path1, *path2, *path3; @@ -321,14 +322,14 @@ got_merge_diff3(int *overlapcnt, int outfd, FILE *f1, buf_free(b2); b2 = NULL; - err = diffreg(&d1, path1, path3); + err = diffreg(&d1, path1, path3, diff_algo); if (err) { buf_free(diffb); diffb = NULL; goto out; } - err = diffreg(&d2, path2, path3); + err = diffreg(&d2, path2, path3, diff_algo); if (err) { buf_free(diffb); diffb = NULL; blob - c056426dd1569f33cd5459cba2caaa263be5224d file + lib/got_lib_diff.h --- lib/got_lib_diff.h +++ lib/got_lib_diff.h @@ -63,7 +63,7 @@ const struct got_error *got_diffreg_close(FILE *, char FILE *, char *, size_t); const struct got_error *got_merge_diff3(int *, int, FILE *, FILE *, FILE *, - const char *, const char *, const char *); + const char *, const char *, const char *, enum got_diff_algorithm); const struct got_error *got_diff_files(struct got_diffreg_result **, FILE *, const char *, FILE *, const char *, int, int, int, FILE *); blob - 13cc9fbaca9c08dbb86084c978e908bee7eebe17 file + lib/worktree.c --- lib/worktree.c +++ lib/worktree.c @@ -768,7 +768,7 @@ merge_file(int *local_changes_subsumed, struct got_wor FILE *f_orig, FILE *f_deriv, FILE *f_deriv2, const char *ondisk_path, const char *path, uint16_t st_mode, const char *label_orig, const char *label_deriv, const char *label_deriv2, - struct got_repository *repo, + enum got_diff_algorithm diff_algo, struct got_repository *repo, got_worktree_checkout_cb progress_cb, void *progress_arg) { const struct got_error *err = NULL; @@ -794,7 +794,7 @@ merge_file(int *local_changes_subsumed, struct got_wor goto done; err = got_merge_diff3(&overlapcnt, merged_fd, f_deriv, f_orig, - f_deriv2, label_deriv, label_orig, label_deriv2); + f_deriv2, label_deriv, label_orig, label_deriv2, diff_algo); if (err) goto done; @@ -1179,7 +1179,7 @@ merge_blob(int *local_changes_subsumed, struct got_wor err = merge_file(local_changes_subsumed, worktree, f_orig, f_deriv, f_deriv2, ondisk_path, path, st_mode, label_orig, label_deriv, - NULL, repo, progress_cb, progress_arg); + NULL, GOT_DIFF_ALGORITHM_MYERS, repo, progress_cb, progress_arg); done: if (f_orig && fclose(f_orig) == EOF && err == NULL) err = got_error_from_errno("fclose"); @@ -2919,7 +2919,8 @@ merge_file_cb(void *arg, struct got_blob_object *blob1 err = merge_file(&local_changes_subsumed, a->worktree, f_orig, f_deriv, f_deriv2, ondisk_path, path2, sb.st_mode, a->label_orig, NULL, label_deriv2, - repo, a->progress_cb, a->progress_arg); + GOT_DIFF_ALGORITHM_PATIENCE, repo, + a->progress_cb, a->progress_arg); } } else if (blob1) { ie = got_fileindex_entry_get(a->fileindex, path1, @@ -7872,7 +7873,7 @@ unstage_hunks(struct got_object_id *staged_blob_id, err = merge_file(&local_changes_subsumed, worktree, f_base, f, f_deriv2, ondisk_path, ie->path, got_fileindex_perms_to_st(ie), - label_orig, "unstaged", NULL, + label_orig, "unstaged", NULL, GOT_DIFF_ALGORITHM_MYERS, repo, progress_cb, progress_arg); } if (err)
use patience diff for some diff3 merges