From: Mark Jamsek Subject: Re: Weird behaviour when staging binary files interactively To: gameoftrees@openbsd.org Cc: Johannes Thyssen Tishman Date: Mon, 04 Nov 2024 17:02:08 +1100 Mark Jamsek wrote: > Johannes Thyssen Tishman wrote: > > I bumped into a few weird behaviours when staging modified binary files > > interactively. Here is my best effort to summarize them. All commands > > below build upon each other (i.e. the last command in this email depends > > on all previously executed commands). > > Thanks for the thorough report (and sorry for the late reply)! > > I've reproduced this with your recipe and can confirm the unexpected > behaviour. It's quite late here now, but I'll look into this and cook > a fix tomorrow (or rather later today) after zzz. I just wanted to > acknowledge your report and thank you for the repro as I saw your email > the other day but only had the chance to reproduce it this evening :) The below diff fixes the unexpected `got stage -p $binary` behaviour reported by Johannes. It also fixes the unstage -p and revert -p cases that produced the same behaviour. Rather than iterate the hunks, we present the standard "Binary files $f and $f differ" with the same y/n/q prompt. If n or q is entered, the stage/unstage/revert of the binary file does not happen; if y is entered, the entire binary file is (un)staged or reverted. I went for a minimally invasive fix rather than change the UI. I think it makes sense to keep the same prompt and use the same "binary files ... differ" prompt for consistency. Regress is happy but I still haven't had time to expand it. The diff contains a basic `stage -p` test that I used while cooking the fix, but I want to expand on this a fair bit later tonight with plaintext and binary files in the changeset, y/n responses, and tests for unstage and revert. This also fixes the diffstat insofar as it shows "1 file changed ..." instead of 0, but it still shows "0 insertions/deletions" which we do for binary files unless the "-a" flag to force ascii is used, in which case the insertions/deletions are computed. I don't know if we want to change this to always force ascii so they're unconditionally shown for binary files; I lean to no but I'm not married to that position. M lib/worktree.c | 99+ 26- M regress/cmdline/stage.sh | 54+ 0- 2 files changed, 153 insertions(+), 26 deletions(-) commit - e37a5589c16266235a9b0d3b6d7be7ec67b46390 commit + 8ec072dd0806ad244a653332caf555a05d5f3b27 blob - bb9125ced48b15709c711e4a7f24e2e35e0a2c0d blob + 3a2102bcc91836d05bac9811eb348becde3340d2 --- lib/worktree.c +++ lib/worktree.c @@ -4811,6 +4811,43 @@ copy_remaining_content(FILE *f1, FILE *f2, int *line_c } static const struct got_error * +accept_or_reject_binary_change(int *choice, const char *path, + got_worktree_patch_cb patch_cb, void *patch_arg) +{ + const struct got_error *err; + FILE *f; + + *choice = GOT_PATCH_CHOICE_NONE; + + f = got_opentemp(); + if (f == NULL) + return got_error_from_errno("got_opentemp"); + + if (fprintf(f, "Binary files %s and %s differ\n", path, path) < 0) { + err = got_error_msg(GOT_ERR_IO, "fprintf"); + goto done; + } + if (fseeko(f, 0L, SEEK_SET) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } + + err = (*patch_cb)(choice, patch_arg, GOT_STATUS_MODIFY, path, f, 1, 1); + +done: + if (f != NULL && fclose(f) == EOF && err == NULL) + err = got_error_from_errno("fclose"); + return err; +} + +static int +diff_result_has_binary(struct diff_result *r) +{ + return (r->left->atomizer_flags | r->right->atomizer_flags) & + DIFF_ATOMIZER_FOUND_BINARY_DATA; +} + +static const struct got_error * apply_or_reject_change(int *choice, int *nchunks_used, struct diff_result *diff_result, int n, const char *relpath, FILE *f1, FILE *f2, int *line_cur1, int *line_cur2, @@ -4905,8 +4942,8 @@ struct revert_file_args { }; static const struct got_error * -create_patched_content(char **path_outfile, int reverse_patch, - struct got_object_id *blob_id, const char *path2, +create_patched_content(char **path_outfile, int *confirm_binary_change, + int reverse_patch, struct got_object_id *blob_id, const char *path2, int dirfd2, const char *de_name2, const char *relpath, struct got_repository *repo, got_worktree_patch_cb patch_cb, void *patch_arg) @@ -4920,10 +4957,11 @@ create_patched_content(char **path_outfile, int revers char *path1 = NULL, *id_str = NULL; struct stat sb2; struct got_diffreg_result *diffreg_result = NULL; - int line_cur1 = 1, line_cur2 = 1, have_content = 0; + int choice, line_cur1 = 1, line_cur2 = 1, have_content = 0; int i = 0, n = 0, nchunks_used = 0, nchanges = 0; *path_outfile = NULL; + *confirm_binary_change = 0; err = got_object_id_str(&id_str, blob_id); if (err) @@ -5015,6 +5053,14 @@ create_patched_content(char **path_outfile, int revers if (err) goto done; + if (diff_result_has_binary(diffreg_result->result)) { + err = accept_or_reject_binary_change(&choice, relpath, + patch_cb, patch_arg); + if (err == NULL && choice == GOT_PATCH_CHOICE_YES) + *confirm_binary_change = 1; + goto done; + } + err = got_opentemp_named(path_outfile, &outfile, "got-patched-content", ""); if (err) @@ -5033,7 +5079,6 @@ create_patched_content(char **path_outfile, int revers nchanges++; } for (n = 0; n < diffreg_result->result->chunks.len; n += nchunks_used) { - int choice; err = apply_or_reject_change(&choice, &nchunks_used, diffreg_result->result, n, relpath, f1, f2, &line_cur1, &line_cur2, @@ -5273,11 +5318,23 @@ revert_file(void *arg, unsigned char status, unsigned if (a->patch_cb && (status == GOT_STATUS_MODIFY || status == GOT_STATUS_CONFLICT)) { - int is_bad_symlink = 0; - err = create_patched_content(&path_content, 1, &id, - ondisk_path, dirfd, de_name, ie->path, a->repo, + int is_bad_symlink = 0, revert_binary_file = 0; + + err = create_patched_content(&path_content, + &revert_binary_file, 1, &id, ondisk_path, dirfd, + de_name, ie->path, a->repo, a->patch_cb, a->patch_arg); - if (err || path_content == NULL) + if (err != NULL) + goto done; + if (revert_binary_file){ + err = install_blob(a->worktree, ondisk_path, + ie->path, + te ? te->mode : GOT_DEFAULT_FILE_MODE, + got_fileindex_perms_to_st(ie), blob, + 0, 1, 0, 0, NULL, a->repo, + a->progress_cb, a->progress_arg); + } + if (path_content == NULL) break; if (te && S_ISLNK(te->mode)) { if (unlink(path_content) == -1) { @@ -9236,11 +9293,15 @@ stage_path(void *arg, unsigned char status, if (choice != GOT_PATCH_CHOICE_YES) break; } else { - err = create_patched_content(&path_content, 0, + int stage_binary_file = 0; + + err = create_patched_content(&path_content, + &stage_binary_file, 0, staged_blob_id ? staged_blob_id : blob_id, ondisk_path, dirfd, de_name, ie->path, a->repo, a->patch_cb, a->patch_arg); - if (err || path_content == NULL) + if (!stage_binary_file && + (err || path_content == NULL)) break; } } @@ -9449,9 +9510,9 @@ struct unstage_path_arg { static const struct got_error * create_unstaged_content(char **path_unstaged_content, - char **path_new_staged_content, struct got_object_id *blob_id, - struct got_object_id *staged_blob_id, const char *relpath, - struct got_repository *repo, + char **path_new_staged_content, int *confirm_binary_change, + struct got_object_id *blob_id, struct got_object_id *staged_blob_id, + const char *relpath, struct got_repository *repo, got_worktree_patch_cb patch_cb, void *patch_arg) { const struct got_error *err, *free_err; @@ -9459,10 +9520,11 @@ create_unstaged_content(char **path_unstaged_content, FILE *f1 = NULL, *f2 = NULL, *outfile = NULL, *rejectfile = NULL; char *path1 = NULL, *path2 = NULL, *label1 = NULL; struct got_diffreg_result *diffreg_result = NULL; - int line_cur1 = 1, line_cur2 = 1, n = 0, nchunks_used = 0; + int choice, line_cur1 = 1, line_cur2 = 1, n = 0, nchunks_used = 0; int have_content = 0, have_rejected_content = 0, i = 0, nchanges = 0; int fd1 = -1, fd2 = -1; + *confirm_binary_change = 0; *path_unstaged_content = NULL; *path_new_staged_content = NULL; @@ -9511,6 +9573,14 @@ create_unstaged_content(char **path_unstaged_content, if (err) goto done; + if (diff_result_has_binary(diffreg_result->result)) { + err = accept_or_reject_binary_change(&choice, relpath, + patch_cb, patch_arg); + if (err == NULL && choice == GOT_PATCH_CHOICE_YES) + *confirm_binary_change = 1; + goto done; + } + err = got_opentemp_named(path_unstaged_content, &outfile, "got-unstaged-content", ""); if (err) @@ -9536,7 +9606,6 @@ create_unstaged_content(char **path_unstaged_content, nchanges++; } for (n = 0; n < diffreg_result->result->chunks.len; n += nchunks_used) { - int choice; err = apply_or_reject_change(&choice, &nchunks_used, diffreg_result->result, n, relpath, f1, f2, &line_cur1, &line_cur2, @@ -9600,11 +9669,11 @@ done: } static const struct got_error * -unstage_hunks(struct got_object_id *staged_blob_id, - struct got_blob_object *blob_base, - struct got_object_id *blob_id, struct got_fileindex_entry *ie, - const char *ondisk_path, const char *label_orig, - struct got_worktree *worktree, struct got_repository *repo, +unstage_hunks(int *confirm_binary_change, struct got_object_id *staged_blob_id, + struct got_blob_object *blob_base, struct got_object_id *blob_id, + struct got_fileindex_entry *ie, const char *ondisk_path, + const char *label_orig, struct got_worktree *worktree, + struct got_repository *repo, got_worktree_patch_cb patch_cb, void *patch_arg, got_worktree_checkout_cb progress_cb, void *progress_arg) { @@ -9618,8 +9687,8 @@ unstage_hunks(struct got_object_id *staged_blob_id, struct stat sb; err = create_unstaged_content(&path_unstaged_content, - &path_new_staged_content, blob_id, staged_blob_id, - ie->path, repo, patch_cb, patch_arg); + &path_new_staged_content, confirm_binary_change, + blob_id, staged_blob_id, ie->path, repo, patch_cb, patch_arg); if (err) return err; @@ -9823,12 +9892,16 @@ unstage_path(void *arg, unsigned char status, if (choice != GOT_PATCH_CHOICE_YES) break; } else { - err = unstage_hunks(staged_blob_id, - blob_base, blob_id, ie, ondisk_path, - label_orig, a->worktree, a->repo, + int unstage_binary_file = 0; + + err = unstage_hunks(&unstage_binary_file, + staged_blob_id, blob_base, blob_id, + ie, ondisk_path, label_orig, + a->worktree, a->repo, a->patch_cb, a->patch_arg, a->progress_cb, a->progress_arg); - break; /* Done with this file. */ + if (!unstage_binary_file) + break; /* Done with this file. */ } } err = got_object_open_as_blob(&blob_staged, a->repo, blob - bb8f0dfe722dc734a56ccff46a57c1cf44e3005b blob + dff4de020d2b0dfb2a76cc01215bd3dd68fa0b7d --- regress/cmdline/stage.sh +++ regress/cmdline/stage.sh @@ -3041,6 +3041,59 @@ EOF test_done "$testroot" "0" } +test_stage_patch_binary() { + local testroot=$(test_init stage_patch_binary) + + dd if=/dev/urandom of=$testroot/repo/binary bs=1024 count=16 \ + > /dev/null 2>&1 + git -C $testroot/repo add binary + git_commit $testroot/repo -m "add binary file" + local id=$(git_show_head $testroot/repo) + + got checkout $testroot/repo $testroot/wt > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done "$testroot" "$ret" + return 1 + fi + + ed -s $testroot/wt/binary <<-EOF + 2m8 + 7m16 + 5m24 + 22m32 + w + EOF + + printf "y\n" > $testroot/patchscript + (cd $testroot/wt && got stage -F $testroot/patchscript -p binary \ + > $testroot/stdout 2> $testroot/stderr) + ret=$? + if [ $ret -ne 0 ]; then + echo "got stage command failed unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi + + cat > $testroot/stdout.expected < GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68