"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: Weird behaviour when staging binary files interactively
To:
gameoftrees@openbsd.org
Cc:
Johannes Thyssen Tishman <johannes@thyssentishman.com>
Date:
Mon, 04 Nov 2024 17:02:08 +1100

Download raw body.

Thread
Mark Jamsek <mark@jamsek.com> wrote:
> Johannes Thyssen Tishman <johannes@thyssentishman.com> wrote:
> > I bumped into a few weird behaviours when staging modified binary files
> > interactively. Here is my best effort to summarize them. All commands
> > below build upon each other (i.e. the last command in this email depends
> > on all previously executed commands).
> 
> Thanks for the thorough report (and sorry for the late reply)!
> 
> I've reproduced this with your recipe and can confirm the unexpected
> behaviour. It's quite late here now, but I'll look into this and cook
> a fix tomorrow (or rather later today) after zzz. I just wanted to
> acknowledge your report and thank you for the repro as I saw your email
> the other day but only had the chance to reproduce it this evening :)

The below diff fixes the unexpected `got stage -p $binary` behaviour
reported by Johannes. It also fixes the unstage -p and revert -p cases
that produced the same behaviour. Rather than iterate the hunks, we
present the standard "Binary files $f and $f differ" with the same y/n/q
prompt. If n or q is entered, the stage/unstage/revert of the binary
file does not happen; if y is entered, the entire binary file is
(un)staged or reverted.

I went for a minimally invasive fix rather than change the UI. I think
it makes sense to keep the same prompt and use the same "binary files
... differ" prompt for consistency. Regress is happy but I still haven't
had time to expand it. The diff contains a basic `stage -p` test that I
used while cooking the fix, but I want to expand on this a fair bit
later tonight with plaintext and binary files in the changeset, y/n
responses, and tests for unstage and revert.

This also fixes the diffstat insofar as it shows "1 file changed ..."
instead of 0, but it still shows "0 insertions/deletions" which we do
for binary files unless the "-a" flag to force ascii is used, in which
case the insertions/deletions are computed. I don't know if we want to
change this to always force ascii so they're unconditionally shown for
binary files; I lean to no but I'm not married to that position.


M  lib/worktree.c            |  99+  26-
M  regress/cmdline/stage.sh  |  54+   0-

2 files changed, 153 insertions(+), 26 deletions(-)

commit - e37a5589c16266235a9b0d3b6d7be7ec67b46390
commit + 8ec072dd0806ad244a653332caf555a05d5f3b27
blob - bb9125ced48b15709c711e4a7f24e2e35e0a2c0d
blob + 3a2102bcc91836d05bac9811eb348becde3340d2
--- lib/worktree.c
+++ lib/worktree.c
@@ -4811,6 +4811,43 @@ copy_remaining_content(FILE *f1, FILE *f2, int *line_c
 }
 
 static const struct got_error *
+accept_or_reject_binary_change(int *choice, const char *path,
+    got_worktree_patch_cb patch_cb, void *patch_arg)
+{
+	const struct got_error	*err;
+	FILE			*f;
+
+	*choice = GOT_PATCH_CHOICE_NONE;
+
+	f = got_opentemp();
+	if (f == NULL)
+		return got_error_from_errno("got_opentemp");
+
+	if (fprintf(f, "Binary files %s and %s differ\n", path, path) < 0) {
+		err = got_error_msg(GOT_ERR_IO, "fprintf");
+		goto done;
+	}
+	if (fseeko(f, 0L, SEEK_SET) == -1) {
+		err = got_error_from_errno("fseeko");
+		goto done;
+	}
+
+	err = (*patch_cb)(choice, patch_arg, GOT_STATUS_MODIFY, path, f, 1, 1);
+
+done:
+	if (f != NULL && fclose(f) == EOF && err == NULL)
+		err = got_error_from_errno("fclose");
+	return err;
+}
+
+static int
+diff_result_has_binary(struct diff_result *r)
+{
+	return (r->left->atomizer_flags | r->right->atomizer_flags) &
+	    DIFF_ATOMIZER_FOUND_BINARY_DATA;
+}
+
+static const struct got_error *
 apply_or_reject_change(int *choice, int *nchunks_used,
     struct diff_result *diff_result, int n,
     const char *relpath, FILE *f1, FILE *f2, int *line_cur1, int *line_cur2,
@@ -4905,8 +4942,8 @@ struct revert_file_args {
 };
 
 static const struct got_error *
-create_patched_content(char **path_outfile, int reverse_patch,
-    struct got_object_id *blob_id, const char *path2,
+create_patched_content(char **path_outfile, int *confirm_binary_change,
+    int reverse_patch, struct got_object_id *blob_id, const char *path2,
     int dirfd2, const char *de_name2,
     const char *relpath, struct got_repository *repo,
     got_worktree_patch_cb patch_cb, void *patch_arg)
@@ -4920,10 +4957,11 @@ create_patched_content(char **path_outfile, int revers
 	char *path1 = NULL, *id_str = NULL;
 	struct stat sb2;
 	struct got_diffreg_result *diffreg_result = NULL;
-	int line_cur1 = 1, line_cur2 = 1, have_content = 0;
+	int choice, line_cur1 = 1, line_cur2 = 1, have_content = 0;
 	int i = 0, n = 0, nchunks_used = 0, nchanges = 0;
 
 	*path_outfile = NULL;
+	*confirm_binary_change = 0;
 
 	err = got_object_id_str(&id_str, blob_id);
 	if (err)
@@ -5015,6 +5053,14 @@ create_patched_content(char **path_outfile, int revers
 	if (err)
 		goto done;
 
+	if (diff_result_has_binary(diffreg_result->result)) {
+		err = accept_or_reject_binary_change(&choice, relpath,
+		    patch_cb, patch_arg);
+		if (err == NULL && choice == GOT_PATCH_CHOICE_YES)
+			*confirm_binary_change = 1;
+		goto done;
+	}
+
 	err = got_opentemp_named(path_outfile, &outfile, "got-patched-content",
 	    "");
 	if (err)
@@ -5033,7 +5079,6 @@ create_patched_content(char **path_outfile, int revers
 		nchanges++;
 	}
 	for (n = 0; n < diffreg_result->result->chunks.len; n += nchunks_used) {
-		int choice;
 		err = apply_or_reject_change(&choice, &nchunks_used,
 		    diffreg_result->result, n, relpath, f1, f2,
 		    &line_cur1, &line_cur2,
@@ -5273,11 +5318,23 @@ revert_file(void *arg, unsigned char status, unsigned 
 
 		if (a->patch_cb && (status == GOT_STATUS_MODIFY ||
 		    status == GOT_STATUS_CONFLICT)) {
-			int is_bad_symlink = 0;
-			err = create_patched_content(&path_content, 1, &id,
-			    ondisk_path, dirfd, de_name, ie->path, a->repo,
+			int is_bad_symlink = 0, revert_binary_file = 0;
+
+			err = create_patched_content(&path_content,
+			    &revert_binary_file, 1, &id, ondisk_path, dirfd,
+			    de_name, ie->path, a->repo,
 			    a->patch_cb, a->patch_arg);
-			if (err || path_content == NULL)
+			if (err != NULL)
+				goto done;
+			if (revert_binary_file){
+				err = install_blob(a->worktree, ondisk_path,
+				    ie->path,
+				    te ? te->mode : GOT_DEFAULT_FILE_MODE,
+				    got_fileindex_perms_to_st(ie), blob,
+				    0, 1, 0, 0, NULL, a->repo,
+				    a->progress_cb, a->progress_arg);
+			}
+			if (path_content == NULL)
 				break;
 			if (te && S_ISLNK(te->mode)) {
 				if (unlink(path_content) == -1) {
@@ -9236,11 +9293,15 @@ stage_path(void *arg, unsigned char status,
 				if (choice != GOT_PATCH_CHOICE_YES)
 					break;
 			} else {
-				err = create_patched_content(&path_content, 0,
+				int stage_binary_file = 0;
+
+				err = create_patched_content(&path_content,
+				    &stage_binary_file, 0,
 				    staged_blob_id ? staged_blob_id : blob_id,
 				    ondisk_path, dirfd, de_name, ie->path,
 				    a->repo, a->patch_cb, a->patch_arg);
-				if (err || path_content == NULL)
+				if (!stage_binary_file &&
+				    (err || path_content == NULL))
 					break;
 			}
 		}
@@ -9449,9 +9510,9 @@ struct unstage_path_arg {
 
 static const struct got_error *
 create_unstaged_content(char **path_unstaged_content,
-    char **path_new_staged_content, struct got_object_id *blob_id,
-    struct got_object_id *staged_blob_id, const char *relpath,
-    struct got_repository *repo,
+    char **path_new_staged_content, int *confirm_binary_change,
+    struct got_object_id *blob_id, struct got_object_id *staged_blob_id,
+    const char *relpath, struct got_repository *repo,
     got_worktree_patch_cb patch_cb, void *patch_arg)
 {
 	const struct got_error *err, *free_err;
@@ -9459,10 +9520,11 @@ create_unstaged_content(char **path_unstaged_content,
 	FILE *f1 = NULL, *f2 = NULL, *outfile = NULL, *rejectfile = NULL;
 	char *path1 = NULL, *path2 = NULL, *label1 = NULL;
 	struct got_diffreg_result *diffreg_result = NULL;
-	int line_cur1 = 1, line_cur2 = 1, n = 0, nchunks_used = 0;
+	int choice, line_cur1 = 1, line_cur2 = 1, n = 0, nchunks_used = 0;
 	int have_content = 0, have_rejected_content = 0, i = 0, nchanges = 0;
 	int fd1 = -1, fd2 = -1;
 
+	*confirm_binary_change = 0;
 	*path_unstaged_content = NULL;
 	*path_new_staged_content = NULL;
 
@@ -9511,6 +9573,14 @@ create_unstaged_content(char **path_unstaged_content,
 	if (err)
 		goto done;
 
+	if (diff_result_has_binary(diffreg_result->result)) {
+		err = accept_or_reject_binary_change(&choice, relpath,
+		    patch_cb, patch_arg);
+		if (err == NULL && choice == GOT_PATCH_CHOICE_YES)
+			*confirm_binary_change = 1;
+		goto done;
+	}
+
 	err = got_opentemp_named(path_unstaged_content, &outfile,
 	    "got-unstaged-content", "");
 	if (err)
@@ -9536,7 +9606,6 @@ create_unstaged_content(char **path_unstaged_content,
 		nchanges++;
 	}
 	for (n = 0; n < diffreg_result->result->chunks.len; n += nchunks_used) {
-		int choice;
 		err = apply_or_reject_change(&choice, &nchunks_used,
 		    diffreg_result->result, n, relpath, f1, f2,
 		    &line_cur1, &line_cur2,
@@ -9600,11 +9669,11 @@ done:
 }
 
 static const struct got_error *
-unstage_hunks(struct got_object_id *staged_blob_id,
-    struct got_blob_object *blob_base,
-    struct got_object_id *blob_id, struct got_fileindex_entry *ie,
-    const char *ondisk_path, const char *label_orig,
-    struct got_worktree *worktree, struct got_repository *repo,
+unstage_hunks(int *confirm_binary_change, struct got_object_id *staged_blob_id,
+    struct got_blob_object *blob_base, struct got_object_id *blob_id,
+    struct got_fileindex_entry *ie, const char *ondisk_path,
+    const char *label_orig, struct got_worktree *worktree,
+    struct got_repository *repo,
     got_worktree_patch_cb patch_cb, void *patch_arg,
     got_worktree_checkout_cb progress_cb, void *progress_arg)
 {
@@ -9618,8 +9687,8 @@ unstage_hunks(struct got_object_id *staged_blob_id,
 	struct stat sb;
 
 	err = create_unstaged_content(&path_unstaged_content,
-	    &path_new_staged_content, blob_id, staged_blob_id,
-	    ie->path, repo, patch_cb, patch_arg);
+	    &path_new_staged_content, confirm_binary_change,
+	    blob_id, staged_blob_id, ie->path, repo, patch_cb, patch_arg);
 	if (err)
 		return err;
 
@@ -9823,12 +9892,16 @@ unstage_path(void *arg, unsigned char status,
 				if (choice != GOT_PATCH_CHOICE_YES)
 					break;
 			} else {
-				err = unstage_hunks(staged_blob_id,
-				    blob_base, blob_id, ie, ondisk_path,
-				    label_orig, a->worktree, a->repo,
+				int unstage_binary_file = 0;
+
+				err = unstage_hunks(&unstage_binary_file,
+				    staged_blob_id, blob_base, blob_id,
+				    ie, ondisk_path, label_orig,
+				    a->worktree, a->repo,
 				    a->patch_cb, a->patch_arg,
 				    a->progress_cb, a->progress_arg);
-				break; /* Done with this file. */
+				if (!unstage_binary_file)
+					break; /* Done with this file. */
 			}
 		}
 		err = got_object_open_as_blob(&blob_staged, a->repo,
blob - bb8f0dfe722dc734a56ccff46a57c1cf44e3005b
blob + dff4de020d2b0dfb2a76cc01215bd3dd68fa0b7d
--- regress/cmdline/stage.sh
+++ regress/cmdline/stage.sh
@@ -3041,6 +3041,59 @@ EOF
 	test_done "$testroot" "0"
 }
 
+test_stage_patch_binary() {
+	local testroot=$(test_init stage_patch_binary)
+
+	dd if=/dev/urandom of=$testroot/repo/binary bs=1024 count=16 \
+	    > /dev/null 2>&1
+	git -C $testroot/repo add binary
+	git_commit $testroot/repo -m "add binary file"
+	local id=$(git_show_head $testroot/repo)
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	ed -s $testroot/wt/binary <<-EOF
+	2m8
+	7m16
+	5m24
+	22m32
+	w
+	EOF
+
+	printf "y\n" > $testroot/patchscript
+	(cd $testroot/wt && got stage -F $testroot/patchscript -p binary \
+		> $testroot/stdout 2> $testroot/stderr)
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got stage command failed unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	cat > $testroot/stdout.expected <<EOF
+-----------------------------------------------
+Binary files binary and binary differ
+-----------------------------------------------
+M  binary (change 1 of 1)
+stage this change? [y/n/q] y
+EOF
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	test_done "$testroot" 0
+}
+
 test_parseargs "$@"
 run_test test_stage_basic
 run_test test_stage_no_changes
@@ -3072,3 +3125,4 @@ run_test test_stage_patch_quit
 run_test test_stage_patch_incomplete_script
 run_test test_stage_symlink
 run_test test_stage_patch_symlink
+run_test test_stage_patch_binary


-- 
Mark Jamsek <https://bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68