"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: got patch: delete binary files too
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Sat, 31 Dec 2022 17:51:16 +0100

Download raw body.

Thread
On 2022/12/31 16:18:00 +0100, Stefan Sperling <stsp@stsp.name> wrote:
> On Sat, Dec 31, 2022 at 04:05:26PM +0100, Omar Polo wrote:
> > diff and cvs diff looks similar but they don't provide the filename in
> > a way that's easy to extract so they're left out.  Here's an example
> > for reference:
> > 
> > 	% diff -u bin/got /dev/null
> > 	Binary files bin/got and /dev/null differ
> 
> The name could be extracted here. But you don't want to handle this
> since it lacks a 'file + /dev/null' or 'delete file mode' header?
> 
> Isn't 'Binary files ... and /dev/null differ' enough to detect this
> case? A destination of /dev/null imlies file deletion.

The problem is that currently got-read-patch looks for a line that
starts with "diff" or "---" to guess where the diff (and/or metadata)
starts.  diff(1) doesn't add that when diffing binary files.

cvs diff would work, but the filename is shown in an earlier line, the
Index and RCS one (that I'm currently ignoring).

Actually the diff could be simplified: we can break out of patch_start
(that looks for the start of a diff) also when finding a "Binary files
... and /dev/null differ" and just look for that line in find_diff.
This allow to handle also diff(1)-generated patches.

CVS patches will now be recognized and will fail (the extracted
filename will be "/tmp/cvs...".)  We could read the Index line, which
POSIX patch(1) handles but neither Larry' nor GNU patch honour by
default, and somehow use it.  Maybe hardcoding some logic for rcs/cvs
diffs.  Will try to add this logic as a follow-up commit if diff below
is still ok.

(i've also tweaked slightly the test, should be a bit easier to read
now.)

> [...]
> > +static const struct got_error *
> > +binaryfilename(const char *at, char **name)
> > +{
> > +	const char *trail = " and /dev/null differ\n";
> 
> I would call this variable 'tail' or 'trailer', not 'trail' (which
> means "path" or "track" one can follow, e.g. while going on a hike).

ooops :)

changed to "suffix" which reads nice when accompanied by "prefix" in
`binary_deleted'.


Thanks,



diff /home/op/w/got
commit - cf536071bc57734308f29cda79d67c88abb3b9f0
path + /home/op/w/got
blob - 6c31c4b5e3788850c30c78ed552e90f1eddd3982
file + lib/patch.c
--- lib/patch.c
+++ lib/patch.c
@@ -587,6 +587,10 @@ patch_file(struct got_patch *p, FILE *orig, FILE *tmp)
 		return apply_hunk(orig, tmp, h, &lineno, 0);
 	}
 
+	/* When deleting binary files there are no hunks to apply. */
+	if (p->new == NULL && STAILQ_EMPTY(&p->head))
+		return NULL;
+
 	if (fstat(fileno(orig), &sb) == -1)
 		return got_error_from_errno("fstat");
 
blob - becf27d3e00feb1fd2b32823931dc0d1b580cfb6
file + libexec/got-read-patch/got-read-patch.c
--- libexec/got-read-patch/got-read-patch.c
+++ libexec/got-read-patch/got-read-patch.c
@@ -129,6 +129,44 @@ filexbit(const char *line)
 }
 
 static int
+binary_deleted(const char *line)
+{
+	const char *prefix = "Binary files ";
+	const char *suffix = " and /dev/null differ\n";
+	size_t len, d;
+
+	if (strncmp(line, prefix, strlen(prefix)) != 0)
+		return 0;
+	line += strlen(prefix);
+
+	len = strlen(line);
+	if (len <= strlen(suffix))
+		return 0;
+	d = len - strlen(suffix);
+	return (strcmp(line + d, suffix) == 0);
+}
+
+static const struct got_error *
+binaryfilename(const char *at, char **name)
+{
+	const char *suffix = " and /dev/null differ\n";
+	size_t len, d;
+
+	len = strlen(at);
+	if (len <= strlen(suffix))
+		return NULL;
+
+	d = len - strlen(suffix);
+	if (strcmp(at + d, suffix) != 0)
+		return NULL;
+
+	*name = strndup(at, d);
+	if (*name == NULL)
+		return got_error_from_errno("strndup");
+	return NULL;
+}
+
+static int
 filexbit(const char *line)
 {
 	char *m;
@@ -187,7 +225,8 @@ patch_start(int *git, char **cid, FILE *fp)
 				break;
 		} else if (!strncmp(line, "--- ", 4) ||
 		    !strncmp(line, "+++ ", 4) ||
-		    !strncmp(line, "blob - ", 7)) {
+		    !strncmp(line, "blob - ", 7) ||
+		    binary_deleted(line)) {
 			/* rewind to previous line */
 			if (fseeko(fp, -linelen, SEEK_CUR) == -1)
 				err = got_error_from_errno("fseeko");
@@ -212,7 +251,7 @@ find_diff(int *done, int *next, FILE *fp, int git, con
 	char	*line = NULL;
 	size_t	 linesize = 0;
 	ssize_t	 linelen;
-	int	 create, rename = 0, xbit = 0;
+	int	 create, delete_binary = 0, rename = 0, xbit = 0;
 
 	*done = 0;
 	*next = 0;
@@ -237,6 +276,10 @@ find_diff(int *done, int *next, FILE *fp, int git, con
 		} else if (!git && !strncmp(line, "blob - ", 7)) {
 			free(blob);
 			err = blobid(line + 7, &blob, git);
+		} else if (!strncmp(line, "Binary files ", 13)) {
+			delete_binary = 1;
+			free(old);
+			err = binaryfilename(line + 13, &old);
 		} else if (rename && !strncmp(line, "rename to ", 10)) {
 			free(new);
 			err = filename(line + 10, &new);
@@ -270,6 +313,16 @@ find_diff(int *done, int *next, FILE *fp, int git, con
 			break;
 		}
 
+		/*
+		 * Diffs that remove binary files have no hunks.
+		 */
+		if (delete_binary && old != NULL) {
+			*done = 1;
+			err = send_patch(old, new, commitid,
+			    blob, xbit, git);
+			break;
+		}
+
 		if (!strncmp(line, "@@ -", 4)) {
 			create = !strncmp(line+4, "0,0", 3);
 			if ((old == NULL && new == NULL) ||
blob - 9e3d4639d01241422b50bdddf316137ceb10d945
file + regress/cmdline/patch.sh
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1881,6 +1881,78 @@ test_parseargs "$@"
 	test_done "$testroot" 0
 }
 
+test_patch_remove_binary_file() {
+	local testroot=`test_init patch_remove_binary_file`
+
+	if ! got checkout $testroot/repo $testroot/wt >/dev/null; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	dd if=/dev/zero of=$testroot/wt/x bs=1 count=16 2>/dev/null >&2
+	(cd $testroot/wt && got add x && got commit -m +x) >/dev/null
+
+	(cd $testroot/wt && \
+		got branch demo && \
+		got rm x && \
+		got ci -m -x &&
+		got up -b master) >/dev/null
+
+	echo 'D  x' > $testroot/stdout.expected
+
+	(cd $testroot/wt && got log -c demo -l 1 -p >patch)
+
+	(cd $testroot/wt && got patch <patch) > $testroot/stdout
+	if [ $? -ne 0 ]; then
+		echo 'patch failed' >&2
+		test_done $testroot 1
+		return 1
+	fi
+
+	if ! cmp -s $testroot/stdout.expected $testroot/stdout; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done $testroot 1
+		return 1
+	fi
+
+	# try again using a git produced diff
+	(cd $testroot/wt && got revert x) >/dev/null
+
+	(cd $testroot/repo && git show demo) >$testroot/wt/patch
+
+	(cd $testroot/wt && got patch <patch) > $testroot/stdout
+	if [ $? -ne 0 ]; then
+		echo 'patch failed' >&2
+		test_done $testroot 1
+		return 1
+	fi
+
+	if ! cmp -s $testroot/stdout.expected $testroot/stdout; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done $testroot 1
+		return 1
+	fi
+
+	# try again using a diff(1) style patch
+	(cd $testroot/wt && got revert x) >/dev/null
+
+	echo "Binary files x and /dev/null differ" >$testroot/wt/patch
+	(cd $testroot/wt && got patch <patch) >$testroot/stdout
+	if [ $? -ne 0 ]; then
+		echo 'patch failed' >&2
+		test_done $testroot 1
+		return 1
+	fi
+
+	if ! cmp -s $testroot/stdout.expected $testroot/stdout; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done $testroot 1
+		return 1
+	fi
+
+	test_done $testroot 0
+}
+
 test_parseargs "$@"
 run_test test_patch_basic
 run_test test_patch_dont_apply
@@ -1910,3 +1982,4 @@ run_test test_patch_umask
 run_test test_patch_newfile_xbit_got_diff
 run_test test_patch_newfile_xbit_git_diff
 run_test test_patch_umask
+run_test test_patch_remove_binary_file