"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
got patch: add flag to ignore whitespace?
To:
gameoftrees@openbsd.org
Date:
Sat, 02 Jul 2022 17:16:34 +0200

Download raw body.

Thread
The last patch from Mark reminded me that I thought some time ago to add
a "-w" flag for got patch to ignore white spaces.  It's conceptually
similar to patch(1) -l/--ignore-whitespace or git-apply(1)
--ignore-whitespace.

Here's a possible usage (using Mark' last diff)

	% mshow | got patch
	#  tog/tog.1
	@@ -295,7 +305,7 @@ hunk failed to apply
	G  tog/tog.c
	got: patch failed to apply
	% # woops, try again with -w
	% mshow | got patch -w
	G  tog/tog.1
	G  tog/tog.c
	% # yay!

The diff does exactly what one would expect.  The line comparisons are
now done in linecmp, which first runs strcmp(3) and optionally falls
back to a whitespace-loose matching.

There's a bit of churn in apply_hunk now.  Before, I was re-using the
lines saved in the hunk to apply it, now I can't.  If the context lines
of the hunk may have the whitespaces mangled and so I have to re-read
the lines from the original file.  Even if we're using a loose match,
the context line needs not to be changed.

diff refs/heads/main refs/heads/pw
commit - acf749fc600a43d8e276321e8a63cd97484f30bb
commit + d508e09056f199e5fd4e15d028fd1987e9cd5a41
blob - d2db3f11b55b0dcd7008e9f4662210887aa2742b
blob + ac432bba43e6aa8dc04e5981fa8f99a8c9ae9e39
--- got/got.1
+++ got/got.1
@@ -1307,7 +1307,7 @@ option)
 .El
 .El
 .Tg pa
-.It Cm patch Oo Fl n Oc Oo Fl p Ar strip-count Oc Oo Fl R Oc Op Ar patchfile
+.It Cm patch Oo Fl n Oc Oo Fl p Ar strip-count Oc Oo Fl R Oc Oo Fl w Oc Op Ar patchfile
 .Dl Pq alias: Cm pa
 Apply changes from
 .Ar patchfile
@@ -1393,6 +1393,11 @@ path prefixes generated by
 will be recognized and stripped automatically.
 .It Fl R
 Reverse the patch before applying it.
+.It Fl w
+Ignore whitespaces in context lines.
+Useful in case the tabs and spaces in the
+.Ar patchfile
+have been mangled.
 .El
 .Tg rv
 .It Cm revert Oo Fl p Oc Oo Fl F Ar response-script Oc Oo Fl R Oc Ar path ...
blob - 5f7f00007937a048564e685aac0a8c6b9e98adab
blob + 62afd6dac8942d13f30de9135d7d8dcba4e79a9e
--- got/got.c
+++ got/got.c
@@ -7702,7 +7702,7 @@ __dead static void
 usage_patch(void)
 {
 	fprintf(stderr, "usage: %s patch [-n] [-p strip-count] "
-	    "[-R] [patchfile]\n", getprogname());
+	    "[-R] [-w] [patchfile]\n", getprogname());
 	exit(1);
 }
 
@@ -7791,11 +7791,11 @@ cmd_patch(int argc, char *argv[])
 	struct got_repository *repo = NULL;
 	const char *errstr;
 	char *cwd = NULL;
-	int ch, nop = 0, strip = -1, reverse = 0;
+	int ch, nop = 0, strip = -1, reverse = 0, ws = 0;
 	int patchfd;
 	int *pack_fds = NULL;
 
-	while ((ch = getopt(argc, argv, "np:R")) != -1) {
+	while ((ch = getopt(argc, argv, "np:Rw")) != -1) {
 		switch (ch) {
 		case 'n':
 			nop = 1;
@@ -7809,6 +7809,9 @@ cmd_patch(int argc, char *argv[])
 		case 'R':
 			reverse = 1;
 			break;
+		case 'w':
+			ws = 1;
+			break;
 		default:
 			usage_patch();
 			/* NOTREACHED */
@@ -7860,7 +7863,7 @@ cmd_patch(int argc, char *argv[])
 		err(1, "pledge");
 #endif
 
-	error = got_patch(patchfd, worktree, repo, nop, strip, reverse,
+	error = got_patch(patchfd, worktree, repo, nop, strip, reverse, ws,
 	    &patch_progress, NULL, check_cancelled, NULL);
 
 done:
blob - bc2e95bb4fa535617bc28f76935a78e36dea8a3e
blob + b91edd6f33b6aa71491a276c624ccb7a933573c6
--- include/got_patch.h
+++ include/got_patch.h
@@ -32,4 +32,4 @@ typedef const struct got_error *(*got_patch_progress_c
  */
 const struct got_error *
 got_patch(int, struct got_worktree *, struct got_repository *, int, int,
-    int, got_patch_progress_cb, void *, got_cancel_cb, void *);
+    int, int, got_patch_progress_cb, void *, got_cancel_cb, void *);
blob - 6b6915f9998d30a1ae5b1846365b299ba2c974fe
blob + 0cc767b01d1a33b9f67faf1a8d9c29ce3d54ea53
--- lib/patch.c
+++ lib/patch.c
@@ -26,6 +26,7 @@
 #include <sys/stat.h>
 #include <sys/uio.h>
 
+#include <ctype.h>
 #include <errno.h>
 #include <limits.h>
 #include <sha1.h>
@@ -401,8 +402,30 @@ locate_hunk(FILE *orig, struct got_patch_hunk *h, off_
 	return err;
 }
 
+static int
+linecmp(const char *a, const char *b, int ws)
+{
+	int c;
+
+	c = strcmp(a, b);
+	if (c == 0 || !ws)
+		return c;
+
+	for (;;) {
+		while (isspace(*a))
+			a++;
+		while (isspace(*b))
+			b++;
+		if (*a == '\0' || *b == '\0' || *a != *b)
+			break;
+		a++, b++;
+	}
+
+	return *a - *b;
+}
+
 static const struct got_error *
-test_hunk(FILE *orig, struct got_patch_hunk *h)
+test_hunk(FILE *orig, struct got_patch_hunk *h, int ws)
 {
 	const struct got_error *err = NULL;
 	char *line = NULL;
@@ -426,7 +449,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h)
 			}
 			if (line[linelen - 1] == '\n')
 				line[linelen - 1] = '\0';
-			if (strcmp(h->lines[i] + 1, line)) {
+			if (linecmp(h->lines[i] + 1, line, ws)) {
 				err = got_error(GOT_ERR_HUNK_FAILED);
 				goto done;
 			}
@@ -440,36 +463,63 @@ done:
 }
 
 static const struct got_error *
-apply_hunk(FILE *tmp, struct got_patch_hunk *h, int *lineno)
+apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hunk *h, int *lineno,
+    off_t from)
 {
-	size_t i, new = 0;
+	const struct got_error *err = NULL;
+	const char *t;
+	size_t linesize = 0, i, new = 0;
+	char *line = NULL;
+	char mode;
+	ssize_t linelen;
 
+	if (orig != NULL && fseeko(orig, from, SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
 	for (i = 0; i < h->len; ++i) {
-		switch (*h->lines[i]) {
-		case ' ':
-			if (fprintf(tmp, "%s\n", h->lines[i] + 1) < 0)
-				return got_error_from_errno("fprintf");
-			/* fallthrough */
+		switch (mode = *h->lines[i]) {
 		case '-':
+		case ' ':
 			(*lineno)++;
+			if (orig != NULL) {
+				linelen = getline(&line, &linesize, orig);
+				if (linelen == -1) {
+					err = got_error_from_errno("getline");
+					goto done;
+				}
+				t = line;
+			} else
+				t = h->lines[i] + 1;
+			if (mode == '-')
+				continue;
+			if (fprintf(tmp, "%s", t) < 0) {
+				err = got_error_from_errno("fprintf");
+				goto done;
+			}
 			break;
 		case '+':
 			new++;
-			if (fprintf(tmp, "%s", h->lines[i] + 1) < 0)
-				return got_error_from_errno("fprintf");
+			if (fprintf(tmp, "%s", h->lines[i] + 1) < 0) {
+				err = got_error_from_errno("fprintf");
+				goto done;
+			}
 			if (new != h->new_lines || !h->new_nonl) {
-				if (fprintf(tmp, "\n") < 0)
-					return got_error_from_errno(
-					    "fprintf");
+				if (fprintf(tmp, "\n") < 0) {
+					err = got_error_from_errno("fprintf");
+					goto done;
+				}
 			}
 			break;
 		}
 	}
-	return NULL;
+
+done:
+	free(line);
+	return err;
 }
 
 static const struct got_error *
-patch_file(struct got_patch *p, FILE *orig, FILE *tmp, mode_t *mode)
+patch_file(struct got_patch *p, FILE *orig, FILE *tmp, mode_t *mode, int ws)
 {
 	const struct got_error *err = NULL;
 	struct got_patch_hunk *h;
@@ -484,7 +534,7 @@ patch_file(struct got_patch *p, FILE *orig, FILE *tmp,
 		h = STAILQ_FIRST(&p->head);
 		if (h == NULL || STAILQ_NEXT(h, entries) != NULL)
 			return got_error(GOT_ERR_PATCH_MALFORMED);
-		return apply_hunk(tmp, h, &lineno);
+		return apply_hunk(orig, tmp, h, &lineno, 0);
 	}
 
 	if (fstat(fileno(orig), &sb) == -1)
@@ -504,7 +554,7 @@ patch_file(struct got_patch *p, FILE *orig, FILE *tmp,
 			return err;
 		copypos = pos;
 
-		err = test_hunk(orig, h);
+		err = test_hunk(orig, h, ws);
 		if (err != NULL && err->code == GOT_ERR_HUNK_FAILED) {
 			/*
 			 * try to apply the hunk again starting the search
@@ -524,7 +574,7 @@ patch_file(struct got_patch *p, FILE *orig, FILE *tmp,
 		if (lineno + 1 != h->old_from)
 			h->offset = lineno + 1 - h->old_from;
 
-		err = apply_hunk(tmp, h, &lineno);
+		err = apply_hunk(orig, tmp, h, &lineno, pos);
 		if (err != NULL)
 			return err;
 
@@ -647,7 +697,7 @@ done:
 static const struct got_error *
 apply_patch(int *overlapcnt, struct got_worktree *worktree,
     struct got_repository *repo, struct got_fileindex *fileindex,
-    const char *old, const char *new, struct got_patch *p, int nop,
+    const char *old, const char *new, struct got_patch *p, int nop, int ws,
     struct patch_args *pa, got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
@@ -708,7 +758,8 @@ apply_patch(int *overlapcnt, struct got_worktree *work
 		goto done;
 	outpath = tmppath;
 	outfd = fileno(tmpfile);
-	err = patch_file(p, afile != NULL ? afile : oldfile, tmpfile, &mode);
+	err = patch_file(p, afile != NULL ? afile : oldfile, tmpfile,
+	    &mode, ws);
 	if (err)
 		goto done;
 
@@ -880,7 +931,7 @@ reverse_patch(struct got_patch *p)
 
 const struct got_error *
 got_patch(int fd, struct got_worktree *worktree, struct got_repository *repo,
-    int nop, int strip, int reverse, got_patch_progress_cb progress_cb,
+    int nop, int strip, int reverse, int ws, got_patch_progress_cb progress_cb,
     void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL, *complete_err = NULL;
@@ -949,7 +1000,7 @@ got_patch(int fd, struct got_worktree *worktree, struc
 		    &newpath, worktree, repo, fileindex);
 		if (err == NULL)
 			err = apply_patch(&overlapcnt, worktree, repo,
-			    fileindex, oldpath, newpath, &p, nop, &pa,
+			    fileindex, oldpath, newpath, &p, nop, ws, &pa,
 			    cancel_cb, cancel_arg);
 		if (err != NULL) {
 			failed = 1;
blob - 40309e5b82bddf39321e6da00a91b79dd7ec65b0
blob + ef4dd59512cedc603ff733648311fafe3cc5f207
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1273,6 +1273,96 @@ EOF
 	test_done $testroot 0
 }
 
+test_patch_whitespace() {
+	local testroot=`test_init patch_whitespace`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	trailing="		"
+
+	cat <<EOF > $testroot/wt/hello.c
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* the trailing whitespace is on purpose */
+	printf("hello, world\n");$trailing
+	return 0;
+}
+EOF
+
+	(cd $testroot/wt && got add hello.c && got ci -m '+hello.c') \
+		> /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	# test with a diff with various whitespace corruptions
+	cat <<EOF > $testroot/wt/patch
+--- hello.c
++++ hello.c
+@@ -5,5 +5,5 @@
+ {
+ /* the trailing whitespace is on purpose */
+	printf("hello, world\n");
+-    return 0;
++    return 5; /* always fails */
+ }
+EOF
+
+	(cd $testroot/wt && got patch patch) 2>/dev/null 1>&2
+	ret=$?
+	if [ $ret -ne 1 ]; then
+		echo "applied a mangled patch without -w?" >&2
+		test_done $testroot 1
+		return 1
+	fi
+
+	(cd $testroot/wt && got patch -w patch) 2>/dev/null > $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "failed to apply with -w" >&2
+		test_done $testroot $ret
+		return 1
+	fi
+
+	echo 'M  hello.c' > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done $testroot $ret
+		return 1
+	fi
+
+	cat <<EOF > $testroot/wt/hello.c.expected
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* the trailing whitespace is on purpose */
+	printf("hello, world\n");$trailing
+    return 5; /* always fails */
+}
+EOF
+
+	cmp -s $testroot/wt/hello.c.expected $testroot/wt/hello.c
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/wt/hello.c.expected $testroot/wt/hello.c
+	fi
+	test_done $testroot $ret
+}
+
 test_patch_relative_paths() {
 	local testroot=`test_init patch_relative_paths`
 
@@ -1798,6 +1888,7 @@ run_test test_patch_with_offset
 run_test test_patch_prefer_new_path
 run_test test_patch_no_newline
 run_test test_patch_strip
+run_test test_patch_whitespace
 run_test test_patch_relative_paths
 run_test test_patch_with_path_prefix
 run_test test_patch_relpath_with_path_prefix