"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
got patch vs binary files
To:
gameoftrees@openbsd.org
Date:
Fri, 12 May 2023 21:06:15 +0200

Download raw body.

Thread
back in february naddy reported that got patch choked on the perl
update.

https://marc.gameoftrees.org/thread/1675780522.55927_0.html

The problem was two-fold:
  1. the "Result too large" for very long lines
  2. NUL bytes in the middle of the lines

Here I'm adressing the most interesting one, the sencond.  I'll handle
the very long lines case next.

got patch assumes lines can be encoded as strings, so embedded NUL
bytes effectively truncates the line.  diff below changes this
assumption, by considering lines just binary strings LF-delimited.

While doing this, I took the chance to ship some other minor changes.

The type of the line (i.e. '+', '-' or ' ' aka context) now is a
separate field instead of being encoded in the first byte of each
line, this simplifies the various +1/-1 that were needed otherwise.

I've renamed linecmp into lineq and made it boolean: it's simpler to
write, to use and we'll never be interested in sorting lines anyway.

One quirk is that I'm still allocating the space for a NUL terminator.
This is not required, and in fact makes pushline() maybe not obvious,
but it's useful for debugging so it's worth it I think.

The diff is not committable as-is however.  I had to disable a check
in diff3 to allow got_merge_diff3 to proceed on binary files.  I'm not
sure what we want to do in for this, but I believe that even without
the will to support "binary patches" most of the diff still makes
sense, as it propagate lengths rather than recomputing strlen() here
and there and avoids some extra useless allocations in got-read-patch.


diff /home/op/w/got
commit - ba0bed23e515b14cd3fe877c6847785c8c29971e
path + /home/op/w/got
blob - 5cd79150cc6476f78c90b997d76b00eaa99fc341
file + lib/diff3.c
--- lib/diff3.c
+++ lib/diff3.c
@@ -231,6 +231,7 @@ diffreg(BUF **d, const char *path1, const char *path2,
 	if (err)
 		goto done;
 
+#if 0
 	if (diffreg_result) {
 		struct diff_result *diff_result = diffreg_result->result;
 		int atomizer_flags = (diff_result->left->atomizer_flags |
@@ -240,6 +241,7 @@ diffreg(BUF **d, const char *path1, const char *path2,
 			goto done;
 		}
 	}
+#endif
 
 	err = got_diffreg_output(NULL, NULL, diffreg_result, 1, 1, "", "",
 	    GOT_DIFF_OUTPUT_PLAIN, 0, outfile);
blob - 9f524a6bfc0deae40498090e43fe7f9238eb4e64
file + lib/patch.c
--- lib/patch.c
+++ lib/patch.c
@@ -59,6 +59,12 @@ struct got_patch_hunk {
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
 
+struct got_patch_line {
+	char	 mode;
+	char	*line;
+	size_t	 len;
+};
+
 struct got_patch_hunk {
 	STAILQ_ENTRY(got_patch_hunk) entries;
 	const struct got_error *err;
@@ -72,7 +78,7 @@ struct got_patch_hunk {
 	int	new_lines;
 	size_t	len;
 	size_t	cap;
-	char	**lines;
+	struct got_patch_line *lines;
 };
 
 STAILQ_HEAD(got_patch_hunk_head, got_patch_hunk);
@@ -128,7 +134,7 @@ patch_free(struct got_patch *p)
 		STAILQ_REMOVE_HEAD(&p->head, entries);
 
 		for (i = 0; i < h->len; ++i)
-			free(h->lines[i]);
+			free(h->lines[i].line);
 		free(h->lines);
 		free(h);
 	}
@@ -141,7 +147,7 @@ pushline(struct got_patch_hunk *h, const char *line)
 }
 
 static const struct got_error *
-pushline(struct got_patch_hunk *h, const char *line)
+pushline(struct got_patch_hunk *h, const char *line, size_t len)
 {
 	void 	*t;
 	size_t	 newcap;
@@ -157,10 +163,14 @@ pushline(struct got_patch_hunk *h, const char *line)
 		h->cap = newcap;
 	}
 
-	if ((t = strdup(line)) == NULL)
-		return got_error_from_errno("strdup");
+	if ((t = malloc(len - 1)) == NULL)
+		return got_error_from_errno("malloc");
+	memcpy(t, line + 1, len - 1);	/* skip the line type */
 
-	h->lines[h->len++] = t;
+	h->lines[h->len].mode = *line;
+	h->lines[h->len].line = t;
+	h->lines[h->len].len = len - 2;	/* skip line type and trailing NUL */
+	h->len++;
 	return NULL;
 }
 
@@ -301,7 +311,7 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got
 			}
 
 			if (*t != '\\')
-				err = pushline(h, t);
+				err = pushline(h, t, datalen);
 			else if (lastmode == '-')
 				h->old_nonl = 1;
 			else if (lastmode == '+')
@@ -351,10 +361,10 @@ reverse_patch(struct got_patch *p)
 		h->new_nonl = tmp;
 
 		for (i = 0; i < h->len; ++i) {
-			if (*h->lines[i] == '+')
-				*h->lines[i] = '-';
-			else if (*h->lines[i] == '-')
-				*h->lines[i] = '+';
+			if (h->lines[i].mode == '+')
+				h->lines[i].mode = '-';
+			else if (h->lines[i].mode == '-')
+				h->lines[i].mode = '+';
 		}
 	}
 }
@@ -392,14 +402,15 @@ static int linecmp(const char *, const char *, int *);
 	return NULL;
 }
 
-static int linecmp(const char *, const char *, int *);
+static int lineq(struct got_patch_line *, const char *, size_t, int *);
 
 static const struct got_error *
 locate_hunk(FILE *orig, struct got_patch_hunk *h, off_t *pos, int *lineno)
 {
 	const struct got_error *err = NULL;
+	struct got_patch_line *l = &h->lines[0];
 	char *line = NULL;
-	char mode = *h->lines[0];
+	char mode = l->mode;
 	size_t linesize = 0;
 	ssize_t linelen;
 	off_t match = -1;
@@ -414,12 +425,10 @@ locate_hunk(FILE *orig, struct got_patch_hunk *h, off_
 				err = got_error(GOT_ERR_HUNK_FAILED);
 			break;
 		}
-		if (line[linelen - 1] == '\n')
-			line[linelen - 1] = '\0';
 		(*lineno)++;
 
-		if ((mode == ' ' && !linecmp(h->lines[0] + 1, line, &mangled)) ||
-		    (mode == '-' && !linecmp(h->lines[0] + 1, line, &mangled)) ||
+		if ((mode == ' ' && lineq(l, line, linelen, &mangled)) ||
+		    (mode == '-' && lineq(l, line, linelen, &mangled)) ||
 		    (mode == '+' && *lineno == h->old_from)) {
 			match = ftello(orig);
 			if (match == -1) {
@@ -449,27 +458,34 @@ linecmp(const char *a, const char *b, int *mangled)
 }
 
 static int
-linecmp(const char *a, const char *b, int *mangled)
+lineq(struct got_patch_line *l, const char *b, size_t len, int *mangled)
 {
-	int c;
+	char *a = l->line;
+	size_t i, j;
 
+	if (b[len - 1] == '\n')
+		len--;
+
 	*mangled = 0;
-	c = strcmp(a, b);
-	if (c == 0)
-		return c;
+	if (l->len == len && !memcmp(a, b, len))
+		return 1;
 
 	*mangled = 1;
+
+	i = j = 0;
 	for (;;) {
-		while (*a == '\t' || *a == ' ' || *a == '\f')
-			a++;
-		while (*b == '\t' || *b == ' ' || *b == '\f')
-			b++;
-		if (*a == '\0' || *a != *b)
+		while (i < l->len &&
+		    (a[i] == '\t' || a[i] == ' ' || a[i] == '\f'))
+			i++;
+		while (j < len &&
+		    (b[j] == '\t' || b[j] == ' ' || b[j] == '\f'))
+			j++;
+		if (i == l->len || j == len || a[i] != b[j])
 			break;
-		a++, b++;
+		i++, j++;
 	}
 
-	return *a - *b;
+	return (i == l->len && j == len);
 }
 
 static const struct got_error *
@@ -482,7 +498,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h)
 	int mangled;
 
 	for (i = 0; i < h->len; ++i) {
-		switch (*h->lines[i]) {
+		switch (h->lines[i].mode) {
 		case '+':
 			continue;
 		case ' ':
@@ -496,9 +512,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h)
 					    GOT_ERR_HUNK_FAILED);
 				goto done;
 			}
-			if (line[linelen - 1] == '\n')
-				line[linelen - 1] = '\0';
-			if (linecmp(h->lines[i] + 1, line, &mangled)) {
+			if (!lineq(&h->lines[i], line, linelen, &mangled)) {
 				err = got_error(GOT_ERR_HUNK_FAILED);
 				goto done;
 			}
@@ -522,13 +536,14 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun
 	size_t linesize = 0, i, new = 0;
 	char *line = NULL;
 	char mode;
+	size_t l;
 	ssize_t linelen;
 
 	if (orig != NULL && fseeko(orig, from, SEEK_SET) == -1)
 		return got_error_from_errno("fseeko");
 
 	for (i = 0; i < h->len; ++i) {
-		switch (mode = *h->lines[i]) {
+		switch (mode = h->lines[i].mode) {
 		case '-':
 		case ' ':
 			(*lineno)++;
@@ -541,18 +556,24 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun
 				if (line[linelen - 1] == '\n')
 					line[linelen - 1] = '\0';
 				t = line;
-			} else
-				t = h->lines[i] + 1;
+				l = linelen - 1;
+			} else {
+				t = h->lines[i].line;
+				l = h->lines[i].len;
+			}
 			if (mode == '-')
 				continue;
-			if (fprintf(tmp, "%s\n", t) < 0) {
+			if (fwrite(t, 1, l, tmp) != l ||
+			    fputc('\n', tmp) == EOF) {
 				err = got_error_from_errno("fprintf");
 				goto done;
 			}
 			break;
 		case '+':
 			new++;
-			if (fprintf(tmp, "%s", h->lines[i] + 1) < 0) {
+			t = h->lines[i].line;
+			l = h->lines[i].len;
+			if (fwrite(t, 1, l, tmp) != l) {
 				err = got_error_from_errno("fprintf");
 				goto done;
 			}
blob - a5fe8c2470856e960745214cc536ddbc804c7ef0
file + libexec/got-read-patch/got-read-patch.c
--- libexec/got-read-patch/got-read-patch.c
+++ libexec/got-read-patch/got-read-patch.c
@@ -445,23 +445,29 @@ send_line(const char *line)
 }
 
 static const struct got_error *
-send_line(const char *line)
+send_line(const char *line, size_t len)
 {
 	static const struct got_error *err = NULL;
-	char *p = NULL;
+	struct iovec iov[2];
+	int iovcnt = 0;
 
+	memset(&iov, 0, sizeof(iov));
+
 	if (*line != '+' && *line != '-' && *line != ' ' && *line != '\\') {
-		if (asprintf(&p, " %s", line) == -1)
-			return got_error_from_errno("asprintf");
-		line = p;
+		iov[iovcnt].iov_base = (void *)" ";
+		iov[iovcnt].iov_len = 1;
+		iovcnt++;
 	}
 
-	if (imsg_compose(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1,
-	    line, strlen(line) + 1) == -1)
+	iov[iovcnt].iov_base = (void *)line;
+	iov[iovcnt].iov_len = len;
+	iovcnt++;
+
+	if (imsg_composev(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1,
+	    iov, iovcnt) == -1)
 		err = got_error_from_errno(
 		    "imsg_compose GOT_IMSG_PATCH_LINE");
 
-	free(p);
 	return err;
 }
 
@@ -478,7 +484,7 @@ peek_special_line(FILE *fp)
 	}
 
 	if (ch == '\\') {
-		err = send_line("\\");
+		err = send_line("\\", 2);
 		if (err)
 			return err;
 	}
@@ -568,7 +574,7 @@ parse_hunk(FILE *fp, int *done)
 			goto done;
 		}
 
-		err = send_line(line);
+		err = send_line(line, linelen);
 		if (err)
 			goto done;
 
blob - 17db471e5c3aa81e0eb9233b406861cdccd08fcd
file + regress/cmdline/patch.sh
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1956,6 +1956,57 @@ test_parseargs "$@"
 	test_done $testroot 0
 }
 
+test_patch_binary() {
+	local testroot=`test_init patch_binary`
+
+	if ! got checkout "$testroot/repo" "$testroot/wt" >/dev/null; then
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	printf 'e\0ta' > $testroot/wt/eta
+	(cd "$testroot/wt" && got add eta && got commit -m '+eta') >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got add failed unexpectedly" >&2
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	printf 'et\0a' | tee "$testroot/wt/eta.expected" > $testroot/wt/eta
+
+	(cd "$testroot/wt" && got diff -a > patch && got revert eta) >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got diff or revert failed unexpectedly" >&2
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	(cd "$testroot/wt" && got patch <patch) > $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got patch failed unexpectedly" >&2
+		test_done "$testroot" $ret
+		return 1
+	fi
+
+	echo 'G  eta' > $testroot/stdout.expected
+	if ! cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+		diff -u "$testroot/stdout.expected" "$testroot/stdout"
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	if ! cmp -s "$testroot/wt/eta.expected" "$testroot/wt/eta"; then
+		diff -u "$testroot/wt/eta.expected" "$testroot/wt/eta"
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	test_done "$testroot" 0
+}
+
 test_parseargs "$@"
 run_test test_patch_basic
 run_test test_patch_dont_apply
@@ -1986,3 +2037,4 @@ run_test test_patch_remove_binary_file
 run_test test_patch_newfile_xbit_git_diff
 run_test test_patch_umask
 run_test test_patch_remove_binary_file
+run_test test_patch_binary