"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: got patch vs binary files
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 30 May 2023 20:25:29 +1000

Download raw body.

Thread
On 23-05-29 08:52AM, Omar Polo wrote:
> ping
> 
> On 2023/05/12 21:06:15 +0200, Omar Polo <op@omarpolo.com> wrote:
> > back in february naddy reported that got patch choked on the perl
> > update.
> > 
> > https://marc.gameoftrees.org/thread/1675780522.55927_0.html
> > 
> > The problem was two-fold:
> >   1. the "Result too large" for very long lines
> >   2. NUL bytes in the middle of the lines
> > 
> > Here I'm adressing the most interesting one, the sencond.  I'll handle
> > the very long lines case next.
> > 
> > got patch assumes lines can be encoded as strings, so embedded NUL
> > bytes effectively truncates the line.  diff below changes this
> > assumption, by considering lines just binary strings LF-delimited.
> > 
> > While doing this, I took the chance to ship some other minor changes.
> > 
> > The type of the line (i.e. '+', '-' or ' ' aka context) now is a
> > separate field instead of being encoded in the first byte of each
> > line, this simplifies the various +1/-1 that were needed otherwise.
> > 
> > I've renamed linecmp into lineq and made it boolean: it's simpler to
> > write, to use and we'll never be interested in sorting lines anyway.
> > 
> > One quirk is that I'm still allocating the space for a NUL terminator.
> > This is not required, and in fact makes pushline() maybe not obvious,
> > but it's useful for debugging so it's worth it I think.
> > 
> > The diff is not committable as-is however.  I had to disable a check
> > in diff3 to allow got_merge_diff3 to proceed on binary files.  I'm not
> > sure what we want to do in for this, but I believe that even without
> > the will to support "binary patches" most of the diff still makes
> > sense, as it propagate lengths rather than recomputing strlen() here
> > and there and avoids some extra useless allocations in got-read-patch.

I really like the change to parsing lines as simply newline delimited
binary strings; this is how it's done in fossil too. And I like the
other changes, particularly the got_patch_line struct and lineq()
simplification.

The diff also reads fine. I'm just not sure about the diff3 issue, tbh.
Also, test_cherrypick_binary_file regress is tripping on this change.

> > 
> > diff /home/op/w/got
> > commit - ba0bed23e515b14cd3fe877c6847785c8c29971e
> > path + /home/op/w/got
> > blob - 5cd79150cc6476f78c90b997d76b00eaa99fc341
> > file + lib/diff3.c
> > --- lib/diff3.c
> > +++ lib/diff3.c
> > @@ -231,6 +231,7 @@ diffreg(BUF **d, const char *path1, const char *path2,
> >  	if (err)
> >  		goto done;
> >  
> > +#if 0
> >  	if (diffreg_result) {
> >  		struct diff_result *diff_result = diffreg_result->result;
> >  		int atomizer_flags = (diff_result->left->atomizer_flags |
> > @@ -240,6 +241,7 @@ diffreg(BUF **d, const char *path1, const char *path2,
> >  			goto done;
> >  		}
> >  	}
> > +#endif
> >  
> >  	err = got_diffreg_output(NULL, NULL, diffreg_result, 1, 1, "", "",
> >  	    GOT_DIFF_OUTPUT_PLAIN, 0, outfile);
> > blob - 9f524a6bfc0deae40498090e43fe7f9238eb4e64
> > file + lib/patch.c
> > --- lib/patch.c
> > +++ lib/patch.c
> > @@ -59,6 +59,12 @@ struct got_patch_hunk {
> >  #define MIN(a, b) ((a) < (b) ? (a) : (b))
> >  #endif
> >  
> > +struct got_patch_line {
> > +	char	 mode;
> > +	char	*line;
> > +	size_t	 len;
> > +};
> > +
> >  struct got_patch_hunk {
> >  	STAILQ_ENTRY(got_patch_hunk) entries;
> >  	const struct got_error *err;
> > @@ -72,7 +78,7 @@ struct got_patch_hunk {
> >  	int	new_lines;
> >  	size_t	len;
> >  	size_t	cap;
> > -	char	**lines;
> > +	struct got_patch_line *lines;
> >  };
> >  
> >  STAILQ_HEAD(got_patch_hunk_head, got_patch_hunk);
> > @@ -128,7 +134,7 @@ patch_free(struct got_patch *p)
> >  		STAILQ_REMOVE_HEAD(&p->head, entries);
> >  
> >  		for (i = 0; i < h->len; ++i)
> > -			free(h->lines[i]);
> > +			free(h->lines[i].line);
> >  		free(h->lines);
> >  		free(h);
> >  	}
> > @@ -141,7 +147,7 @@ pushline(struct got_patch_hunk *h, const char *line)
> >  }
> >  
> >  static const struct got_error *
> > -pushline(struct got_patch_hunk *h, const char *line)
> > +pushline(struct got_patch_hunk *h, const char *line, size_t len)
> >  {
> >  	void 	*t;
> >  	size_t	 newcap;
> > @@ -157,10 +163,14 @@ pushline(struct got_patch_hunk *h, const char *line)
> >  		h->cap = newcap;
> >  	}
> >  
> > -	if ((t = strdup(line)) == NULL)
> > -		return got_error_from_errno("strdup");
> > +	if ((t = malloc(len - 1)) == NULL)
> > +		return got_error_from_errno("malloc");
> > +	memcpy(t, line + 1, len - 1);	/* skip the line type */
> >  
> > -	h->lines[h->len++] = t;
> > +	h->lines[h->len].mode = *line;
> > +	h->lines[h->len].line = t;
> > +	h->lines[h->len].len = len - 2;	/* skip line type and trailing NUL */
> > +	h->len++;
> >  	return NULL;
> >  }
> >  
> > @@ -301,7 +311,7 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got
> >  			}
> >  
> >  			if (*t != '\\')
> > -				err = pushline(h, t);
> > +				err = pushline(h, t, datalen);
> >  			else if (lastmode == '-')
> >  				h->old_nonl = 1;
> >  			else if (lastmode == '+')
> > @@ -351,10 +361,10 @@ reverse_patch(struct got_patch *p)
> >  		h->new_nonl = tmp;
> >  
> >  		for (i = 0; i < h->len; ++i) {
> > -			if (*h->lines[i] == '+')
> > -				*h->lines[i] = '-';
> > -			else if (*h->lines[i] == '-')
> > -				*h->lines[i] = '+';
> > +			if (h->lines[i].mode == '+')
> > +				h->lines[i].mode = '-';
> > +			else if (h->lines[i].mode == '-')
> > +				h->lines[i].mode = '+';
> >  		}
> >  	}
> >  }
> > @@ -392,14 +402,15 @@ static int linecmp(const char *, const char *, int *);
> >  	return NULL;
> >  }
> >  
> > -static int linecmp(const char *, const char *, int *);
> > +static int lineq(struct got_patch_line *, const char *, size_t, int *);
> >  
> >  static const struct got_error *
> >  locate_hunk(FILE *orig, struct got_patch_hunk *h, off_t *pos, int *lineno)
> >  {
> >  	const struct got_error *err = NULL;
> > +	struct got_patch_line *l = &h->lines[0];
> >  	char *line = NULL;
> > -	char mode = *h->lines[0];
> > +	char mode = l->mode;
> >  	size_t linesize = 0;
> >  	ssize_t linelen;
> >  	off_t match = -1;
> > @@ -414,12 +425,10 @@ locate_hunk(FILE *orig, struct got_patch_hunk *h, off_
> >  				err = got_error(GOT_ERR_HUNK_FAILED);
> >  			break;
> >  		}
> > -		if (line[linelen - 1] == '\n')
> > -			line[linelen - 1] = '\0';
> >  		(*lineno)++;
> >  
> > -		if ((mode == ' ' && !linecmp(h->lines[0] + 1, line, &mangled)) ||
> > -		    (mode == '-' && !linecmp(h->lines[0] + 1, line, &mangled)) ||
> > +		if ((mode == ' ' && lineq(l, line, linelen, &mangled)) ||
> > +		    (mode == '-' && lineq(l, line, linelen, &mangled)) ||
> >  		    (mode == '+' && *lineno == h->old_from)) {
> >  			match = ftello(orig);
> >  			if (match == -1) {
> > @@ -449,27 +458,34 @@ linecmp(const char *a, const char *b, int *mangled)
> >  }
> >  
> >  static int
> > -linecmp(const char *a, const char *b, int *mangled)
> > +lineq(struct got_patch_line *l, const char *b, size_t len, int *mangled)
> >  {
> > -	int c;
> > +	char *a = l->line;
> > +	size_t i, j;
> >  
> > +	if (b[len - 1] == '\n')
> > +		len--;
> > +
> >  	*mangled = 0;
> > -	c = strcmp(a, b);
> > -	if (c == 0)
> > -		return c;
> > +	if (l->len == len && !memcmp(a, b, len))
> > +		return 1;
> >  
> >  	*mangled = 1;
> > +
> > +	i = j = 0;
> >  	for (;;) {
> > -		while (*a == '\t' || *a == ' ' || *a == '\f')
> > -			a++;
> > -		while (*b == '\t' || *b == ' ' || *b == '\f')
> > -			b++;
> > -		if (*a == '\0' || *a != *b)
> > +		while (i < l->len &&
> > +		    (a[i] == '\t' || a[i] == ' ' || a[i] == '\f'))
> > +			i++;
> > +		while (j < len &&
> > +		    (b[j] == '\t' || b[j] == ' ' || b[j] == '\f'))
> > +			j++;
> > +		if (i == l->len || j == len || a[i] != b[j])
> >  			break;
> > -		a++, b++;
> > +		i++, j++;
> >  	}
> >  
> > -	return *a - *b;
> > +	return (i == l->len && j == len);
> >  }
> >  
> >  static const struct got_error *
> > @@ -482,7 +498,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h)
> >  	int mangled;
> >  
> >  	for (i = 0; i < h->len; ++i) {
> > -		switch (*h->lines[i]) {
> > +		switch (h->lines[i].mode) {
> >  		case '+':
> >  			continue;
> >  		case ' ':
> > @@ -496,9 +512,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h)
> >  					    GOT_ERR_HUNK_FAILED);
> >  				goto done;
> >  			}
> > -			if (line[linelen - 1] == '\n')
> > -				line[linelen - 1] = '\0';
> > -			if (linecmp(h->lines[i] + 1, line, &mangled)) {
> > +			if (!lineq(&h->lines[i], line, linelen, &mangled)) {
> >  				err = got_error(GOT_ERR_HUNK_FAILED);
> >  				goto done;
> >  			}
> > @@ -522,13 +536,14 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun
> >  	size_t linesize = 0, i, new = 0;
> >  	char *line = NULL;
> >  	char mode;
> > +	size_t l;
> >  	ssize_t linelen;
> >  
> >  	if (orig != NULL && fseeko(orig, from, SEEK_SET) == -1)
> >  		return got_error_from_errno("fseeko");
> >  
> >  	for (i = 0; i < h->len; ++i) {
> > -		switch (mode = *h->lines[i]) {
> > +		switch (mode = h->lines[i].mode) {
> >  		case '-':
> >  		case ' ':
> >  			(*lineno)++;
> > @@ -541,18 +556,24 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun
> >  				if (line[linelen - 1] == '\n')
> >  					line[linelen - 1] = '\0';
> >  				t = line;
> > -			} else
> > -				t = h->lines[i] + 1;
> > +				l = linelen - 1;
> > +			} else {
> > +				t = h->lines[i].line;
> > +				l = h->lines[i].len;
> > +			}
> >  			if (mode == '-')
> >  				continue;
> > -			if (fprintf(tmp, "%s\n", t) < 0) {
> > +			if (fwrite(t, 1, l, tmp) != l ||
> > +			    fputc('\n', tmp) == EOF) {
> >  				err = got_error_from_errno("fprintf");
> >  				goto done;
> >  			}
> >  			break;
> >  		case '+':
> >  			new++;
> > -			if (fprintf(tmp, "%s", h->lines[i] + 1) < 0) {
> > +			t = h->lines[i].line;
> > +			l = h->lines[i].len;
> > +			if (fwrite(t, 1, l, tmp) != l) {
> >  				err = got_error_from_errno("fprintf");
> >  				goto done;
> >  			}
> > blob - a5fe8c2470856e960745214cc536ddbc804c7ef0
> > file + libexec/got-read-patch/got-read-patch.c
> > --- libexec/got-read-patch/got-read-patch.c
> > +++ libexec/got-read-patch/got-read-patch.c
> > @@ -445,23 +445,29 @@ send_line(const char *line)
> >  }
> >  
> >  static const struct got_error *
> > -send_line(const char *line)
> > +send_line(const char *line, size_t len)
> >  {
> >  	static const struct got_error *err = NULL;
> > -	char *p = NULL;
> > +	struct iovec iov[2];
> > +	int iovcnt = 0;
> >  
> > +	memset(&iov, 0, sizeof(iov));
> > +
> >  	if (*line != '+' && *line != '-' && *line != ' ' && *line != '\\') {
> > -		if (asprintf(&p, " %s", line) == -1)
> > -			return got_error_from_errno("asprintf");
> > -		line = p;
> > +		iov[iovcnt].iov_base = (void *)" ";
> > +		iov[iovcnt].iov_len = 1;
> > +		iovcnt++;
> >  	}
> >  
> > -	if (imsg_compose(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1,
> > -	    line, strlen(line) + 1) == -1)
> > +	iov[iovcnt].iov_base = (void *)line;
> > +	iov[iovcnt].iov_len = len;
> > +	iovcnt++;
> > +
> > +	if (imsg_composev(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1,
> > +	    iov, iovcnt) == -1)
> >  		err = got_error_from_errno(
> >  		    "imsg_compose GOT_IMSG_PATCH_LINE");
> >  
> > -	free(p);
> >  	return err;
> >  }
> >  
> > @@ -478,7 +484,7 @@ peek_special_line(FILE *fp)
> >  	}
> >  
> >  	if (ch == '\\') {
> > -		err = send_line("\\");
> > +		err = send_line("\\", 2);
> >  		if (err)
> >  			return err;
> >  	}
> > @@ -568,7 +574,7 @@ parse_hunk(FILE *fp, int *done)
> >  			goto done;
> >  		}
> >  
> > -		err = send_line(line);
> > +		err = send_line(line, linelen);
> >  		if (err)
> >  			goto done;
> >  
> > blob - 17db471e5c3aa81e0eb9233b406861cdccd08fcd
> > file + regress/cmdline/patch.sh
> > --- regress/cmdline/patch.sh
> > +++ regress/cmdline/patch.sh
> > @@ -1956,6 +1956,57 @@ test_parseargs "$@"
> >  	test_done $testroot 0
> >  }
> >  
> > +test_patch_binary() {
> > +	local testroot=`test_init patch_binary`
> > +
> > +	if ! got checkout "$testroot/repo" "$testroot/wt" >/dev/null; then
> > +		test_done "$testroot" $ret
> > +		return 1
> > +	fi
> > +
> > +	printf 'e\0ta' > $testroot/wt/eta
> > +	(cd "$testroot/wt" && got add eta && got commit -m '+eta') >/dev/null
> > +	ret=$?
> > +	if [ $ret -ne 0 ]; then
> > +		echo "got add failed unexpectedly" >&2
> > +		test_done "$testroot" $ret
> > +		return 1
> > +	fi
> > +
> > +	printf 'et\0a' | tee "$testroot/wt/eta.expected" > $testroot/wt/eta
> > +
> > +	(cd "$testroot/wt" && got diff -a > patch && got revert eta) >/dev/null
> > +	ret=$?
> > +	if [ $ret -ne 0 ]; then
> > +		echo "got diff or revert failed unexpectedly" >&2
> > +		test_done "$testroot" $ret
> > +		return 1
> > +	fi
> > +
> > +	(cd "$testroot/wt" && got patch <patch) > $testroot/stdout
> > +	ret=$?
> > +	if [ $ret -ne 0 ]; then
> > +		echo "got patch failed unexpectedly" >&2
> > +		test_done "$testroot" $ret
> > +		return 1
> > +	fi
> > +
> > +	echo 'G  eta' > $testroot/stdout.expected
> > +	if ! cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
> > +		diff -u "$testroot/stdout.expected" "$testroot/stdout"
> > +		test_done "$testroot" 1
> > +		return 1
> > +	fi
> > +
> > +	if ! cmp -s "$testroot/wt/eta.expected" "$testroot/wt/eta"; then
> > +		diff -u "$testroot/wt/eta.expected" "$testroot/wt/eta"
> > +		test_done "$testroot" 1
> > +		return 1
> > +	fi
> > +
> > +	test_done "$testroot" 0
> > +}
> > +
> >  test_parseargs "$@"
> >  run_test test_patch_basic
> >  run_test test_patch_dont_apply
> > @@ -1986,3 +2037,4 @@ run_test test_patch_remove_binary_file
> >  run_test test_patch_newfile_xbit_git_diff
> >  run_test test_patch_umask
> >  run_test test_patch_remove_binary_file
> > +run_test test_patch_binary
> 
> 

-- 
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68