"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: teach got patch how to strip
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 4 Apr 2022 10:25:18 +0200

Download raw body.

Thread
On Sun, Apr 03, 2022 at 12:33:36PM +0200, Omar Polo wrote:
> as per subject, this adds a `-p strip' flag that works mostly as
> expected.
> 
> There's a bit of churn because I'd like to move some bits from the
> libexec helper to the main process: until now it was got-read-patch to
> notice the `diff --git' marker and automatically strip "a/" and "b/"s
> from paths, now it's done in the main process (so -p can be respected.)
> 
> If no -p is specified `got patch' will still "intelligently" drop a/ and
> b/ from git-style diffs.  I'm not sure if/how document it, patch(1) does
> the same but it's not mentioned in the manpage.

I remember this change being introduced when more Git-style diffs against
OpenBSD src started showing up on mailing lists, and some developers got
fed up with patch's default behaviour when trying to apply such patches
without -p.

It won't hurt to document this behaviour in the got(1) man page, at least.

> diff refs/heads/main refs/heads/pstrip
> blob - a9f60f53d707e188b849ee505d8cedffab3a89b9
> blob + e8627ffb65c8a5df5d3542f970533bc3855e14a0
> --- got/got.1
> +++ got/got.1
> @@ -1285,7 +1285,7 @@ option)
>  .El
>  .El
>  .Tg pa
> -.It Cm patch Oo Fl n Oc Op Ar patchfile
> +.It Cm patch Oo Fl n Oc Oo Fl p Ar strip Oc Op Ar patchfile
>  .Dl Pq alias: Cm pa
>  Apply changes from
>  .Ar patchfile
> @@ -1354,6 +1354,10 @@ If the
>  .Ar patchfile
>  contains diffs that affect the same file multiple times, the results
>  displayed may be incorrect.
> +.It Fl p Ar strip
> +Specifies the number of leading path components to strip from paths
> +parsed from
> +.Ar patchfile .
>  .El
>  .Tg rv
>  .It Cm revert Oo Fl p Oc Oo Fl F Ar response-script Oc Oo Fl R Oc Ar path ...
> blob - f3608d970c5ae77fee89a4e9ca8fdb89b015c5f8
> blob + 36bd7fe005dea2816d23161fab0d7aed3b56e159
> --- got/got.c
> +++ got/got.c
> @@ -7222,15 +7222,22 @@ cmd_patch(int argc, char *argv[])
>  	const struct got_error *error = NULL, *close_error = NULL;
>  	struct got_worktree *worktree = NULL;
>  	struct got_repository *repo = NULL;
> +	const char *errstr;
>  	char *cwd = NULL;
> -	int ch, nop = 0;
> +	int ch, nop = 0, strip = -1;
>  	int patchfd;
>  
> -	while ((ch = getopt(argc, argv, "n")) != -1) {
> +	while ((ch = getopt(argc, argv, "np:")) != -1) {
>  		switch (ch) {
>  		case 'n':
>  			nop = 1;
>  			break;
> +		case 'p':
> +			strip = strtonum(optarg, 0, INT_MAX, &errstr);
> +			if (errstr != NULL)
> +				errx(1, "pathname strip count is %s: %s",
> +				     errstr, optarg);
> +			break;
>  		default:
>  			usage_patch();
>  			/* NOTREACHED */
> @@ -7278,8 +7285,8 @@ cmd_patch(int argc, char *argv[])
>  		err(1, "pledge");
>  #endif
>  
> -	error = got_patch(patchfd, worktree, repo, nop, &patch_progress,
> -	    NULL, check_cancelled, NULL);
> +	error = got_patch(patchfd, worktree, repo, nop, strip,
> +	    &patch_progress, NULL, check_cancelled, NULL);
>  
>  done:
>  	if (repo) {
> blob - 959a2129f32e5f561fc9ccfdc186d10507d45e00
> blob + 62c76b0eb2926a03364ef10e416183cdfa18f398
> --- include/got_patch.h
> +++ include/got_patch.h
> @@ -31,5 +31,5 @@ typedef const struct got_error *(*got_patch_progress_c
>   * The patch file descriptor *must* be seekable.
>   */
>  const struct got_error *
> -got_patch(int, struct got_worktree *, struct got_repository *, int,
> +got_patch(int, struct got_worktree *, struct got_repository *, int, int,
>      got_patch_progress_cb, void *, got_cancel_cb, void *);
> blob - fef20e3a85c35f0faa4743d896a34ac04f0a4397
> blob + 110fe049d86c1a33fb3b33e4fe74ffa8a3dbbfa8
> --- lib/got_lib_privsep.h
> +++ lib/got_lib_privsep.h
> @@ -525,6 +525,7 @@ struct got_imsg_remotes {
>   * Structure for GOT_IMSG_PATCH data.
>   */
>  struct got_imsg_patch {
> +	int	git;
>  	char	old[PATH_MAX];
>  	char	new[PATH_MAX];
>  };
> blob - 4bea692e5b2c362795981a69b86a1446aa52ec7c
> blob + 9f773ced0bf3edcd9a12327943967bb2baa86963
> --- lib/patch.c
> +++ lib/patch.c
> @@ -143,8 +143,20 @@ pushline(struct got_patch_hunk *h, const char *line)
>  	return NULL;
>  }
>  
> +const char *

This function should be marked 'static'.

> +strip_path(const char *path, int n)
> +{
> +	const char *c;
> +
> +	while (n > 0 && (c = strchr(path, '/')) != NULL) {
> +		path = c + 1;
> +		n--;
> +	}
> +	return path;
> +}
> +

The code above does not allow the counter 'n' to overrun the amount
of path components present in the input string.

What happens if 'n' is larger than the number of path components?

Should strip_path() try to detect the problem and error out?
I would expect that a too large 'n' is always a user error.

For example, given a patch for files:

  baz/bax/frob.c
  foo/bar.c

It looks like 'got patch -pN' with N >= 3 would try to patch:

  frob.c
  bar.c

Which effectively flattens the tree hierarchy and could even lead to
collisions in cases like this:

  baz/bax/README
  foo/README

Testing my example, I found that Larry patch(1) prompts for a file
path for frob.c unless I specify -p0.

However, if a frob.c file does happen to exist in the current directory:
  frob.c
  baz/bax/frob.c
  foo/bar.c
then Larry patch(1) -p3 will try to patch ./frob.c with the patch for
bax/bax/frob.c. This behaviour does not seem desirable to me. I don't
think we should copy it.

>  static const struct got_error *
> -recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p)
> +recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p, int strip)
>  {
>  	const struct got_error *err = NULL;
>  	struct imsg imsg;
> @@ -173,14 +185,36 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got
>  		goto done;
>  	}
>  	memcpy(&patch, imsg.data, sizeof(patch));
> -	if (*patch.old != '\0' && (p->old = strdup(patch.old)) == NULL) {
> -		err = got_error_from_errno("strdup");
> -		goto done;
> +
> +	/* automatically set strip=1 for git-style diffs */
> +	if (strip == -1 && patch.git &&
> +	    (*patch.old == '\0' || !strncmp(patch.old, "a/", 2)) &&
> +	    (*patch.new == '\0' || !strncmp(patch.new, "b/", 2)))
> +		strip = 1;
> +
> +	/* prefer the new name if not /dev/null for not git-style diffs */
> +	if (!patch.git && *patch.new != '\0' && *patch.old != '\0') {
> +		p->old = strdup(strip_path(patch.new, strip));
> +		if (p->old == NULL) {
> +			err = got_error_from_errno("strdup");
> +			goto done;
> +		}
> +	} else if (*patch.old != '\0') {
> +		p->old = strdup(strip_path(patch.old, strip));
> +		if (p->old == NULL) {
> +			err = got_error_from_errno("strdup");
> +			goto done;
> +		}
>  	}
> -	if (*patch.new != '\0' && (p->new = strdup(patch.new)) == NULL) {
> -		err = got_error_from_errno("strdup");
> -		goto done;
> +
> +	if (*patch.new != '\0') {
> +		p->new = strdup(strip_path(patch.new, strip));
> +		if (p->new == NULL) {
> +			err = got_error_from_errno("strdup");
> +			goto done;
> +		}
>  	}
> +
>  	if (p->old == NULL && p->new == NULL) {
>  		err = got_error(GOT_ERR_PATCH_MALFORMED);
>  		goto done;
> @@ -650,7 +684,7 @@ done:
>  
>  const struct got_error *
>  got_patch(int fd, struct got_worktree *worktree, struct got_repository *repo,
> -    int nop, got_patch_progress_cb progress_cb, void *progress_arg,
> +    int nop, int strip, got_patch_progress_cb progress_cb, void *progress_arg,
>      got_cancel_cb cancel_cb, void *cancel_arg)
>  {
>  	const struct got_error *err = NULL;
> @@ -706,7 +740,7 @@ got_patch(int fd, struct got_worktree *worktree, struc
>  		pa.progress_arg = progress_arg;
>  		pa.head = &p.head;
>  
> -		err = recv_patch(ibuf, &done, &p);
> +		err = recv_patch(ibuf, &done, &p, strip);
>  		if (err || done)
>  			break;
>  
> blob - 718736881402bf79f5c0307528c91464deea3105
> blob + b72f7f88829c3931ef203cba5d632edc4dc06195
> --- libexec/got-read-patch/got-read-patch.c
> +++ libexec/got-read-patch/got-read-patch.c
> @@ -66,18 +66,13 @@ send_patch(const char *oldname, const char *newname, i
>  
>  	memset(&p, 0, sizeof(p));
>  
> -	/*
> -	 * Prefer the new name if it's not /dev/null and it's not
> -	 * a git-style diff.
> -	 */
> -	if (!git && newname != NULL && oldname != NULL)
> -		strlcpy(p.old, newname, sizeof(p.old));
> -	else if (oldname != NULL)
> +	if (oldname != NULL)
>  		strlcpy(p.old, oldname, sizeof(p.old));
>  
>  	if (newname != NULL)
>  		strlcpy(p.new, newname, sizeof(p.new));
>  
> +	p.git = git;
>  	if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1,
>  	    &p, sizeof(p)) == -1)
>  		return got_error_from_errno("imsg_compose GOT_IMSG_PATCH");
> @@ -97,10 +92,9 @@ send_patch_done(void)
>  
>  /* based on fetchname from usr.bin/patch/util.c */
>  static const struct got_error *
> -filename(const char *at, char **name, int strip)
> +filename(const char *at, char **name)
>  {
> -	char	*fullname, *t;
> -	int	 l, tab;
> +	char	*tmp, *t;
>  
>  	*name = NULL;
>  	if (*at == '\0')
> @@ -113,24 +107,16 @@ filename(const char *at, char **name, int strip)
>  	if (!strncmp(at, _PATH_DEVNULL, sizeof(_PATH_DEVNULL) - 1))
>  		return NULL;
>  
> -	t = strdup(at);
> -	if (t == NULL)
> +	tmp = strdup(at);
> +	if (tmp == NULL)
>  		return got_error_from_errno("strdup");
> -	*name = fullname = t;
> -	tab = strchr(t, '\t') != NULL;
> +	if ((t = strchr(tmp, '\t')) != NULL)
> +		*t = '\0';
> +	if ((t = strchr(tmp, '\n')) != NULL)
> +		*t = '\0';
>  
> -	/* strip off path components and NUL-terminate */
> -	for (l = strip;
> -	    *t != '\0' && ((tab && *t != '\t') || !isspace((unsigned char)*t));
> -	    ++t) {
> -		if (t[0] == '/' && t[1] != '/' && t[1] != '\0')
> -			if (--l >= 0)
> -				*name = t + 1;
> -	}
> -	*t = '\0';
> -
> -	*name = strdup(*name);
> -	free(fullname);
> +	*name = strdup(tmp);
> +	free(tmp);
>  	if (*name == NULL)
>  		return got_error_from_errno("strdup");
>  	return NULL;
> @@ -152,18 +138,12 @@ find_patch(FILE *fp)
>  		 * we don't have to follow POSIX.
>  		 */
>  
> -		if (git && !strncmp(line, "--- a/", 6)) {
> +		if (!strncmp(line, "--- ", 4)) {
>  			free(old);
> -			err = filename(line+6, &old, 0);
> -		} else if (!strncmp(line, "--- ", 4)) {
> -			free(old);
> -			err = filename(line+4, &old, 0);
> -		} else if (git && !strncmp(line, "+++ b/", 6)) {
> -			free(new);
> -			err = filename(line+6, &new, 0);
> +			err = filename(line+4, &old);
>  		} else if (!strncmp(line, "+++ ", 4)) {
>  			free(new);
> -			err = filename(line+4, &new, 0);
> +			err = filename(line+4, &new);
>  		} else if (!strncmp(line, "diff --git a/", 13))
>  			git = 1;
>  
> blob - 654d85e31ae466ed47907c34cc051be70a8351e4
> blob + 51c9bdf7f7042a9ae2bbabc745382dfad58cc07f
> --- regress/cmdline/patch.sh
> +++ regress/cmdline/patch.sh
> @@ -1211,6 +1211,40 @@ EOF
>  	test_done $testroot $ret
>  }
>  
> +test_patch_strip() {
> +	local testroot=`test_init patch_strip`
> +
> +	got checkout $testroot/repo $testroot/wt > /dev/null
> +	ret=$?
> +	if [ $ret -ne 0 ]; then
> +		test_done $testroot $ret
> +		return 1
> +	fi
> +
> +	cat <<EOF > $testroot/wt/patch
> +--- foo/bar/alpha.orig
> ++++ foo/bar/alpha
> +@@ -1 +1 @@
> +-alpha
> ++ALPHA
> +EOF
> +
> +	(cd $testroot/wt && got patch -p 99 patch) > $testroot/stdout
> +	ret=$?
> +	if [ $ret -ne 0 ]; then
> +		test_done $testroot $ret
> +		return 1
> +	fi
> +
> +	echo "M  alpha" >> $testroot/stdout.expected
> +	cmp -s $testroot/stdout.expected $testroot/stdout
> +	ret=$?
> +	if [ $ret -ne 0 ]; then
> +		diff -u $testroot/stdout.expected $testroot/stdout
> +	fi
> +	test_done $testroot $ret
> +}
> +
>  test_parseargs "$@"
>  run_test test_patch_simple_add_file
>  run_test test_patch_simple_rm_file
> @@ -1231,3 +1265,4 @@ run_test test_patch_create_dirs
>  run_test test_patch_with_offset
>  run_test test_patch_prefer_new_path
>  run_test test_patch_no_newline
> +run_test test_patch_strip
> 
>