"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: teach got patch how to strip
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 04 Apr 2022 21:53:17 +0200

Download raw body.

Thread
Stefan Sperling <stsp@stsp.name> wrote:
> On Sun, Apr 03, 2022 at 12:33:36PM +0200, Omar Polo wrote:
> > as per subject, this adds a `-p strip' flag that works mostly as
> > expected.
> > 
> > There's a bit of churn because I'd like to move some bits from the
> > libexec helper to the main process: until now it was got-read-patch to
> > notice the `diff --git' marker and automatically strip "a/" and "b/"s
> > from paths, now it's done in the main process (so -p can be respected.)
> > 
> > If no -p is specified `got patch' will still "intelligently" drop a/ and
> > b/ from git-style diffs.  I'm not sure if/how document it, patch(1) does
> > the same but it's not mentioned in the manpage.
> 
> I remember this change being introduced when more Git-style diffs against
> OpenBSD src started showing up on mailing lists, and some developers got
> fed up with patch's default behaviour when trying to apply such patches
> without -p.
> 
> It won't hurt to document this behaviour in the got(1) man page, at least.

yeah, right.  I'm doing an attempt to document it in the -p description.

> > [...]
> > +const char *
> 
> This function should be marked 'static'.

wops

> > +strip_path(const char *path, int n)
> > +{
> > +	const char *c;
> > +
> > +	while (n > 0 && (c = strchr(path, '/')) != NULL) {
> > +		path = c + 1;
> > +		n--;
> > +	}
> > +	return path;
> > +}
> > +
> 
> The code above does not allow the counter 'n' to overrun the amount
> of path components present in the input string.
> 
> What happens if 'n' is larger than the number of path components?
> 
> Should strip_path() try to detect the problem and error out?
> I would expect that a too large 'n' is always a user error.
> 
> For example, given a patch for files:
> 
>   baz/bax/frob.c
>   foo/bar.c
> 
> It looks like 'got patch -pN' with N >= 3 would try to patch:
> 
>   frob.c
>   bar.c
> 
> Which effectively flattens the tree hierarchy and could even lead to
> collisions in cases like this:
> 
>   baz/bax/README
>   foo/README
> 
> Testing my example, I found that Larry patch(1) prompts for a file
> path for frob.c unless I specify -p0.
> 
> However, if a frob.c file does happen to exist in the current directory:
>   frob.c
>   baz/bax/frob.c
>   foo/bar.c
> then Larry patch(1) -p3 will try to patch ./frob.c with the patch for
> bax/bax/frob.c. This behaviour does not seem desirable to me. I don't
> think we should copy it.

i haven't thought about it before, but i agree with you.

in the updated diff i've moved the strip function to path.c and made it
errors when trying to strip more components than what are present.

diff refs/heads/main refs/heads/pstrip
blob - a9f60f53d707e188b849ee505d8cedffab3a89b9
blob + 3f3f4e07bca1f7023b974c2d2a95a5bf759784c4
--- got/got.1
+++ got/got.1
@@ -1285,7 +1285,7 @@ option)
 .El
 .El
 .Tg pa
-.It Cm patch Oo Fl n Oc Op Ar patchfile
+.It Cm patch Oo Fl n Oc Oo Fl p Ar strip Oc Op Ar patchfile
 .Dl Pq alias: Cm pa
 Apply changes from
 .Ar patchfile
@@ -1354,6 +1354,20 @@ If the
 .Ar patchfile
 contains diffs that affect the same file multiple times, the results
 displayed may be incorrect.
+.It Fl p Ar strip
+Specify the number of leading path components to strip from paths
+parsed from
+.Ar patchfile .
+If not given,
+recognize
+.Xr git 1
+style diffs and automatically drops the
+.Sq a/
+and
+.Sq b/
+prefixes.
+It's an error to attempt to strip more component than present in
+.Ar patchfile .
 .El
 .Tg rv
 .It Cm revert Oo Fl p Oc Oo Fl F Ar response-script Oc Oo Fl R Oc Ar path ...
blob - f3608d970c5ae77fee89a4e9ca8fdb89b015c5f8
blob + 36bd7fe005dea2816d23161fab0d7aed3b56e159
--- got/got.c
+++ got/got.c
@@ -7222,15 +7222,22 @@ cmd_patch(int argc, char *argv[])
 	const struct got_error *error = NULL, *close_error = NULL;
 	struct got_worktree *worktree = NULL;
 	struct got_repository *repo = NULL;
+	const char *errstr;
 	char *cwd = NULL;
-	int ch, nop = 0;
+	int ch, nop = 0, strip = -1;
 	int patchfd;
 
-	while ((ch = getopt(argc, argv, "n")) != -1) {
+	while ((ch = getopt(argc, argv, "np:")) != -1) {
 		switch (ch) {
 		case 'n':
 			nop = 1;
 			break;
+		case 'p':
+			strip = strtonum(optarg, 0, INT_MAX, &errstr);
+			if (errstr != NULL)
+				errx(1, "pathname strip count is %s: %s",
+				     errstr, optarg);
+			break;
 		default:
 			usage_patch();
 			/* NOTREACHED */
@@ -7278,8 +7285,8 @@ cmd_patch(int argc, char *argv[])
 		err(1, "pledge");
 #endif
 
-	error = got_patch(patchfd, worktree, repo, nop, &patch_progress,
-	    NULL, check_cancelled, NULL);
+	error = got_patch(patchfd, worktree, repo, nop, strip,
+	    &patch_progress, NULL, check_cancelled, NULL);
 
 done:
 	if (repo) {
blob - 959a2129f32e5f561fc9ccfdc186d10507d45e00
blob + 62c76b0eb2926a03364ef10e416183cdfa18f398
--- include/got_patch.h
+++ include/got_patch.h
@@ -31,5 +31,5 @@ typedef const struct got_error *(*got_patch_progress_c
  * The patch file descriptor *must* be seekable.
  */
 const struct got_error *
-got_patch(int, struct got_worktree *, struct got_repository *, int,
+got_patch(int, struct got_worktree *, struct got_repository *, int, int,
     got_patch_progress_cb, void *, got_cancel_cb, void *);
blob - f2d6ba2e472496846c8519d36002ed09491009ed
blob + 2453c214e0eca525ba6f32c7d6c09c944f279d31
--- include/got_path.h
+++ include/got_path.h
@@ -41,6 +41,12 @@ const struct got_error *got_canonpath(const char *, ch
 const struct got_error *got_path_skip_common_ancestor(char **, const char *,
     const char *);
 
+/*
+ * Remove leading components from path.  It's an error to strip more
+ * component than present.  The result is allocated dynamically.
+ */
+const struct got_error *got_path_strip(const char *, int, char **);
+
 /* Determine whether a path points to the root directory "/" . */
 int got_path_is_root_dir(const char *);
 
blob - fef20e3a85c35f0faa4743d896a34ac04f0a4397
blob + 110fe049d86c1a33fb3b33e4fe74ffa8a3dbbfa8
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -525,6 +525,7 @@ struct got_imsg_remotes {
  * Structure for GOT_IMSG_PATCH data.
  */
 struct got_imsg_patch {
+	int	git;
 	char	old[PATH_MAX];
 	char	new[PATH_MAX];
 };
blob - 4bea692e5b2c362795981a69b86a1446aa52ec7c
blob + 5852a65ea078b1a5ac025ad86f59e91034badc62
--- lib/patch.c
+++ lib/patch.c
@@ -144,7 +144,7 @@ pushline(struct got_patch_hunk *h, const char *line)
 }
 
 static const struct got_error *
-recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p)
+recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p, int strip)
 {
 	const struct got_error *err = NULL;
 	struct imsg imsg;
@@ -173,14 +173,30 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got
 		goto done;
 	}
 	memcpy(&patch, imsg.data, sizeof(patch));
-	if (*patch.old != '\0' && (p->old = strdup(patch.old)) == NULL) {
-		err = got_error_from_errno("strdup");
-		goto done;
+
+	/* automatically set strip=1 for git-style diffs */
+	if (strip == -1 && patch.git &&
+	    (*patch.old == '\0' || !strncmp(patch.old, "a/", 2)) &&
+	    (*patch.new == '\0' || !strncmp(patch.new, "b/", 2)))
+		strip = 1;
+
+	/* prefer the new name if not /dev/null for not git-style diffs */
+	if (!patch.git && *patch.new != '\0' && *patch.old != '\0') {
+		err = got_path_strip(patch.new, strip, &p->old);
+		if (err)
+			goto done;
+	} else if (*patch.old != '\0') {
+		err = got_path_strip(patch.old, strip, &p->old);
+		if (err)
+			goto done;
 	}
-	if (*patch.new != '\0' && (p->new = strdup(patch.new)) == NULL) {
-		err = got_error_from_errno("strdup");
-		goto done;
+
+	if (*patch.new != '\0') {
+		err = got_path_strip(patch.new, strip, &p->new);
+		if (err)
+			goto done;
 	}
+
 	if (p->old == NULL && p->new == NULL) {
 		err = got_error(GOT_ERR_PATCH_MALFORMED);
 		goto done;
@@ -650,7 +666,7 @@ done:
 
 const struct got_error *
 got_patch(int fd, struct got_worktree *worktree, struct got_repository *repo,
-    int nop, got_patch_progress_cb progress_cb, void *progress_arg,
+    int nop, int strip, got_patch_progress_cb progress_cb, void *progress_arg,
     got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
@@ -706,7 +722,7 @@ got_patch(int fd, struct got_worktree *worktree, struc
 		pa.progress_arg = progress_arg;
 		pa.head = &p.head;
 
-		err = recv_patch(ibuf, &done, &p);
+		err = recv_patch(ibuf, &done, &p, strip);
 		if (err || done)
 			break;
 
blob - 0e05a1100c321abafef637ae199c587f70a6e018
blob + 51eb3d8d6a3c1c9e440695f0421c3e16571d509e
--- lib/path.c
+++ lib/path.c
@@ -117,6 +117,28 @@ got_path_skip_common_ancestor(char **child, const char
 	return NULL;
 }
 
+const struct got_error *
+got_path_strip(const char *path, int n, char **out)
+{
+	const char *p, *c;
+
+	p = path;
+	*out = NULL;
+
+	while (n > 0 && (c = strchr(path, '/')) != NULL) {
+		path = c + 1;
+		n--;
+	}
+
+	if (n > 0)
+		return got_error_fmt(GOT_ERR_BAD_PATH,
+		    "can't strip %d components from %s", n, p);
+
+	if ((*out = strdup(path)) == NULL)
+		return got_error_from_errno("strdup");
+	return NULL;
+}
+
 int
 got_path_is_root_dir(const char *path)
 {
blob - 718736881402bf79f5c0307528c91464deea3105
blob + b72f7f88829c3931ef203cba5d632edc4dc06195
--- libexec/got-read-patch/got-read-patch.c
+++ libexec/got-read-patch/got-read-patch.c
@@ -66,18 +66,13 @@ send_patch(const char *oldname, const char *newname, i
 
 	memset(&p, 0, sizeof(p));
 
-	/*
-	 * Prefer the new name if it's not /dev/null and it's not
-	 * a git-style diff.
-	 */
-	if (!git && newname != NULL && oldname != NULL)
-		strlcpy(p.old, newname, sizeof(p.old));
-	else if (oldname != NULL)
+	if (oldname != NULL)
 		strlcpy(p.old, oldname, sizeof(p.old));
 
 	if (newname != NULL)
 		strlcpy(p.new, newname, sizeof(p.new));
 
+	p.git = git;
 	if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1,
 	    &p, sizeof(p)) == -1)
 		return got_error_from_errno("imsg_compose GOT_IMSG_PATCH");
@@ -97,10 +92,9 @@ send_patch_done(void)
 
 /* based on fetchname from usr.bin/patch/util.c */
 static const struct got_error *
-filename(const char *at, char **name, int strip)
+filename(const char *at, char **name)
 {
-	char	*fullname, *t;
-	int	 l, tab;
+	char	*tmp, *t;
 
 	*name = NULL;
 	if (*at == '\0')
@@ -113,24 +107,16 @@ filename(const char *at, char **name, int strip)
 	if (!strncmp(at, _PATH_DEVNULL, sizeof(_PATH_DEVNULL) - 1))
 		return NULL;
 
-	t = strdup(at);
-	if (t == NULL)
+	tmp = strdup(at);
+	if (tmp == NULL)
 		return got_error_from_errno("strdup");
-	*name = fullname = t;
-	tab = strchr(t, '\t') != NULL;
+	if ((t = strchr(tmp, '\t')) != NULL)
+		*t = '\0';
+	if ((t = strchr(tmp, '\n')) != NULL)
+		*t = '\0';
 
-	/* strip off path components and NUL-terminate */
-	for (l = strip;
-	    *t != '\0' && ((tab && *t != '\t') || !isspace((unsigned char)*t));
-	    ++t) {
-		if (t[0] == '/' && t[1] != '/' && t[1] != '\0')
-			if (--l >= 0)
-				*name = t + 1;
-	}
-	*t = '\0';
-
-	*name = strdup(*name);
-	free(fullname);
+	*name = strdup(tmp);
+	free(tmp);
 	if (*name == NULL)
 		return got_error_from_errno("strdup");
 	return NULL;
@@ -152,18 +138,12 @@ find_patch(FILE *fp)
 		 * we don't have to follow POSIX.
 		 */
 
-		if (git && !strncmp(line, "--- a/", 6)) {
+		if (!strncmp(line, "--- ", 4)) {
 			free(old);
-			err = filename(line+6, &old, 0);
-		} else if (!strncmp(line, "--- ", 4)) {
-			free(old);
-			err = filename(line+4, &old, 0);
-		} else if (git && !strncmp(line, "+++ b/", 6)) {
-			free(new);
-			err = filename(line+6, &new, 0);
+			err = filename(line+4, &old);
 		} else if (!strncmp(line, "+++ ", 4)) {
 			free(new);
-			err = filename(line+4, &new, 0);
+			err = filename(line+4, &new);
 		} else if (!strncmp(line, "diff --git a/", 13))
 			git = 1;
 
blob - 654d85e31ae466ed47907c34cc051be70a8351e4
blob + b8c7e1512b611a7edb1e03ca3c25730eb9a323ee
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1211,6 +1211,40 @@ EOF
 	test_done $testroot $ret
 }
 
+test_patch_strip() {
+	local testroot=`test_init patch_strip`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	cat <<EOF > $testroot/wt/patch
+--- foo/bar/alpha.orig
++++ foo/bar/alpha
+@@ -1 +1 @@
+-alpha
++ALPHA
+EOF
+
+	(cd $testroot/wt && got patch -p2 patch) > $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done $testroot $ret
+		return 1
+	fi
+
+	echo "M  alpha" >> $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done $testroot $ret
+}
+
 test_parseargs "$@"
 run_test test_patch_simple_add_file
 run_test test_patch_simple_rm_file
@@ -1231,3 +1265,4 @@ run_test test_patch_create_dirs
 run_test test_patch_with_offset
 run_test test_patch_prefer_new_path
 run_test test_patch_no_newline
+run_test test_patch_strip