From: Omar Polo Subject: Re: teach got patch how to strip To: Stefan Sperling Cc: gameoftrees@openbsd.org Date: Mon, 04 Apr 2022 21:53:17 +0200 Stefan Sperling wrote: > On Sun, Apr 03, 2022 at 12:33:36PM +0200, Omar Polo wrote: > > as per subject, this adds a `-p strip' flag that works mostly as > > expected. > > > > There's a bit of churn because I'd like to move some bits from the > > libexec helper to the main process: until now it was got-read-patch to > > notice the `diff --git' marker and automatically strip "a/" and "b/"s > > from paths, now it's done in the main process (so -p can be respected.) > > > > If no -p is specified `got patch' will still "intelligently" drop a/ and > > b/ from git-style diffs. I'm not sure if/how document it, patch(1) does > > the same but it's not mentioned in the manpage. > > I remember this change being introduced when more Git-style diffs against > OpenBSD src started showing up on mailing lists, and some developers got > fed up with patch's default behaviour when trying to apply such patches > without -p. > > It won't hurt to document this behaviour in the got(1) man page, at least. yeah, right. I'm doing an attempt to document it in the -p description. > > [...] > > +const char * > > This function should be marked 'static'. wops > > +strip_path(const char *path, int n) > > +{ > > + const char *c; > > + > > + while (n > 0 && (c = strchr(path, '/')) != NULL) { > > + path = c + 1; > > + n--; > > + } > > + return path; > > +} > > + > > The code above does not allow the counter 'n' to overrun the amount > of path components present in the input string. > > What happens if 'n' is larger than the number of path components? > > Should strip_path() try to detect the problem and error out? > I would expect that a too large 'n' is always a user error. > > For example, given a patch for files: > > baz/bax/frob.c > foo/bar.c > > It looks like 'got patch -pN' with N >= 3 would try to patch: > > frob.c > bar.c > > Which effectively flattens the tree hierarchy and could even lead to > collisions in cases like this: > > baz/bax/README > foo/README > > Testing my example, I found that Larry patch(1) prompts for a file > path for frob.c unless I specify -p0. > > However, if a frob.c file does happen to exist in the current directory: > frob.c > baz/bax/frob.c > foo/bar.c > then Larry patch(1) -p3 will try to patch ./frob.c with the patch for > bax/bax/frob.c. This behaviour does not seem desirable to me. I don't > think we should copy it. i haven't thought about it before, but i agree with you. in the updated diff i've moved the strip function to path.c and made it errors when trying to strip more components than what are present. diff refs/heads/main refs/heads/pstrip blob - a9f60f53d707e188b849ee505d8cedffab3a89b9 blob + 3f3f4e07bca1f7023b974c2d2a95a5bf759784c4 --- got/got.1 +++ got/got.1 @@ -1285,7 +1285,7 @@ option) .El .El .Tg pa -.It Cm patch Oo Fl n Oc Op Ar patchfile +.It Cm patch Oo Fl n Oc Oo Fl p Ar strip Oc Op Ar patchfile .Dl Pq alias: Cm pa Apply changes from .Ar patchfile @@ -1354,6 +1354,20 @@ If the .Ar patchfile contains diffs that affect the same file multiple times, the results displayed may be incorrect. +.It Fl p Ar strip +Specify the number of leading path components to strip from paths +parsed from +.Ar patchfile . +If not given, +recognize +.Xr git 1 +style diffs and automatically drops the +.Sq a/ +and +.Sq b/ +prefixes. +It's an error to attempt to strip more component than present in +.Ar patchfile . .El .Tg rv .It Cm revert Oo Fl p Oc Oo Fl F Ar response-script Oc Oo Fl R Oc Ar path ... blob - f3608d970c5ae77fee89a4e9ca8fdb89b015c5f8 blob + 36bd7fe005dea2816d23161fab0d7aed3b56e159 --- got/got.c +++ got/got.c @@ -7222,15 +7222,22 @@ cmd_patch(int argc, char *argv[]) const struct got_error *error = NULL, *close_error = NULL; struct got_worktree *worktree = NULL; struct got_repository *repo = NULL; + const char *errstr; char *cwd = NULL; - int ch, nop = 0; + int ch, nop = 0, strip = -1; int patchfd; - while ((ch = getopt(argc, argv, "n")) != -1) { + while ((ch = getopt(argc, argv, "np:")) != -1) { switch (ch) { case 'n': nop = 1; break; + case 'p': + strip = strtonum(optarg, 0, INT_MAX, &errstr); + if (errstr != NULL) + errx(1, "pathname strip count is %s: %s", + errstr, optarg); + break; default: usage_patch(); /* NOTREACHED */ @@ -7278,8 +7285,8 @@ cmd_patch(int argc, char *argv[]) err(1, "pledge"); #endif - error = got_patch(patchfd, worktree, repo, nop, &patch_progress, - NULL, check_cancelled, NULL); + error = got_patch(patchfd, worktree, repo, nop, strip, + &patch_progress, NULL, check_cancelled, NULL); done: if (repo) { blob - 959a2129f32e5f561fc9ccfdc186d10507d45e00 blob + 62c76b0eb2926a03364ef10e416183cdfa18f398 --- include/got_patch.h +++ include/got_patch.h @@ -31,5 +31,5 @@ typedef const struct got_error *(*got_patch_progress_c * The patch file descriptor *must* be seekable. */ const struct got_error * -got_patch(int, struct got_worktree *, struct got_repository *, int, +got_patch(int, struct got_worktree *, struct got_repository *, int, int, got_patch_progress_cb, void *, got_cancel_cb, void *); blob - f2d6ba2e472496846c8519d36002ed09491009ed blob + 2453c214e0eca525ba6f32c7d6c09c944f279d31 --- include/got_path.h +++ include/got_path.h @@ -41,6 +41,12 @@ const struct got_error *got_canonpath(const char *, ch const struct got_error *got_path_skip_common_ancestor(char **, const char *, const char *); +/* + * Remove leading components from path. It's an error to strip more + * component than present. The result is allocated dynamically. + */ +const struct got_error *got_path_strip(const char *, int, char **); + /* Determine whether a path points to the root directory "/" . */ int got_path_is_root_dir(const char *); blob - fef20e3a85c35f0faa4743d896a34ac04f0a4397 blob + 110fe049d86c1a33fb3b33e4fe74ffa8a3dbbfa8 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -525,6 +525,7 @@ struct got_imsg_remotes { * Structure for GOT_IMSG_PATCH data. */ struct got_imsg_patch { + int git; char old[PATH_MAX]; char new[PATH_MAX]; }; blob - 4bea692e5b2c362795981a69b86a1446aa52ec7c blob + 5852a65ea078b1a5ac025ad86f59e91034badc62 --- lib/patch.c +++ lib/patch.c @@ -144,7 +144,7 @@ pushline(struct got_patch_hunk *h, const char *line) } static const struct got_error * -recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p) +recv_patch(struct imsgbuf *ibuf, int *done, struct got_patch *p, int strip) { const struct got_error *err = NULL; struct imsg imsg; @@ -173,14 +173,30 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got goto done; } memcpy(&patch, imsg.data, sizeof(patch)); - if (*patch.old != '\0' && (p->old = strdup(patch.old)) == NULL) { - err = got_error_from_errno("strdup"); - goto done; + + /* automatically set strip=1 for git-style diffs */ + if (strip == -1 && patch.git && + (*patch.old == '\0' || !strncmp(patch.old, "a/", 2)) && + (*patch.new == '\0' || !strncmp(patch.new, "b/", 2))) + strip = 1; + + /* prefer the new name if not /dev/null for not git-style diffs */ + if (!patch.git && *patch.new != '\0' && *patch.old != '\0') { + err = got_path_strip(patch.new, strip, &p->old); + if (err) + goto done; + } else if (*patch.old != '\0') { + err = got_path_strip(patch.old, strip, &p->old); + if (err) + goto done; } - if (*patch.new != '\0' && (p->new = strdup(patch.new)) == NULL) { - err = got_error_from_errno("strdup"); - goto done; + + if (*patch.new != '\0') { + err = got_path_strip(patch.new, strip, &p->new); + if (err) + goto done; } + if (p->old == NULL && p->new == NULL) { err = got_error(GOT_ERR_PATCH_MALFORMED); goto done; @@ -650,7 +666,7 @@ done: const struct got_error * got_patch(int fd, struct got_worktree *worktree, struct got_repository *repo, - int nop, got_patch_progress_cb progress_cb, void *progress_arg, + int nop, int strip, got_patch_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; @@ -706,7 +722,7 @@ got_patch(int fd, struct got_worktree *worktree, struc pa.progress_arg = progress_arg; pa.head = &p.head; - err = recv_patch(ibuf, &done, &p); + err = recv_patch(ibuf, &done, &p, strip); if (err || done) break; blob - 0e05a1100c321abafef637ae199c587f70a6e018 blob + 51eb3d8d6a3c1c9e440695f0421c3e16571d509e --- lib/path.c +++ lib/path.c @@ -117,6 +117,28 @@ got_path_skip_common_ancestor(char **child, const char return NULL; } +const struct got_error * +got_path_strip(const char *path, int n, char **out) +{ + const char *p, *c; + + p = path; + *out = NULL; + + while (n > 0 && (c = strchr(path, '/')) != NULL) { + path = c + 1; + n--; + } + + if (n > 0) + return got_error_fmt(GOT_ERR_BAD_PATH, + "can't strip %d components from %s", n, p); + + if ((*out = strdup(path)) == NULL) + return got_error_from_errno("strdup"); + return NULL; +} + int got_path_is_root_dir(const char *path) { blob - 718736881402bf79f5c0307528c91464deea3105 blob + b72f7f88829c3931ef203cba5d632edc4dc06195 --- libexec/got-read-patch/got-read-patch.c +++ libexec/got-read-patch/got-read-patch.c @@ -66,18 +66,13 @@ send_patch(const char *oldname, const char *newname, i memset(&p, 0, sizeof(p)); - /* - * Prefer the new name if it's not /dev/null and it's not - * a git-style diff. - */ - if (!git && newname != NULL && oldname != NULL) - strlcpy(p.old, newname, sizeof(p.old)); - else if (oldname != NULL) + if (oldname != NULL) strlcpy(p.old, oldname, sizeof(p.old)); if (newname != NULL) strlcpy(p.new, newname, sizeof(p.new)); + p.git = git; if (imsg_compose(&ibuf, GOT_IMSG_PATCH, 0, 0, -1, &p, sizeof(p)) == -1) return got_error_from_errno("imsg_compose GOT_IMSG_PATCH"); @@ -97,10 +92,9 @@ send_patch_done(void) /* based on fetchname from usr.bin/patch/util.c */ static const struct got_error * -filename(const char *at, char **name, int strip) +filename(const char *at, char **name) { - char *fullname, *t; - int l, tab; + char *tmp, *t; *name = NULL; if (*at == '\0') @@ -113,24 +107,16 @@ filename(const char *at, char **name, int strip) if (!strncmp(at, _PATH_DEVNULL, sizeof(_PATH_DEVNULL) - 1)) return NULL; - t = strdup(at); - if (t == NULL) + tmp = strdup(at); + if (tmp == NULL) return got_error_from_errno("strdup"); - *name = fullname = t; - tab = strchr(t, '\t') != NULL; + if ((t = strchr(tmp, '\t')) != NULL) + *t = '\0'; + if ((t = strchr(tmp, '\n')) != NULL) + *t = '\0'; - /* strip off path components and NUL-terminate */ - for (l = strip; - *t != '\0' && ((tab && *t != '\t') || !isspace((unsigned char)*t)); - ++t) { - if (t[0] == '/' && t[1] != '/' && t[1] != '\0') - if (--l >= 0) - *name = t + 1; - } - *t = '\0'; - - *name = strdup(*name); - free(fullname); + *name = strdup(tmp); + free(tmp); if (*name == NULL) return got_error_from_errno("strdup"); return NULL; @@ -152,18 +138,12 @@ find_patch(FILE *fp) * we don't have to follow POSIX. */ - if (git && !strncmp(line, "--- a/", 6)) { + if (!strncmp(line, "--- ", 4)) { free(old); - err = filename(line+6, &old, 0); - } else if (!strncmp(line, "--- ", 4)) { - free(old); - err = filename(line+4, &old, 0); - } else if (git && !strncmp(line, "+++ b/", 6)) { - free(new); - err = filename(line+6, &new, 0); + err = filename(line+4, &old); } else if (!strncmp(line, "+++ ", 4)) { free(new); - err = filename(line+4, &new, 0); + err = filename(line+4, &new); } else if (!strncmp(line, "diff --git a/", 13)) git = 1; blob - 654d85e31ae466ed47907c34cc051be70a8351e4 blob + b8c7e1512b611a7edb1e03ca3c25730eb9a323ee --- regress/cmdline/patch.sh +++ regress/cmdline/patch.sh @@ -1211,6 +1211,40 @@ EOF test_done $testroot $ret } +test_patch_strip() { + local testroot=`test_init patch_strip` + + got checkout $testroot/repo $testroot/wt > /dev/null + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + cat < $testroot/wt/patch +--- foo/bar/alpha.orig ++++ foo/bar/alpha +@@ -1 +1 @@ +-alpha ++ALPHA +EOF + + (cd $testroot/wt && got patch -p2 patch) > $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + test_done $testroot $ret + return 1 + fi + + echo "M alpha" >> $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + fi + test_done $testroot $ret +} + test_parseargs "$@" run_test test_patch_simple_add_file run_test test_patch_simple_rm_file @@ -1231,3 +1265,4 @@ run_test test_patch_create_dirs run_test test_patch_with_offset run_test test_patch_prefer_new_path run_test test_patch_no_newline +run_test test_patch_strip