From: Omar Polo Subject: got patch vs binary files To: gameoftrees@openbsd.org Date: Fri, 12 May 2023 21:06:15 +0200 back in february naddy reported that got patch choked on the perl update. https://marc.gameoftrees.org/thread/1675780522.55927_0.html The problem was two-fold: 1. the "Result too large" for very long lines 2. NUL bytes in the middle of the lines Here I'm adressing the most interesting one, the sencond. I'll handle the very long lines case next. got patch assumes lines can be encoded as strings, so embedded NUL bytes effectively truncates the line. diff below changes this assumption, by considering lines just binary strings LF-delimited. While doing this, I took the chance to ship some other minor changes. The type of the line (i.e. '+', '-' or ' ' aka context) now is a separate field instead of being encoded in the first byte of each line, this simplifies the various +1/-1 that were needed otherwise. I've renamed linecmp into lineq and made it boolean: it's simpler to write, to use and we'll never be interested in sorting lines anyway. One quirk is that I'm still allocating the space for a NUL terminator. This is not required, and in fact makes pushline() maybe not obvious, but it's useful for debugging so it's worth it I think. The diff is not committable as-is however. I had to disable a check in diff3 to allow got_merge_diff3 to proceed on binary files. I'm not sure what we want to do in for this, but I believe that even without the will to support "binary patches" most of the diff still makes sense, as it propagate lengths rather than recomputing strlen() here and there and avoids some extra useless allocations in got-read-patch. diff /home/op/w/got commit - ba0bed23e515b14cd3fe877c6847785c8c29971e path + /home/op/w/got blob - 5cd79150cc6476f78c90b997d76b00eaa99fc341 file + lib/diff3.c --- lib/diff3.c +++ lib/diff3.c @@ -231,6 +231,7 @@ diffreg(BUF **d, const char *path1, const char *path2, if (err) goto done; +#if 0 if (diffreg_result) { struct diff_result *diff_result = diffreg_result->result; int atomizer_flags = (diff_result->left->atomizer_flags | @@ -240,6 +241,7 @@ diffreg(BUF **d, const char *path1, const char *path2, goto done; } } +#endif err = got_diffreg_output(NULL, NULL, diffreg_result, 1, 1, "", "", GOT_DIFF_OUTPUT_PLAIN, 0, outfile); blob - 9f524a6bfc0deae40498090e43fe7f9238eb4e64 file + lib/patch.c --- lib/patch.c +++ lib/patch.c @@ -59,6 +59,12 @@ struct got_patch_hunk { #define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif +struct got_patch_line { + char mode; + char *line; + size_t len; +}; + struct got_patch_hunk { STAILQ_ENTRY(got_patch_hunk) entries; const struct got_error *err; @@ -72,7 +78,7 @@ struct got_patch_hunk { int new_lines; size_t len; size_t cap; - char **lines; + struct got_patch_line *lines; }; STAILQ_HEAD(got_patch_hunk_head, got_patch_hunk); @@ -128,7 +134,7 @@ patch_free(struct got_patch *p) STAILQ_REMOVE_HEAD(&p->head, entries); for (i = 0; i < h->len; ++i) - free(h->lines[i]); + free(h->lines[i].line); free(h->lines); free(h); } @@ -141,7 +147,7 @@ pushline(struct got_patch_hunk *h, const char *line) } static const struct got_error * -pushline(struct got_patch_hunk *h, const char *line) +pushline(struct got_patch_hunk *h, const char *line, size_t len) { void *t; size_t newcap; @@ -157,10 +163,14 @@ pushline(struct got_patch_hunk *h, const char *line) h->cap = newcap; } - if ((t = strdup(line)) == NULL) - return got_error_from_errno("strdup"); + if ((t = malloc(len - 1)) == NULL) + return got_error_from_errno("malloc"); + memcpy(t, line + 1, len - 1); /* skip the line type */ - h->lines[h->len++] = t; + h->lines[h->len].mode = *line; + h->lines[h->len].line = t; + h->lines[h->len].len = len - 2; /* skip line type and trailing NUL */ + h->len++; return NULL; } @@ -301,7 +311,7 @@ recv_patch(struct imsgbuf *ibuf, int *done, struct got } if (*t != '\\') - err = pushline(h, t); + err = pushline(h, t, datalen); else if (lastmode == '-') h->old_nonl = 1; else if (lastmode == '+') @@ -351,10 +361,10 @@ reverse_patch(struct got_patch *p) h->new_nonl = tmp; for (i = 0; i < h->len; ++i) { - if (*h->lines[i] == '+') - *h->lines[i] = '-'; - else if (*h->lines[i] == '-') - *h->lines[i] = '+'; + if (h->lines[i].mode == '+') + h->lines[i].mode = '-'; + else if (h->lines[i].mode == '-') + h->lines[i].mode = '+'; } } } @@ -392,14 +402,15 @@ static int linecmp(const char *, const char *, int *); return NULL; } -static int linecmp(const char *, const char *, int *); +static int lineq(struct got_patch_line *, const char *, size_t, int *); static const struct got_error * locate_hunk(FILE *orig, struct got_patch_hunk *h, off_t *pos, int *lineno) { const struct got_error *err = NULL; + struct got_patch_line *l = &h->lines[0]; char *line = NULL; - char mode = *h->lines[0]; + char mode = l->mode; size_t linesize = 0; ssize_t linelen; off_t match = -1; @@ -414,12 +425,10 @@ locate_hunk(FILE *orig, struct got_patch_hunk *h, off_ err = got_error(GOT_ERR_HUNK_FAILED); break; } - if (line[linelen - 1] == '\n') - line[linelen - 1] = '\0'; (*lineno)++; - if ((mode == ' ' && !linecmp(h->lines[0] + 1, line, &mangled)) || - (mode == '-' && !linecmp(h->lines[0] + 1, line, &mangled)) || + if ((mode == ' ' && lineq(l, line, linelen, &mangled)) || + (mode == '-' && lineq(l, line, linelen, &mangled)) || (mode == '+' && *lineno == h->old_from)) { match = ftello(orig); if (match == -1) { @@ -449,27 +458,34 @@ linecmp(const char *a, const char *b, int *mangled) } static int -linecmp(const char *a, const char *b, int *mangled) +lineq(struct got_patch_line *l, const char *b, size_t len, int *mangled) { - int c; + char *a = l->line; + size_t i, j; + if (b[len - 1] == '\n') + len--; + *mangled = 0; - c = strcmp(a, b); - if (c == 0) - return c; + if (l->len == len && !memcmp(a, b, len)) + return 1; *mangled = 1; + + i = j = 0; for (;;) { - while (*a == '\t' || *a == ' ' || *a == '\f') - a++; - while (*b == '\t' || *b == ' ' || *b == '\f') - b++; - if (*a == '\0' || *a != *b) + while (i < l->len && + (a[i] == '\t' || a[i] == ' ' || a[i] == '\f')) + i++; + while (j < len && + (b[j] == '\t' || b[j] == ' ' || b[j] == '\f')) + j++; + if (i == l->len || j == len || a[i] != b[j]) break; - a++, b++; + i++, j++; } - return *a - *b; + return (i == l->len && j == len); } static const struct got_error * @@ -482,7 +498,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h) int mangled; for (i = 0; i < h->len; ++i) { - switch (*h->lines[i]) { + switch (h->lines[i].mode) { case '+': continue; case ' ': @@ -496,9 +512,7 @@ test_hunk(FILE *orig, struct got_patch_hunk *h) GOT_ERR_HUNK_FAILED); goto done; } - if (line[linelen - 1] == '\n') - line[linelen - 1] = '\0'; - if (linecmp(h->lines[i] + 1, line, &mangled)) { + if (!lineq(&h->lines[i], line, linelen, &mangled)) { err = got_error(GOT_ERR_HUNK_FAILED); goto done; } @@ -522,13 +536,14 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun size_t linesize = 0, i, new = 0; char *line = NULL; char mode; + size_t l; ssize_t linelen; if (orig != NULL && fseeko(orig, from, SEEK_SET) == -1) return got_error_from_errno("fseeko"); for (i = 0; i < h->len; ++i) { - switch (mode = *h->lines[i]) { + switch (mode = h->lines[i].mode) { case '-': case ' ': (*lineno)++; @@ -541,18 +556,24 @@ apply_hunk(FILE *orig, FILE *tmp, struct got_patch_hun if (line[linelen - 1] == '\n') line[linelen - 1] = '\0'; t = line; - } else - t = h->lines[i] + 1; + l = linelen - 1; + } else { + t = h->lines[i].line; + l = h->lines[i].len; + } if (mode == '-') continue; - if (fprintf(tmp, "%s\n", t) < 0) { + if (fwrite(t, 1, l, tmp) != l || + fputc('\n', tmp) == EOF) { err = got_error_from_errno("fprintf"); goto done; } break; case '+': new++; - if (fprintf(tmp, "%s", h->lines[i] + 1) < 0) { + t = h->lines[i].line; + l = h->lines[i].len; + if (fwrite(t, 1, l, tmp) != l) { err = got_error_from_errno("fprintf"); goto done; } blob - a5fe8c2470856e960745214cc536ddbc804c7ef0 file + libexec/got-read-patch/got-read-patch.c --- libexec/got-read-patch/got-read-patch.c +++ libexec/got-read-patch/got-read-patch.c @@ -445,23 +445,29 @@ send_line(const char *line) } static const struct got_error * -send_line(const char *line) +send_line(const char *line, size_t len) { static const struct got_error *err = NULL; - char *p = NULL; + struct iovec iov[2]; + int iovcnt = 0; + memset(&iov, 0, sizeof(iov)); + if (*line != '+' && *line != '-' && *line != ' ' && *line != '\\') { - if (asprintf(&p, " %s", line) == -1) - return got_error_from_errno("asprintf"); - line = p; + iov[iovcnt].iov_base = (void *)" "; + iov[iovcnt].iov_len = 1; + iovcnt++; } - if (imsg_compose(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1, - line, strlen(line) + 1) == -1) + iov[iovcnt].iov_base = (void *)line; + iov[iovcnt].iov_len = len; + iovcnt++; + + if (imsg_composev(&ibuf, GOT_IMSG_PATCH_LINE, 0, 0, -1, + iov, iovcnt) == -1) err = got_error_from_errno( "imsg_compose GOT_IMSG_PATCH_LINE"); - free(p); return err; } @@ -478,7 +484,7 @@ peek_special_line(FILE *fp) } if (ch == '\\') { - err = send_line("\\"); + err = send_line("\\", 2); if (err) return err; } @@ -568,7 +574,7 @@ parse_hunk(FILE *fp, int *done) goto done; } - err = send_line(line); + err = send_line(line, linelen); if (err) goto done; blob - 17db471e5c3aa81e0eb9233b406861cdccd08fcd file + regress/cmdline/patch.sh --- regress/cmdline/patch.sh +++ regress/cmdline/patch.sh @@ -1956,6 +1956,57 @@ test_parseargs "$@" test_done $testroot 0 } +test_patch_binary() { + local testroot=`test_init patch_binary` + + if ! got checkout "$testroot/repo" "$testroot/wt" >/dev/null; then + test_done "$testroot" $ret + return 1 + fi + + printf 'e\0ta' > $testroot/wt/eta + (cd "$testroot/wt" && got add eta && got commit -m '+eta') >/dev/null + ret=$? + if [ $ret -ne 0 ]; then + echo "got add failed unexpectedly" >&2 + test_done "$testroot" $ret + return 1 + fi + + printf 'et\0a' | tee "$testroot/wt/eta.expected" > $testroot/wt/eta + + (cd "$testroot/wt" && got diff -a > patch && got revert eta) >/dev/null + ret=$? + if [ $ret -ne 0 ]; then + echo "got diff or revert failed unexpectedly" >&2 + test_done "$testroot" $ret + return 1 + fi + + (cd "$testroot/wt" && got patch $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + echo "got patch failed unexpectedly" >&2 + test_done "$testroot" $ret + return 1 + fi + + echo 'G eta' > $testroot/stdout.expected + if ! cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then + diff -u "$testroot/stdout.expected" "$testroot/stdout" + test_done "$testroot" 1 + return 1 + fi + + if ! cmp -s "$testroot/wt/eta.expected" "$testroot/wt/eta"; then + diff -u "$testroot/wt/eta.expected" "$testroot/wt/eta" + test_done "$testroot" 1 + return 1 + fi + + test_done "$testroot" 0 +} + test_parseargs "$@" run_test test_patch_basic run_test test_patch_dont_apply @@ -1986,3 +2037,4 @@ run_test test_patch_remove_binary_file run_test test_patch_newfile_xbit_git_diff run_test test_patch_umask run_test test_patch_remove_binary_file +run_test test_patch_binary