Download raw body.
fix merge bug with dot on a line by itself
The merge code we inherited from OpenRCS is... gross :( In order to create the merged output the code generates a merge script which uses a pseudo ed-like scripting format. This script is essentially a list of patch instructions, describing lines which need to be added or changed in order to make the merged output appear in the target file of the merge. (Any lines deleted by the merge are eliminated by skipping them when reading input files. Hence the edit script only needs instructions for added lines and changed lines.) Whenever we need to copy a bunch of lines into the merged output we read data from an input file in BUFSIZ chunks and copy the data verbatim into the merge script. The entire block of BUFSIZ chunks is then terminated with a dot on a line by itself, as one would do in ed(1) input mode: . Later on, the merge script, which inlines this data, is read back from disk and split up into individual lines by rcs_splitlines(). If data we read in chunks from the input file also contained a dot on a line by itself, this dot will appear earlier than the dot which was intended to serve as the data block's delimiter. Any input lines after this dot end up being discarded during the merge. This silly bug is even mentioned in the man page of diff3(1). The man pages of rcsmerge(1) and merge(1) which use the same code are silent about the issue... HISTORY A diff3 command appeared in Version7 AT&T UNIX. BUGS The -e option cannot catch and change lines which have . as the first and only character on the line. The resulting script will fail on that line as . is an ed(1) editing command. The problem can be reproduced with the regression test I added in commit c1e86b1d5bb299f6bcc93a8024575a12da339286. Alternatively, get a clean work tree of Got and try to cherrypick the commit which added a test written by Omar, which uses an ed(1) script, into its parent commit: $ got up -c aaef19b43a34ed335737d733fbfea51056f2e2bb $ got cy 09209b8a13411e9d8464267c5e65c2c848455496 This is how I was reminded of this annoying issue. Most of Omar's test code had disappeared after I used 'got histedit' on a local branch which contained Omar's patch. Mixing meta data and content in the same data stream is a reliable way to screw things up. Ideally, we would rewrite diff3.c such that meta-data and file content are neatly separated, instead of mixing them up together. I have already started an attempt to do this, but it results in an uncomfortably large change. So I started looking for a smaller fix. Another way of "fixing" the issue without a major rewrite is to read data from input files in units of lines rather than BUFSIZ chunks. We can then prepend a symbol of our choice (other than a dot) before each line which represents content that is supposed to appear in the merged output. Now all content is "escaped" and will never result in a dot on a line by itself that can be mistaken as a delimiter. We strip our leading symbol before writing a line of content out. The patch below does this and makes the corresponding test pass. I still hope this code will be rewritten eventually. But with this change in place we can keep the code on life support a bit longer. ok? ----------------------------------------------- commit c29a883a0f450fc1faeeeab61d408831e845f249 (dotline) from: Stefan Sperling <stsp@stsp.name> date: Fri Oct 8 19:35:04 2021 UTC Fix merging of files which contain a dot on a line by itself. diff 394f52035a7769ff27296a70b8b254d8f04e3f06 49d08bec9b07d09edc4491554155e5c304a422e6 blob - 957c2c39351c73dff16d9995d46aa9be345f27ef blob + 4619d898de367bdd4a4c3ff090a223135dbb0935 --- lib/diff3.c +++ lib/diff3.c @@ -550,6 +550,10 @@ ed_patch_lines(struct rcs_lines *dlines, struct rcs_li lp->l_line[1] == '\n') break; + if (lp->l_line[0] == ':') { + lp->l_line++; + lp->l_len--; + } TAILQ_REMOVE(&(plines->l_lines), lp, l_list); TAILQ_INSERT_AFTER(&(dlines->l_lines), dlp, lp, l_list); @@ -975,8 +979,10 @@ static const struct got_error * edscript(int n, struct diff3_state *d3s) { const struct got_error *err = NULL; - size_t k, len; - char block[BUFSIZ+1]; + off_t len; + char *line = NULL; + size_t linesize = 0; + ssize_t linelen, k; for (; n > 0; n--) { if (!d3s->overlap[n]) { @@ -985,79 +991,76 @@ edscript(int n, struct diff3_state *d3s) return err; } else if (d3s->de[n].oldo.from < d3s->de[n].oldo.to) { /* Output a block of 3-way diff base file content. */ - err = diff_output(d3s->diffbuf, "%da\n%s\n", + err = diff_output(d3s->diffbuf, "%da\n:%s\n", d3s->de[n].old.to - 1, d3s->f2mark); if (err) return err; if (fseeko(d3s->fp[1], d3s->de[n].oldo.from, SEEK_SET) == -1) return got_error_from_errno("fseeko"); - k = (size_t)(d3s->de[n].oldo.to - d3s->de[n].oldo.from); - for (; k > 0; k -= len) { - size_t r; - len = k > BUFSIZ ? BUFSIZ : k; - r = fread(block, 1, len, d3s->fp[1]); - if (r == 0) { + len = (d3s->de[n].oldo.to - d3s->de[n].oldo.from); + for (k = 0; k < (ssize_t)len; k += linelen) { + linelen = getline(&line, &linesize, d3s->fp[1]); + if (linelen == -1) { if (feof(d3s->fp[1])) break; - return got_ferror(d3s->fp[1], + err = got_ferror(d3s->fp[1], GOT_ERR_IO); + goto done; } - if (r != len) - len = r; - block[len] = '\0'; - err = diff_output(d3s->diffbuf, "%s", block); + err = diff_output(d3s->diffbuf, ":%s", line); if (err) - return err; + goto done; } - err = diff_output(d3s->diffbuf, "%s\n", + err = diff_output(d3s->diffbuf, "%s%s\n", + line[linelen] == '\n' ? ":" : "", GOT_DIFF_CONFLICT_MARKER_SEP); if (err) - return err; + goto done; } else { - err = diff_output(d3s->diffbuf, "%da\n%s\n", + err = diff_output(d3s->diffbuf, "%da\n:%s\n", d3s->de[n].old.to -1, GOT_DIFF_CONFLICT_MARKER_SEP); if (err) - return err; + goto done; } if (fseeko(d3s->fp[2], d3s->de[n].newo.from, SEEK_SET) - == -1) - return got_error_from_errno("fseek"); - k = (size_t)(d3s->de[n].newo.to - d3s->de[n].newo.from); - for (; k > 0; k -= len) { - size_t r; - len = k > BUFSIZ ? BUFSIZ : k; - r = fread(block, 1, len, d3s->fp[2]); - if (r == 0) { + == -1) { + err = got_error_from_errno("fseek"); + goto done; + } + len = (d3s->de[n].newo.to - d3s->de[n].newo.from); + for (k = 0; k < (ssize_t)len; k += linelen) { + linelen = getline(&line, &linesize, d3s->fp[2]); + if (linelen == -1) { if (feof(d3s->fp[2])) break; - return got_ferror(d3s->fp[2], - GOT_ERR_IO); + err = got_ferror(d3s->fp[2], GOT_ERR_IO); + goto done; } - if (r != len) - len = r; - block[len] = '\0'; - err = diff_output(d3s->diffbuf, "%s", block); + err = diff_output(d3s->diffbuf, ":%s", line); if (err) - return err; + goto done; } if (!d3s->overlap[n]) { err = diff_output(d3s->diffbuf, ".\n"); if (err) - return err; + goto done; } else { - err = diff_output(d3s->diffbuf, "%s\n.\n", d3s->f3mark); + err = diff_output(d3s->diffbuf, "%s%s\n.\n", + line[linelen] == '\n' ? ":" : "", + d3s->f3mark); if (err) - return err; - err = diff_output(d3s->diffbuf, "%da\n%s\n.\n", + goto done; + err = diff_output(d3s->diffbuf, "%da\n:%s\n.\n", d3s->de[n].old.from - 1, d3s->f1mark); if (err) - return err; + goto done; } } - - return NULL; +done: + free(line); + return err; } static const struct got_error * blob - fdd6d5c983d568d8d1d4b48e66ec1d1400a5abea blob + e03fd43fd1cb428c958535c9201a1f65fe2366ad --- regress/cmdline/cherrypick.sh +++ regress/cmdline/cherrypick.sh @@ -1435,7 +1435,7 @@ test_cherrypick_dot_on_a_line_by_itself() { fi (cd $testroot/repo && git checkout -q -b newbranch) - printf "modified\ndelta\n.\non\nbranch\n" > $testroot/repo/gamma/delta + printf "modified\n:delta\n.\non\n:branch\n" > $testroot/repo/gamma/delta git_commit $testroot/repo -m "committing to delta on newbranch" local branch_rev=`git_show_head $testroot/repo` @@ -1452,13 +1452,12 @@ test_cherrypick_dot_on_a_line_by_itself() { return 1 fi - printf "modified\ndelta\n.\non\nbranch\n" > $testroot/content.expected + printf "modified\n:delta\n.\non\n:branch\n" > $testroot/content.expected cat $testroot/wt/gamma/delta > $testroot/content cmp -s $testroot/content.expected $testroot/content ret="$?" if [ "$ret" != "0" ]; then - #diff -u $testroot/content.expected $testroot/content - ret="xfail (badly merged content)" + diff -u $testroot/content.expected $testroot/content fi test_done "$testroot" "$ret" }
fix merge bug with dot on a line by itself