Download raw body.
diff.git: Fix ed script output
On Tue, Aug 30, 2022 at 04:55:39PM +0100, Tom Jones wrote:
> On Tue, Aug 30, 2022 at 05:35:31PM +0200, Stefan Sperling wrote:
> > On Tue, Aug 30, 2022 at 04:28:07PM +0100, Tom Jones wrote:
> > > On Tue, Aug 30, 2022 at 05:19:29PM +0200, Stefan Sperling wrote:
> > > > On Tue, Aug 30, 2022 at 02:47:58PM +0100, Tom Jones wrote:
> > > > > Prior to this change ed script output was in the wrong order, i.e. in
> > > > > the order diff_result provides and changes where missing added or
> > > > > changed lines. ed edits need to be in reverse order to keep the edited
> > > > > file in sync
> > > >
> > > > As it is, this patch breaks merging on Got, which relies on the
> > > > existing output. The existing output is based on the 'ed-style'
> > > > script which Caldera diff3(1) generates internally. As you've
> > > > discovered this seems to not match traditional diff(1) ed output,
> > > > which is unfortunate, and I was unaware of this when I added the
> > > > ed output module for the purpose of supporting Got's merging code.
> > > >
> > > > So we will need both variants. Should we add a flag?
> > > > Or would providing two different ed-output modules be better?
> > >
> > > Does the got version include the changed/added lines? The diff.git one
> > > didn't, which made me think it didn't work.
> > >
> > > In addition to ed script diff also has forward ed (-f) with mangled commands
> > > and reverse ed (-n).
> > >
> > > -f --forward-ed
> > > Identical output to that of the -e flag, but in reverse order.
> > > It cannot be digested by ed(1).
> > >
> > > -n Produces a script similar to that of -e, but in the opposite
> > > order and with a count of changed lines on each insert or delete
> > > command. This is the form used by rcsdiff.
> > >
> > > Is one of these the input that diff3 requires?
> >
> > I am not sure which it is.
> > I just worked towards what the diff3 code expects, and tested my changes
> > by running the Got test suite. The diff3 code used by Got is the same as
> > used by OpenRCS and OpenCVS. FreeBSD has this code in usr.bin/diff3/diff3.c.
>
> Weird.
>
> I have done some work on diff3 in FreeBSD recently. I'll have a look
> into this and respond with a suggestion for how to handle this, it might
> be that diff3 is happy just getting the edit ranges from the diff
> program and doesn't validate the actual ed script..
>
Hi,
I have looked into this. getchange in diff3.c takes any line that starts
with an integer and treats it as a hunk header.
The current diff_output_edscript.c is creating valid classic diff
headers rather than ed edits. This seems to mean that while the edscript
output works, it is isn't correct just in the way that diff3 wants.
I have substituted in the updated diff_output_plain.c from yesterday and
am able to pass got's `make regress`.
For this case there is an optimisation where diff_output_plain only
generates the headers for a hunk, but I'll leave that path for future
hackers.
Follows is a diff with the changes I made to got, they are minimal
beyond copying over the updated diff_output_plain.c
- Tom
diff --git a/lib/diff3.c b/lib/diff3.c
index 3acd3174..ca41b5e1 100644
--- a/lib/diff3.c
+++ b/lib/diff3.c
@@ -241,7 +241,7 @@ diffreg(BUF **d, const char *path1, const char *path2,
}
err = got_diffreg_output(NULL, NULL, diffreg_result, 1, 1, "", "",
- GOT_DIFF_OUTPUT_EDSCRIPT, 0, outfile);
+ GOT_DIFF_OUTPUT_PLAIN, 0, outfile);
if (err)
goto done;
diff --git a/lib/diff_output_plain.c b/lib/diff_output_plain.c
index 9053d5ed..1ead9121 100644
--- a/lib/diff_output_plain.c
+++ b/lib/diff_output_plain.c
@@ -27,42 +27,197 @@
#include "diff_internal.h"
+static int
+output_plain_chunk(struct diff_output_info *outinfo,
+ FILE *dest, const struct diff_input_info *info,
+ const struct diff_result *result,
+ struct diff_chunk_context *cc)
+{
+ off_t outoff = 0, *offp;
+ int left_start, left_len, right_start, right_len;
+ int rc;
+ bool change = false;
+
+ left_len = cc->left.end - cc->left.start;
+ if (left_len < 0)
+ return EINVAL;
+ else if (result->left->atoms.len == 0)
+ left_start = 0;
+ else if (left_len == 0 && cc->left.start > 0)
+ left_start = cc->left.start;
+ else if (cc->left.end > 0)
+ left_start = cc->left.start + 1;
+ else
+ left_start = cc->left.start;
+
+ right_len = cc->right.end - cc->right.start;
+ if (right_len < 0)
+ return EINVAL;
+ else if (result->right->atoms.len == 0)
+ right_start = 0;
+ else if (right_len == 0 && cc->right.start > 0)
+ right_start = cc->right.start;
+ else if (cc->right.end > 0)
+ right_start = cc->right.start + 1;
+ else
+ right_start = cc->right.start;
+
+ if (left_len == 0) {
+ /* addition */
+ if (right_len == 1) {
+ rc = fprintf(dest, "%da%d\n", left_start, right_start);
+ } else {
+ rc = fprintf(dest, "%da%d,%d\n", left_start,
+ right_start, cc->right.end);
+ }
+ } else if (right_len == 0) {
+ /* deletion */
+ if (left_len == 1) {
+ rc = fprintf(dest, "%dd%d\n", left_start,
+ right_start);
+ } else {
+ rc = fprintf(dest, "%d,%dd%d\n", left_start,
+ cc->left.end, right_start);
+ }
+ } else {
+ /* change */
+ change = true;
+ if (left_len == 1 && right_len == 1) {
+ rc = fprintf(dest, "%dc%d\n", left_start, right_start);
+ } else if (left_len == 1) {
+ rc = fprintf(dest, "%dc%d,%d\n", left_start,
+ right_start, cc->right.end);
+ } else if (right_len == 1) {
+ rc = fprintf(dest, "%d,%dc%d\n", left_start,
+ cc->left.end, right_start);
+ } else {
+ rc = fprintf(dest, "%d,%dc%d,%d\n", left_start,
+ cc->left.end, right_start, cc->right.end);
+ }
+ }
+
+ /*
+ * Now write out all the joined chunks.
+ *
+ * If the hunk denotes a change, it will come in the form of a deletion
+ * chunk followed by a addition chunk. Print a marker to break up the
+ * additions and deletions when this happens.
+ */
+ int c_idx;
+ for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
+ const struct diff_chunk *c = &result->chunks.head[c_idx];
+ if (c->left_count && !c->right_count)
+ rc = diff_output_lines(outinfo, dest,
+ c->solved ? "< " : "?",
+ c->left_start, c->left_count);
+ else if (c->right_count && !c->left_count) {
+ if (change)
+ fprintf(dest, "---\n");
+ rc = diff_output_lines(outinfo, dest,
+ c->solved ? "> " : "?",
+ c->right_start, c->right_count);
+ }
+ if (rc)
+ return rc;
+ if (cc->chunk.end == result->chunks.len) {
+ rc = diff_output_trailing_newline_msg(outinfo, dest, c);
+ if (rc != DIFF_RC_OK)
+ return rc;
+ }
+ }
+
+ if (rc < 0)
+ return errno;
+ if (outinfo) {
+ ARRAYLIST_ADD(offp, outinfo->line_offsets);
+ if (offp == NULL)
+ return ENOMEM;
+ outoff += rc;
+ *offp = outoff;
+ }
+
+ return DIFF_RC_OK;
+}
+
int
-diff_output_plain(struct diff_output_info **output_info, FILE *dest,
- const struct diff_input_info *info,
- const struct diff_result *result)
+diff_output_plain(struct diff_output_info **output_info,
+ FILE *dest, const struct diff_input_info *info,
+ const struct diff_result *result)
{
struct diff_output_info *outinfo = NULL;
+ struct diff_chunk_context cc = {};
+ int atomizer_flags = (result->left->atomizer_flags|
+ result->right->atomizer_flags);
+ int flags = (result->left->root->diff_flags |
+ result->right->root->diff_flags);
+ bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
+ bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
int i, rc;
if (!result)
return EINVAL;
if (result->rc != DIFF_RC_OK)
return result->rc;
-
+
if (output_info) {
*output_info = diff_output_info_alloc();
if (*output_info == NULL)
- return errno;
+ return ENOMEM;
outinfo = *output_info;
}
+ if (have_binary && !force_text) {
+ for (i = 0; i < result->chunks.len; i++) {
+ struct diff_chunk *c = &result->chunks.head[i];
+ enum diff_chunk_type t = diff_chunk_type(c);
+
+ if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+ continue;
+
+ fprintf(dest, "Binary files %s and %s differ\n",
+ diff_output_get_label_left(info),
+ diff_output_get_label_right(info));
+ break;
+ }
+
+ return DIFF_RC_OK;
+ }
+
for (i = 0; i < result->chunks.len; i++) {
- struct diff_chunk *c = &result->chunks.head[i];
- if (c->left_count && c->right_count)
- rc = diff_output_lines(outinfo, dest,
- c->solved ? " " : "?",
- c->left_start, c->left_count);
- else if (c->left_count && !c->right_count)
- rc = diff_output_lines(outinfo, dest,
- c->solved ? "-" : "?",
- c->left_start, c->left_count);
- else if (c->right_count && !c->left_count)
- rc = diff_output_lines(outinfo, dest,
- c->solved ? "+" : "?",
- c->right_start, c->right_count);
- if (rc)
+ struct diff_chunk *chunk = &result->chunks.head[i];
+ enum diff_chunk_type t = diff_chunk_type(chunk);
+ struct diff_chunk_context next;
+
+ if (t != CHUNK_MINUS && t != CHUNK_PLUS)
+ continue;
+
+ if (diff_chunk_context_empty(&cc)) {
+ /* Note down the start point, any number of subsequent
+ * chunks may be joined up to this chunk by being
+ * directly adjacent. */
+ diff_chunk_context_get(&cc, result, i, 0);
+ continue;
+ }
+
+ /* There already is a previous chunk noted down for being
+ * printed. Does it join up with this one? */
+ diff_chunk_context_get(&next, result, i, 0);
+
+ if (diff_chunk_contexts_touch(&cc, &next)) {
+ /* This next context touches or overlaps the previous
+ * one, join. */
+ diff_chunk_contexts_merge(&cc, &next);
+ /* When we merge the last chunk we can end up with one
+ * hanging chunk and have to come back for it after the
+ * loop */
+ continue;
+ }
+ rc = output_plain_chunk(outinfo, dest, info, result, &cc);
+ if (rc != DIFF_RC_OK)
return rc;
+ cc = next;
}
+ if (!diff_chunk_context_empty(&cc))
+ return output_plain_chunk(outinfo, dest, info, result, &cc);
return DIFF_RC_OK;
}
diff --git a/lib/diffreg.c b/lib/diffreg.c
index 6bd8285d..077eb7fc 100644
--- a/lib/diffreg.c
+++ b/lib/diffreg.c
@@ -277,8 +277,8 @@ got_diffreg_output(struct got_diff_line **lines, size_t *nlines,
if (rc != DIFF_RC_OK)
return got_error_set_errno(rc, "diff_output_unidiff");
break;
- case GOT_DIFF_OUTPUT_EDSCRIPT:
- rc = diff_output_edscript(lines ? &output_info : NULL,
+ case GOT_DIFF_OUTPUT_PLAIN:
+ rc = diff_output_plain(lines ? &output_info : NULL,
outfile, &info, diff_result->result);
if (rc != DIFF_RC_OK)
return got_error_set_errno(rc, "diff_output_edscript");
diff --git a/lib/got_lib_diff.h b/lib/got_lib_diff.h
index 740595ae..8aae9cbb 100644
--- a/lib/got_lib_diff.h
+++ b/lib/got_lib_diff.h
@@ -20,7 +20,7 @@
enum got_diff_output_format {
GOT_DIFF_OUTPUT_UNIDIFF,
- GOT_DIFF_OUTPUT_EDSCRIPT,
+ GOT_DIFF_OUTPUT_PLAIN,
};
struct got_diffreg_result {
diff.git: Fix ed script output