"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: merge chokes, creates bogus conflicts
To:
Christian Weisgerber <naddy@mips.inka.de>, gameoftrees@openbsd.org
Date:
Sun, 19 Feb 2023 18:38:41 +1100

Download raw body.

Thread
On 23-02-18 04:45PM, Stefan Sperling wrote:
> On Sun, Feb 19, 2023 at 01:57:23AM +1100, Mark Jamsek wrote:
> > Updated diff incorporates all your suggestions. Thanks for the
> > improvements :)
> 
> Thanks! ok by me. We can add the diff_result performance tweak
> on top once you or myself find time to implement it.

The below diff implements this optimisation by using diff_result and
bypassing the somewhat expensive diff format and output path.

It also fixes a bug in the previous revision where the "from" file (f1)
was always empty because the blob was consumed by the caller and needed
a got_object_blob_rewind() call. So we were effectively adding the whole
file thus making the process that much slower!

Also, that update.sh change that I spent far too much time debating
myself over has now reverted from "M  foo" back to "C  foo"--and I think
this is actually right:

In the test, we delete "foo" in the previous commit, so it actually is
a new conflict. The previous approach of parsing the text diff
for added lines turned the report from "C  foo" to "M  foo" because it
wasn't forcing ASCII, thus the diff didn't have any '+' lines, only the
"Binary files ... differ" line (i.e., no conflict markers). If we set
the 'force_text_diff' flag in the previous revision, it does indeed
report as a conflict. Thus this diff is not only an optimisation but
also fixes two bugs :)

diffstat /home/mark/src/got
 M  lib/worktree.c             |  72+  45-
 M  regress/cmdline/update.sh  |   1+   1-

2 files changed, 73 insertions(+), 46 deletions(-)

diff /home/mark/src/got
commit - e5b3f6fb20301629598555a3a5be9845c20cf42f
path + /home/mark/src/got
blob - 055da929b846e6b27b801620463ae896ca9faf0a
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -1513,6 +1513,8 @@ done:
 	return err;
 }
 
+static const struct got_error *skip_one_line(FILE *);
+
 /*
  * Upgrade STATUS_MODIFY to STATUS_CONFLICT if a
  * conflict marker is found in newly added lines only.
@@ -1522,77 +1524,102 @@ get_modified_file_content_status(unsigned char *status
     struct got_blob_object *blob, const char *path, struct stat *sb,
     FILE *ondisk_file)
 {
-	const struct got_error *err = NULL;
+	const struct got_error *err, *free_err;
 	const char *markers[3] = {
 		GOT_DIFF_CONFLICT_MARKER_BEGIN,
 		GOT_DIFF_CONFLICT_MARKER_SEP,
 		GOT_DIFF_CONFLICT_MARKER_END
 	};
-	FILE *f = NULL, *f1 = NULL;
-	int i = 0;
+	FILE *f1 = NULL;
+	struct got_diffreg_result *diffreg_result = NULL;
+	struct diff_result *r;
+	int nchunks_parsed, n, i = 0, ln = 0;
 	char *line = NULL;
 	size_t linesize = 0;
 	ssize_t linelen;
-	off_t sz1 = 0;
 
-	if (*status == GOT_STATUS_MODIFY) {
-		f = got_opentemp();
-		if (f == NULL)
-			return got_error_from_errno("got_opentemp");
+	if (*status != GOT_STATUS_MODIFY)
+		return NULL;
 
-		f1 = got_opentemp();
-		if (f1 == NULL) {
-			err = got_error_from_errno("got_opentemp");
+	f1 = got_opentemp();
+	if (f1 == NULL)
+		return got_error_from_errno("got_opentemp");
+
+	if (blob) {
+		got_object_blob_rewind(blob);
+		err = got_object_blob_dump_to_file(NULL, NULL, NULL, f1, blob);
+		if (err)
 			goto done;
+	}
+
+	err = got_diff_files(&diffreg_result, f1, 1, NULL, ondisk_file,
+	    1, NULL, 0, 0, 1, NULL, GOT_DIFF_ALGORITHM_MYERS);
+	if (err)
+		goto done;
+
+	if (fseek(ondisk_file, 0L, SEEK_SET) == -1) {
+		err = got_ferror(ondisk_file, GOT_ERR_IO);
+		goto done;
+	}
+
+	r = diffreg_result->result;
+
+	for (n = 0; n < r->chunks.len; n += nchunks_parsed) {
+		struct diff_chunk *c;
+		struct diff_chunk_context cc = {};
+		int clc, crc;
+
+		/*
+		 * We can optimise a little by advancing straight
+		 * to the next chunk if this one has no added lines.
+		 */
+		c = diff_chunk_get(r, n);
+		clc = diff_chunk_get_left_count(c);
+		crc = diff_chunk_get_right_count(c);
+
+		if (!crc && clc) {
+			nchunks_parsed = 1;
+			continue;  /* removed lines */
 		}
 
-		if (blob) {
-			err = got_object_blob_dump_to_file(&sz1, NULL, NULL,
-			    f1, blob);
+		diff_chunk_context_load_change(&cc, &nchunks_parsed, r, n, 0);
+
+		while (ln < cc.right.start) {
+			err = skip_one_line(ondisk_file);
 			if (err)
 				goto done;
+			++ln;
 		}
 
-		err = got_diff_blob_file(blob, f1, sz1, NULL, ondisk_file, 1,
-		    sb, path, GOT_DIFF_ALGORITHM_MYERS, 0, 0, 0, NULL, f);
-		if (err)
-			goto done;
-
-		if (fflush(f) == EOF) {
-			err = got_error_from_errno("fflush");
-			goto done;
-		}
-		if (fseeko(f, 0L, SEEK_SET) == -1) {
-			err = got_error_from_errno("fseek");
-			goto done;
-		}
-	}
-
-	while (*status == GOT_STATUS_MODIFY) {
-		linelen = getline(&line, &linesize, f);
-		if (linelen == -1) {
-			if (feof(f))
+		while (ln < cc.right.end) {
+			linelen = getline(&line, &linesize, ondisk_file);
+			if (linelen == -1) {
+				if (feof(ondisk_file))
+					break;
+				err = got_ferror(ondisk_file, GOT_ERR_IO);
 				break;
-			err = got_ferror(f, GOT_ERR_IO);
-			break;
-		}
+			}
 
-		if (*line == '+' &&
-		    strncmp(line + 1, markers[i], strlen(markers[i])) == 0) {
-			if (strcmp(markers[i], GOT_DIFF_CONFLICT_MARKER_END)
-			    == 0)
-				*status = GOT_STATUS_CONFLICT;
-			else
-				i++;
+			if (line && strncmp(line, markers[i],
+			    strlen(markers[i])) == 0) {
+				if (strcmp(markers[i],
+				    GOT_DIFF_CONFLICT_MARKER_END) == 0) {
+					*status = GOT_STATUS_CONFLICT;
+					goto done;
+				} else
+					i++;
+			}
+			++ln;
 		}
 	}
 
 done:
 	free(line);
-	if (f != NULL && fclose(f) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
 	if (f1 != NULL && fclose(f1) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
+	free_err = got_diffreg_result_free(diffreg_result);
+	if (err == NULL)
+		err = free_err;
 
 	return err;
 }
blob - 1ab6bfe75bae74735e4040c6d98ea1dea7cd9dc3
file + regress/cmdline/update.sh
--- regress/cmdline/update.sh
+++ regress/cmdline/update.sh
@@ -2970,7 +2970,7 @@ test_update_binary_file() {
 	fi
 
 	(cd $testroot/wt && got status > $testroot/stdout)
-	echo 'M  foo' > $testroot/stdout.expected
+	echo 'C  foo' > $testroot/stdout.expected
 	echo -n '?  ' >> $testroot/stdout.expected
 	(cd $testroot/wt && ls foo-1-* >> $testroot/stdout.expected)
 	echo -n '?  ' >> $testroot/stdout.expected

-- 
Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68