"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
rework the patch parser a bit
To:
gameoftrees@openbsd.org
Date:
Tue, 28 Jun 2022 00:16:02 +0200

Download raw body.

Thread
At a higher level, the patch parser works like this:

	while (find_patch()) {
		while (parse_hunk())
			;
		end_of_patch();
	}

this works for plain diffs and "git diffs", because each patch has a
header.  However, "got diff" is different: it prints some info only at
the top, like the "diff" and "commit -/+" headers.  It's fine IMHO, as
it avoids some noise, but got patch needs to be tweaked.

Currently "got patch" will read the commit from the first patch, but if
there is a second (or third, ...) patched file it forgets the commit and
only read the "blob -" line.  This matter because if the diff3 merge
ends up with a conflict we use a less-useful "blob abc" label instead of
"commit xyz".  (In the future we may also want to use the commit info
for other things.)

While here it'd be also nice to start making the patch parser a little
bit strictier.

The following patch splits the parser and introduces a `patch_start'
routine that finds the "diff" header (if there is).  This way, we can
persist some state (commit id and wether it's a "git diff") while
processing the content of the diff.

diff refs/heads/main refs/heads/refdif
commit - db0dfdd7e5c2c5a38ed7c3291a0615132bcb5945
commit + 75d1b739753f13a1ec27c7619001e6f37ed071f6
blob - d6eea51f8d3203ae05b110adaac04ee330bd4d28
blob + a48a4df951d4896c4fd29a461f3c17002b01bc3e
--- libexec/got-read-patch/got-read-patch.c
+++ libexec/got-read-patch/got-read-patch.c
@@ -150,16 +150,61 @@ blobid(const char *line, char **blob, int git)
 }
 
 static const struct got_error *
-find_patch(int *done, FILE *fp)
+patch_start(int *git, char **cid, FILE *fp)
 {
 	const struct got_error *err = NULL;
+	char	*line = NULL;
+	size_t	 linesize = 0;
+	ssize_t	 linelen;
+
+	*git = 0;
+
+	while ((linelen = getline(&line, &linesize, fp)) != -1) {
+		if (!strncmp(line, "diff --git ", 11)) {
+			*git = 1;
+			free(*cid);
+			*cid = NULL;
+			break;
+		} else if (!strncmp(line, "diff ", 5)) {
+			*git = 0;
+			free(*cid);
+			*cid = NULL;
+		} else if (!strncmp(line, "commit - ", 9)) {
+			free(*cid);
+			err = blobid(line + 9, cid, *git);
+			if (err)
+				break;
+		} else if (!strncmp(line, "--- ", 4) ||
+		    !strncmp(line, "+++ ", 4) ||
+		    !strncmp(line, "blob - ", 7)) {
+			/* rewind to previous line */
+			if (fseeko(fp, -linelen, SEEK_CUR) == -1)
+				err = got_error_from_errno("fseeko");
+			break;
+		}
+	}
+
+	free(line);
+	if (ferror(fp) && err == NULL)
+		err = got_error_from_errno("getline");
+	if (feof(fp) && err == NULL)
+		err = got_error(GOT_ERR_NO_PATCH);
+	return err;
+}
+
+static const struct got_error *
+find_diff(int *done, int *next, FILE *fp, int git, const char *commitid)
+{
+	const struct got_error *err = NULL;
 	char	*old = NULL, *new = NULL;
-	char	*commitid = NULL, *blob = NULL;
+	char	*blob = NULL;
 	char	*line = NULL;
 	size_t	 linesize = 0;
 	ssize_t	 linelen;
-	int	 create, rename = 0, git = 0;
+	int	 create, rename = 0;
 
+	*done = 0;
+	*next = 0;
 	while ((linelen = getline(&line, &linesize, fp)) != -1) {
 		/*
 		 * Ignore the Index name like GNU and larry' patch,
@@ -186,18 +231,12 @@ find_patch(int *done, FILE *fp)
 		else if (git && !strncmp(line, "index ", 6)) {
 			free(blob);
 			err = blobid(line + 6, &blob, git);
-		} else if (!strncmp(line, "diff --git a/", 13)) {
-			git = 1;
-			free(commitid);
-			commitid = NULL;
-			free(blob);
-			blob = NULL;
-		} else if (!git && !strncmp(line, "diff ", 5)) {
-			free(commitid);
-			err = blobid(line + 5, &commitid, git);
-		} else if (!git && !strncmp(line, "commit - ", 9)) {
-			free(commitid);
-			err = blobid(line + 9, &commitid, git);
+		} else if (!strncmp(line, "diff ", 5)) {
+			/* rewind to previous line */
+			if (fseeko(fp, -linelen, SEEK_CUR) == -1)
+				err = got_error_from_errno("fseeko");
+			*next = 1;
+			break;
 		}
 
 		if (err)
@@ -236,7 +275,6 @@ find_patch(int *done, FILE *fp)
 
 	free(old);
 	free(new);
-	free(commitid);
 	free(blob);
 	free(line);
 	if (ferror(fp) && err == NULL)
@@ -480,7 +518,8 @@ read_patch(struct imsgbuf *ibuf, int fd)
 {
 	const struct got_error *err = NULL;
 	FILE *fp;
-	int patch_found = 0;
+	int git, patch_found = 0;
+	char *cid = NULL;
 
 	if ((fp = fdopen(fd, "r")) == NULL) {
 		err = got_error_from_errno("fdopen");
@@ -488,12 +527,14 @@ read_patch(struct imsgbuf *ibuf, int fd)
 		return err;
 	}
 
-	while (!feof(fp)) {
-		int done = 0;
+	while ((err = patch_start(&git, &cid, fp)) == NULL) {
+		int done, next;
 
-		err = find_patch(&done, fp);
+		err = find_diff(&done, &next, fp, git, cid);
 		if (err)
 			goto done;
+		if (next)
+			continue;
 
 		patch_found = 1;
 
@@ -510,6 +551,7 @@ read_patch(struct imsgbuf *ibuf, int fd)
 
 done:
 	fclose(fp);
+	free(cid);
 
 	/* ignore trailing gibberish */
 	if (err != NULL && err->code == GOT_ERR_NO_PATCH && patch_found)
blob - 51903218ccbe2f14d00ba02be9bbc8ead7c2f1e4
blob + 40309e5b82bddf39321e6da00a91b79dd7ec65b0
--- regress/cmdline/patch.sh
+++ regress/cmdline/patch.sh
@@ -1585,9 +1585,10 @@ test_patch_merge_conflict() {
 	local commit_id=`git_show_head $testroot/repo`
 
 	jot 10 | sed 's/6/six/g' > $testroot/wt/numbers
+	echo ALPHA > $testroot/wt/alpha
 
 	(cd $testroot/wt && got diff > $testroot/old.diff \
-		&& got revert numbers) >/dev/null
+		&& got revert alpha numbers) >/dev/null
 	ret=$?
 	if [ $ret -ne 0 ]; then
 		test_done $testroot $ret
@@ -1595,7 +1596,8 @@ test_patch_merge_conflict() {
 	fi
 
 	jot 10 | sed 's/6/3+3/g' > $testroot/wt/numbers
-	(cd $testroot/wt && got commit -m 'edit numbers') \
+	jot -c 3 a > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m 'edit alpha and numbers') \
 		> /dev/null
 	ret=$?
 	if [ $ret -ne 0 ]; then
@@ -1612,7 +1614,8 @@ test_patch_merge_conflict() {
 		return 1
 	fi
 
-	echo 'C  numbers' > $testroot/stdout.expected
+	echo 'C  alpha' > $testroot/stdout.expected
+	echo 'C  numbers' >> $testroot/stdout.expected
 	cmp -s $testroot/stdout $testroot/stdout.expected
 	ret=$?
 	if [ $ret -ne 0 ]; then
@@ -1623,6 +1626,18 @@ test_patch_merge_conflict() {
 
 	# XXX: prefixing every line with a tab otherwise got thinks
 	# the file has conflicts in it.
+	cat <<-EOF > $testroot/wt/alpha.expected
+	<<<<<<< --- alpha
+	ALPHA
+	||||||| commit $commit_id
+	alpha
+	=======
+	a
+	b
+	c
+	>>>>>>> +++ alpha
+EOF
+
 	cat <<-EOF > $testroot/wt/numbers.expected
 	1
 	2
@@ -1642,6 +1657,14 @@ test_patch_merge_conflict() {
 	10
 EOF
 
+	cmp -s $testroot/wt/alpha $testroot/wt/alpha.expected
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/wt/alpha $testroot/wt/alpha.expected
+		test_done $testroot $ret
+		return 1
+	fi
+
 	cmp -s $testroot/wt/numbers $testroot/wt/numbers.expected
 	ret=$?
 	if [ $ret -ne 0 ]; then