"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: new diff implementation
To:
Christian Weisgerber <naddy@mips.inka.de>, gameoftrees@openbsd.org
Date:
Tue, 10 Nov 2020 22:02:24 +0100

Download raw body.

Thread
On Sat, Nov 07, 2020 at 10:08:04PM +0100, Stefan Sperling wrote:
> On Sat, Oct 24, 2020 at 02:50:17PM +0200, Stefan Sperling wrote:
> > Meanwhile Neels has fixed the segfault in patience diff. The patience
> > diff code does not have the above problem. The updated patch below was
> > generated with patience diff and round-trips correctly with a fresh
> > checkout of the Got source tree.
> 
> Here is a new patch which integrates the latest diff code.
> Neels has fixed several more issues in the patience implementation.
> It seems performance has improved quite a bit.
> 
> Since this is a large and potentially destabilizing change I would
> appreciate it a lot if people could run with this and report regressions.
> I have found several issues by running this code during OpenBSD development.
> It is not unlikely that there are still a few bugs hiding in here. Thanks!

One problem I have noticed is that 'got blame' shows wrong annotations
in some cases. Neels and I discovered that this is not a regression introduced
by the new diff implementation. Rather, there is a problem in the blame
algorithm implemented by Got which occurs regardless of the underlying diff
algorithm. 'got blame' assumes that a series of diffs of all older versions
of a file against the latest version of the file will result in stable diff
output for all newly added lines across history, but this is not guaranteed.

To address this problem we could switch to a blame algorithm which iteratively
compares commit N to commit N-1 and maps the result to the latest version of
the file by taking shifted line numbers into account.

I do not consider this problem a blocker for merging the new diff
implementation. In some cases the blame output will change but this
is just a different manifestation of the problem in Got's blame algorithm.

As one example, note how all lines belonging to 'struct tog_cmd {' in
tog/tog.c from Got's own source tree are misattributed to commit c2301be8
by got 0.42 (i.e. still using the old stone diff algorithm):
0073) c2301be8 2018-04-30 stsp     struct tog_cmd {
0074) c2301be8 2018-04-30 stsp     	const char *name;
0075) c2301be8 2018-04-30 stsp     	const struct got_error *(*cmd_main)(int, char *[]);
0076) c2301be8 2018-04-30 stsp     	void (*cmd_usage)(void);
0077) c2301be8 2018-04-30 stsp     };

The diff for commit c2301be8 follows below. It clearly did not introduce
all of the lines shown above.

The new diff code maps the same set of lines to commit cbb6b58a, which
is not correct either:
0073) cbb6b58a 2018-05-20 stsp     struct tog_cmd {
0074) cbb6b58a 2018-05-20 stsp     	const char *name;
0075) cbb6b58a 2018-05-20 stsp     	const struct got_error *(*cmd_main)(int, char *[]);
0076) cbb6b58a 2018-05-20 stsp     	void (*cmd_usage)(void);
0077) cbb6b58a 2018-05-20 stsp     };

-----------------------------------------------
commit c2301be8a8ce61e6c8f12911c32a70d636b2e5c9
from: Stefan Sperling <stsp@stsp.name>
date: Mon Apr 30 12:20:02 2018 UTC
 
 make tog(1) command argument optional
 
diff 202934099ce0b6cf60d87bebd61bdfe810becbd8 ed0eeb0dd6d54ee92bb9da1990b80ba9f3a3f1f3
blob - 66799dbd7b848d6c94d915932f14a15e397ee0c7
blob + 2b977a10f0a83d20daf46dce7156c9be121e59c2
--- tog/tog.c
+++ tog/tog.c
@@ -42,11 +42,11 @@ enum tog_view_id {
 };
 
 struct tog_cmd {
-	const char *cmd_name;
+	const char *name;
 	const struct got_error *(*cmd_main)(int, char *[]);
 	void (*cmd_usage)(void);
 	enum tog_view_id view;
-	const char *cmd_descr;
+	const char *descr;
 };
 
 __dead void	usage(void);
@@ -436,13 +436,34 @@ usage(void)
 {
 	int i;
 
-	fprintf(stderr, "usage: %s [-h] command [arg ...]\n\n"
+	fprintf(stderr, "usage: %s [-h] [command] [arg ...]\n\n"
 	    "Available commands:\n", getprogname());
 	for (i = 0; i < nitems(tog_commands); i++) {
 		struct tog_cmd *cmd = &tog_commands[i];
-		fprintf(stderr, "    %s: %s\n", cmd->cmd_name, cmd->cmd_descr);
+		fprintf(stderr, "    %s: %s\n", cmd->name, cmd->descr);
 	}
 	exit(1);
+}
+
+static char **
+make_argv(const char *arg0, const char *arg1)
+{
+	char **argv;
+	int argc = (arg1 == NULL ? 1 : 2);
+
+	argv = calloc(argc, sizeof(char *));
+	if (argv == NULL)
+		err(1, "calloc");
+	argv[0] = strdup(arg0);
+	if (argv[0] == NULL)
+		err(1, "calloc");
+	if (arg1) {
+		argv[1] = strdup(arg1);
+		if (argv[1] == NULL)
+			err(1, "calloc");
+	}
+
+	return argv;
 }
 
 int
@@ -451,6 +472,7 @@ main(int argc, char *argv[])
 	const struct got_error *error = NULL;
 	struct tog_cmd *cmd = NULL;
 	int ch, hflag = 0;
+	char **cmd_argv = NULL;
 
 	setlocale(LC_ALL, "");
 
@@ -468,14 +490,19 @@ main(int argc, char *argv[])
 	argc -= optind;
 	argv += optind;
 	optind = 0;
+	optreset = 1;
 
-	if (argc == 0)
+	if (argc == 0) {
+		/* Build an argument vector which runs a default command. */
 		cmd = &tog_commands[0];
-	else {
+		cmd_argv = make_argv(cmd->name, NULL);
+		argc = 1;
+	} else {
 		int i;
 
+		/* Did the user specific a command? */
 		for (i = 0; i < nitems(tog_commands); i++) {
-			if (strncmp(tog_commands[i].cmd_name, argv[0],
+			if (strncmp(tog_commands[i].name, argv[0],
 			    strlen(argv[0])) == 0) {
 				cmd = &tog_commands[i];
 				if (hflag)
@@ -484,9 +511,25 @@ main(int argc, char *argv[])
 			}
 		}
 		if (cmd == NULL) {
-			fprintf(stderr, "%s: unknown command '%s'\n",
-			    getprogname(), argv[0]);
-			return 1;
+			/* Did the user specify a repository? */
+			char *repo_path = realpath(argv[0], NULL);
+			if (repo_path) {
+				struct got_repository *repo;
+				error = got_repo_open(&repo, repo_path);
+				if (error == NULL)
+					got_repo_close(repo);
+			} else
+				error = got_error(GOT_ERR_NOT_GIT_REPO);
+			if (error) {
+				free(repo_path);
+				fprintf(stderr, "%s: unknown command '%s'\n",
+				    getprogname(), argv[0]);
+				return 1;
+			}
+			cmd = &tog_commands[0];
+			cmd_argv = make_argv(cmd->name, repo_path);
+			argc = 2;
+			free(repo_path);
 		}
 	}
 
@@ -496,11 +539,12 @@ main(int argc, char *argv[])
 		return 1;
 	}
 
-	error = cmd->cmd_main(argc, argv);
+	error = cmd->cmd_main(argc, cmd_argv ? cmd_argv : argv);
 	if (error)
 		goto done;
 done:
 	endwin();
+	free(cmd_argv);
 	if (error)
 		fprintf(stderr, "%s: %s\n", getprogname(), error->msg);
 	return 0;