"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: got: add diffstat to got diffg
To:
Mark Jamsek <mark@jamsek.com>
Cc:
Game of Trees <gameoftrees@openbsd.org>
Date:
Mon, 09 Jan 2023 13:17:05 +0100

Download raw body.

Thread
On 2023/01/09 22:38:37 +1100, Mark Jamsek <mark@jamsek.com> wrote:
> As mentioned on irc, you convinced me we should always display the diff
> with -d, so the below diff makes this change from the previous revision.
> 
> We now output the diffstat first, which is followed by the diff. One
> positive result is that, unlike tog and got log, we only compute the
> diff once, so this improves performance a little. We also now need to
> use a temp file, though, because we want the diffstat output first, but
> we need the entire diff to get our field widths and totals. As such, we
> can't print to stdout as we build the diff like we previously did. But
> this isn't a big deal.
> 
> Sorry for the large diff; if you prefer, I can send a diff of changes
> from the previous as it's a bit smaller. Just let me know!

It breaks godwebd.  In gotwebd/got_operations.c the calls to
got_diff_objects_as_* needs to be tweaked.  I'd say (for the moment at
least) to just disable the diffstat there; if wanted we can enable it
but the styling would need to be tweaked too and my diff to
templateify that part is still pending.

(actually my templatefication of gotweb_render_diff is very likely to
be broken by this; not a big deal, i'll rebase it after this gets in.)

Some comments inline based on a quick read; I don't have a strong
opinion on having got showing a diffstat (I actually have my own
standalone diffstat script that I sometimes use for fun) but otherwise
I think the diff reads fine.

> [...]
> --- got/got.c
> +++ got/got.c
> [...]
> +static const struct got_error *
>  cmd_diff(int argc, char *argv[])
>  {
>  	const struct got_error *error;
> @@ -4993,17 +5047,19 @@ cmd_diff(int argc, char *argv[])
>  	char *labels[2] = { NULL, NULL };
>  	int type1 = GOT_OBJ_TYPE_ANY, type2 = GOT_OBJ_TYPE_ANY;
>  	int diff_context = 3, diff_staged = 0, ignore_whitespace = 0, ch, i;
> -	int force_text_diff = 0, force_path = 0, rflag = 0;
> +	int force_text_diff = 0, force_path = 0, rflag = 0, show_diffstat = 0;
>  	const char *errstr;
>  	struct got_reflist_head refs;
> -	struct got_pathlist_head paths;
> +	struct got_pathlist_head diffstat_paths, paths;
>  	struct got_pathlist_entry *pe;
> -	FILE *f1 = NULL, *f2 = NULL;
> +	FILE *f1 = NULL, *f2 = NULL, *outfile = NULL;
>  	int fd1 = -1, fd2 = -1;
>  	int *pack_fds = NULL;
> +	struct got_diffstat_cb_arg dsa;
>  
>  	TAILQ_INIT(&refs);
>  	TAILQ_INIT(&paths);
> +	TAILQ_INIT(&diffstat_paths);
>  
>  #ifndef PROFILE
>  	if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil",
> @@ -5011,7 +5067,7 @@ cmd_diff(int argc, char *argv[])
>  		err(1, "pledge");
>  #endif
>  
> -	while ((ch = getopt(argc, argv, "aC:c:Pr:sw")) != -1) {
> +	while ((ch = getopt(argc, argv, "aC:c:dPr:sw")) != -1) {
>  		switch (ch) {
>  		case 'a':
>  			force_text_diff = 1;
> @@ -5028,6 +5084,9 @@ cmd_diff(int argc, char *argv[])
>  				errx(1, "too many -c options used");
>  			commit_args[ncommit_args++] = optarg;
>  			break;
> +		case 'd':
> +			show_diffstat = 1;
> +			break;
>  		case 'P':
>  			force_path = 1;
>  			break;
> @@ -5091,6 +5150,14 @@ cmd_diff(int argc, char *argv[])
>  	if (error != NULL)
>  		goto done;
>  
> +	if (show_diffstat) {
> +		memset(&dsa, 0, sizeof(dsa));

I'd always do the memset, right at the start of the function.  Avoids
possible troubles.

> +		dsa.paths = &diffstat_paths;
> +		dsa.force_text = force_text_diff;
> +		dsa.ignore_ws = ignore_whitespace;
> +		dsa.diff_algo = GOT_DIFF_ALGORITHM_PATIENCE;
> +	}
> +
>  	if (rflag || worktree == NULL || ncommit_args > 0) {
>  		if (force_path) {
>  			error = got_error_msg(GOT_ERR_NOT_IMPL,
> @@ -5150,6 +5217,12 @@ cmd_diff(int argc, char *argv[])
>  		goto done;
>  	}
>  
> +	outfile = got_opentemp();
> +	if (outfile == NULL) {
> +		error = got_error_from_errno("got_opentemp");
> +		goto done;
> +	}
> +
>  	if (ncommit_args == 0 && (ids[0] == NULL || ids[1] == NULL)) {
>  		struct print_diff_arg arg;
>  		char *id_str;
> @@ -5189,12 +5262,33 @@ cmd_diff(int argc, char *argv[])
>  		arg.diff_staged = diff_staged;
>  		arg.ignore_whitespace = ignore_whitespace;
>  		arg.force_text_diff = force_text_diff;
> +		arg.show_diffstat = show_diffstat;
> +		arg.diffstat = &dsa;
>  		arg.f1 = f1;
>  		arg.f2 = f2;
> +		arg.outfile = outfile;
>  
>  		error = got_worktree_status(worktree, &paths, repo, 0,
>  		    print_diff, &arg, check_cancelled, NULL);
>  		free(id_str);
> +		if (error)
> +			goto done;
> +
> +		if (show_diffstat && dsa.nfiles > 0) {
> +			char *header;
> +
> +			if (asprintf(&header, "diffstat %s%s",
> +			    diff_staged ? "-s " : "",
> +			    got_worktree_get_root_path(worktree)) == -1)
> +				goto done;
> +
> +			error = print_diffstat(&dsa, &diffstat_paths, header);
> +			free(header);
> +			if (error)
> +				goto done;
> +		}
> +
> +		error = printfile(outfile);
>  		goto done;
>  	}
>  
> @@ -5329,24 +5423,45 @@ cmd_diff(int argc, char *argv[])
>  		error = got_diff_objects_as_blobs(NULL, NULL, f1, f2,
>  		    fd1, fd2, ids[0], ids[1], NULL, NULL,
>  		    GOT_DIFF_ALGORITHM_PATIENCE, diff_context,
> -		    ignore_whitespace, force_text_diff, repo, stdout);
> +		    ignore_whitespace, force_text_diff, show_diffstat,
> +		    show_diffstat ? &dsa : NULL, repo, outfile);
>  		break;
>  	case GOT_OBJ_TYPE_TREE:
>  		error = got_diff_objects_as_trees(NULL, NULL, f1, f2, fd1, fd2,
>  		    ids[0], ids[1], &paths, "", "",
>  		    GOT_DIFF_ALGORITHM_PATIENCE, diff_context,
> -		    ignore_whitespace, force_text_diff, repo, stdout);
> +		    ignore_whitespace, force_text_diff, show_diffstat,
> +		    show_diffstat ? &dsa : NULL, repo, outfile);
>  		break;
>  	case GOT_OBJ_TYPE_COMMIT:
> -		printf("diff %s %s\n", labels[0], labels[1]);
> +		fprintf(outfile, "diff %s %s\n", labels[0], labels[1]);
>  		error = got_diff_objects_as_commits(NULL, NULL, f1, f2,
>  		    fd1, fd2, ids[0], ids[1], &paths,
>  		    GOT_DIFF_ALGORITHM_PATIENCE, diff_context,
> -		    ignore_whitespace, force_text_diff, repo, stdout);
> +		    ignore_whitespace, force_text_diff, show_diffstat,
> +		    show_diffstat ? &dsa : NULL, repo, outfile);
>  		break;
>  	default:
>  		error = got_error(GOT_ERR_OBJ_TYPE);
>  	}
> +	if (error)
> +		goto done;
> +
> +	if (show_diffstat && dsa.nfiles > 0) {
> +		char *header = NULL;
> +
> +		if (asprintf(&header, "diffstat %s %s",
> +		    labels[0], labels[1]) == -1)
> +			goto done;
> +
> +		error = print_diffstat(&dsa, &diffstat_paths, header);
> +		free(header);
> +		if (error)
> +			goto done;
> +	}
> +
> +	error = printfile(outfile);
> +
>  done:
>  	free(labels[0]);
>  	free(labels[1]);
> @@ -5368,7 +5483,14 @@ done:
>  	TAILQ_FOREACH(pe, &paths, entry)
>  		free((char *)pe->path);
>  	got_pathlist_free(&paths);
> +	TAILQ_FOREACH(pe, &diffstat_paths, entry) {
> +		free((char *)pe->path);
> +		free(pe->data);
> +	}
> +	got_pathlist_free(&diffstat_paths);
>  	got_ref_list_free(&refs);
> +	if (outfile && fclose(outfile) == EOF && error == NULL)
> +		error = got_error_from_errno("fclose");
>  	if (f1 && fclose(f1) == EOF && error == NULL)
>  		error = got_error_from_errno("fclose");
>  	if (f2 && fclose(f2) == EOF && error == NULL)
> blob - 18fe6a19ccc9b5b2450e313f81d9246ce51b630d
> blob + 1d85a74b4cff17c1a1603795bdddac204025f5fc
> --- include/got_diff.h
> +++ include/got_diff.h
> @@ -44,6 +44,7 @@ struct got_diff_line {
>  	uint8_t	type;
>  };
>  
> +struct got_diffstat_cb_arg;

style nit: I'd leave an empty line between this and the following
comment.

>  /*
>   * Compute the differences between two blobs and write unified diff text
>   * to the provided output file. Two open temporary files must be provided
> [...]
> blob - 5e9010e76c3aa5de6de83e6b5cd54a4749c4f51a
> blob + 709e47798537a5f216664229cc751413b0a66942
> --- lib/diff.c
> +++ lib/diff.c
> [...]
> +static const struct got_error *
>  diff_blobs(struct got_diff_line **lines, size_t *nlines,
>      struct got_diffreg_result **resultp, struct got_blob_object *blob1,
>      struct got_blob_object *blob2, FILE *f1, FILE *f2,
>      const char *label1, const char *label2, mode_t mode1, mode_t mode2,
> -    int diff_context, int ignore_whitespace, int force_text_diff, FILE *outfile,
> +    int diff_context, int ignore_whitespace, int force_text_diff,
> +    int show_diffstat, struct got_diffstat_cb_arg *ds, FILE *outfile,
>      enum got_diff_algorithm diff_algo)
>  {
>  	const struct got_error *err = NULL, *free_err;
> @@ -170,11 +233,60 @@ diff_blobs(struct got_diff_line **lines, size_t *nline
>  		free(modestr1);
>  		free(modestr2);
>  	}
> +
>  	err = got_diffreg(&result, f1, f2, diff_algo, ignore_whitespace,
>  	     force_text_diff);
>  	if (err)
>  		goto done;
>  
> +	if (show_diffstat) {
> +		struct got_diff_changed_path	*change = NULL;
> +		char				*path = NULL;
> +
> +		if (label1 == NULL && label2 == NULL) {
> +			/* diffstat of blobs, show hash instead of path */
> +			if (asprintf(&path, "%.10s -> %.10s",
> +			    idstr1, idstr2) == -1) {
> +				err = got_error_from_errno("asprintf");
> +				goto done;
> +			}
> +		} else {
> +			path = strdup(label2 ? label2 : label1);
> +			if (path == NULL) {
> +				err = got_error_from_errno("malloc");
> +				goto done;
> +			}
> +		}
> +
> +		change = malloc(sizeof(*change));

I'd prefer calloc here, and could drop change->{add,rm} = 0 in
get_diffstat.  Or just pass GOT_STATUS_* to get_diffstat and allocate
`change' there.

Path could also be defined in the function scope and be free'd at the
end, avoiding all these free(path) on every failure point.

(the whole function has IMHO too much allocations saved in narrower
scopes that makes hard to figure out leaks.  Just the chunk before
this change, the one guarded by if (outfile) has several memory leaks
on failures due to modestr1/modestr2 being in a restricted scope.)

> +		if (change == NULL) {
> +			free(path);
> +			err = got_error_from_errno("malloc");
> +			goto done;
> +		}
> +
> +		/*
> +		 * Ignore 'm'ode status change: if there's no accompanying
> +		 * content change, there'll be no diffstat, and if there
> +		 * are actual changes, 'M'odified takes precedence.
> +		 */
> +		change->status = GOT_STATUS_NO_CHANGE;
> +		if (blob1 == NULL)
> +			change->status = GOT_STATUS_ADD;
> +		else if (blob2 == NULL)
> +			change->status = GOT_STATUS_DELETE;
> +		else
> +			change->status = GOT_STATUS_MODIFY;
> +
> +		err = get_diffstat(ds, path, change, result->result,
> +		    force_text_diff);
> +		if (err) {
> +			free(change);
> +			free(path);
> +			goto done;
> +		}
> +	}
> +
>  	if (outfile) {
>  		err = got_diffreg_output(lines, nlines, result,
>  		    blob1 != NULL, blob2 != NULL,
> @@ -208,7 +320,8 @@ got_diff_blob_output_unidiff(void *arg, struct got_blo
>  
>  	return diff_blobs(&a->lines, &a->nlines, NULL,
>  	    blob1, blob2, f1, f2, label1, label2, mode1, mode2, a->diff_context,
> -	    a->ignore_whitespace, a->force_text_diff, a->outfile, a->diff_algo);
> +	    a->ignore_whitespace, a->force_text_diff, a->show_diffstat,
> +	    a->diffstat, a->outfile, a->diff_algo);
>  }
>  
>  const struct got_error *
> @@ -216,11 +329,12 @@ got_diff_blob(struct got_diff_line **lines, size_t*nli
>      struct got_blob_object *blob1, struct got_blob_object *blob2,
>      FILE *f1, FILE *f2, const char *label1, const char *label2,
>      enum got_diff_algorithm diff_algo, int diff_context,
> -    int ignore_whitespace, int force_text_diff, FILE *outfile)
> +    int ignore_whitespace, int force_text_diff, int show_diffstat,
> +    struct got_diffstat_cb_arg *ds, FILE *outfile)
>  {
>  	return diff_blobs(lines, nlines, NULL, blob1, blob2, f1, f2,
>  	    label1, label2, 0, 0, diff_context, ignore_whitespace,
> -	    force_text_diff, outfile, diff_algo);
> +	    force_text_diff, show_diffstat, ds, outfile, diff_algo);
>  }
>  
>  static const struct got_error *
> @@ -228,7 +342,8 @@ diff_blob_file(struct got_diffreg_result **resultp,
>      struct got_blob_object *blob1, FILE *f1, off_t size1, const char *label1,
>      FILE *f2, int f2_exists, struct stat *sb2, const char *label2,
>      enum got_diff_algorithm diff_algo, int diff_context, int ignore_whitespace,
> -    int force_text_diff, FILE *outfile)
> +    int force_text_diff, int show_diffstat, struct got_diffstat_cb_arg *ds,
> +    FILE *outfile)
>  {
>  	const struct got_error *err = NULL, *free_err;
>  	char hex1[SHA1_DIGEST_STRING_LENGTH];
> @@ -277,6 +392,45 @@ done:
>  			goto done;
>  	}
>  
> +	if (show_diffstat) {
> +		struct got_diff_changed_path	*change = NULL;
> +		char				*path = NULL;
> +
> +		path = strdup(label2 ? label2 : label1);
> +		if (path == NULL) {
> +			err = got_error_from_errno("malloc");
> +			goto done;
> +		}
> +
> +		change = malloc(sizeof(*change));

ditto.  (including the suggestion to move change to get_diffstat and
path at function scope.)

> +		if (change == NULL) {
> +			free(path);
> +			err = got_error_from_errno("malloc");
> +			goto done;
> +		}
> +
> +		/*
> +		 * Ignore 'm'ode status change: if there's no accompanying
> +		 * content change, there'll be no diffstat, and if there
> +		 * are actual changes, 'M'odified takes precedence.
> +		 */
> +		change->status = GOT_STATUS_NO_CHANGE;
> +		if (blob1 == NULL)
> +			change->status = GOT_STATUS_ADD;
> +		else if (!f2_exists)
> +			change->status = GOT_STATUS_DELETE;
> +		else
> +			change->status = GOT_STATUS_MODIFY;
> +
> +		err = get_diffstat(ds, path, change, result->result,
> +		    force_text_diff);
> +		if (err) {
> +			free(change);
> +			free(path);
> +			goto done;
> +		}
> +	}
> +
>  done:
>  	if (resultp && err == NULL)
>  		*resultp = result;