"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: changed paths in 'got log' and tog
To:
Matthieu Herrb <matthieu@herrb.eu>, gameoftrees@openbsd.org, matthieu@openbsd.org
Date:
Tue, 5 May 2020 09:55:35 +0200

Download raw body.

Thread
On Tue, May 05, 2020 at 09:37:58AM +0200, Stefan Sperling wrote:
> Thank you for catching this. I forgot to clear the list of paths collected
> for the current commit when the current commit failed to match. So while
 
> Fixed in the new version of my patch below:

I found another problem: Performance was worse than expected because
diffing of file content was accidentally enabled during 'log -P'.

In this version of the patch we only diff tree entry IDs and skip diffing
file content, which is a lot faster and especially noticeable when searching.

diff refs/heads/master refs/heads/changed-paths
blob - b6a8a5486fe953fc59245fdd11ce1796d3f9b0a1
blob + 37523a3f4cb248caf256b0e5a47510f04058befc
--- got/got.1
+++ got/got.1
@@ -683,7 +683,7 @@ in a pattern.
 .It Cm st
 Short alias for
 .Cm status .
-.It Cm log Oo Fl b Oc Oo Fl c Ar commit Oc Oo Fl C Ar number Oc Oo Fl l Ar N Oc Oo Fl p Oc Oo Fl s Ar search-pattern Oc Oo Fl r Ar repository-path Oc Oo Fl R Oc Oo Fl x Ar commit Oc Op Ar path
+.It Cm log Oo Fl b Oc Oo Fl c Ar commit Oc Oo Fl C Ar number Oc Oo Fl l Ar N Oc Oo Fl p Oc Oo Fl P Oc Oo Fl s Ar search-pattern Oc Oo Fl r Ar repository-path Oc Oo Fl R Oc Oo Fl x Ar commit Oc Op Ar path
 Display history of a repository.
 If a
 .Ar path
@@ -729,10 +729,22 @@ Display the patch of modifications made in each commit
 If a
 .Ar path
 is specified, only show the patch of modifications at or within this path.
+.It Fl P
+Display the list of file paths changed in each commit, using the following
+status codes:
+.Bl -column YXZ description
+.It M Ta modified file
+.It D Ta file was deleted
+.It A Ta new file was added
+.It m Ta modified file modes (executable bit only)
+.El
 .It Fl s Ar search-pattern
 If specified, show only commits with a log message matched by the extended
 regular expression
 .Ar search-pattern .
+When used together with
+.Fl P
+then the file paths changed by a commit can be matched as well.
 Regular expression syntax is documented in
 .Xr re_format 7 .
 .It Fl r Ar repository-path
blob - 874e65af899a466219bd8c3a9b1e4a2c30a94d62
blob + 59658b35760c486d0d715c2ec24cb57826fc7739
--- got/got.c
+++ got/got.c
@@ -2892,6 +2892,49 @@ done:
 }
 
 static const struct got_error *
+get_changed_paths(struct got_pathlist_head *paths,
+    struct got_commit_object *commit, struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id *tree_id1 = NULL, *tree_id2 = NULL;
+	struct got_tree_object *tree1 = NULL, *tree2 = NULL;
+	struct got_object_qid *qid;
+
+	qid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit));
+	if (qid != NULL) {
+		struct got_commit_object *pcommit;
+		err = got_object_open_as_commit(&pcommit, repo,
+		    qid->id);
+		if (err)
+			return err;
+
+		tree_id1 = got_object_commit_get_tree_id(pcommit);
+		got_object_commit_close(pcommit);
+
+	}
+
+	if (tree_id1) {
+		err = got_object_open_as_tree(&tree1, repo, tree_id1);
+		if (err)
+			goto done;
+	}
+
+	tree_id2 = got_object_commit_get_tree_id(commit);
+	err = got_object_open_as_tree(&tree2, repo, tree_id2);
+	if (err)
+		goto done;
+
+	err = got_diff_tree(tree1, tree2, "", "", repo,
+	    got_diff_tree_collect_changed_paths, paths, 0);
+done:
+	if (tree1)
+		got_object_tree_close(tree1);
+	if (tree2)
+		got_object_tree_close(tree2);
+	return err;
+}
+
+static const struct got_error *
 print_patch(struct got_commit_object *commit, struct got_object_id *id,
     const char *path, int diff_context, struct got_repository *repo)
 {
@@ -3020,11 +3063,29 @@ done:
 	return err;
 }
 
+static void
+match_changed_paths(int *have_match, struct got_pathlist_head *changed_paths,
+    regex_t *regex)
+{
+	regmatch_t regmatch;
+	struct got_pathlist_entry *pe;
+
+	*have_match = 0;
+
+	TAILQ_FOREACH(pe, changed_paths, entry) {
+		if (regexec(regex, pe->path, 1, &regmatch, 0) == 0) {
+			*have_match = 1;
+			break;
+		}
+	}
+}
+
 #define GOT_COMMIT_SEP_STR "-----------------------------------------------\n"
 
 static const struct got_error *
 print_commit(struct got_commit_object *commit, struct got_object_id *id,
-    struct got_repository *repo, const char *path, int show_patch,
+    struct got_repository *repo, const char *path,
+    struct got_pathlist_head *changed_paths, int show_patch,
     int diff_context, struct got_reflist_head *refs)
 {
 	const struct got_error *err = NULL;
@@ -3127,6 +3188,14 @@ print_commit(struct got_commit_object *commit, struct 
 	} while (line);
 	free(logmsg0);
 
+	if (changed_paths) {
+		struct got_pathlist_entry *pe;
+		TAILQ_FOREACH(pe, changed_paths, entry) {
+			struct got_diff_changed_path *cp = pe->data;
+			printf(" %c  %s\n", cp->status, pe->path);
+		}
+		printf("\n");
+	}
 	if (show_patch) {
 		err = print_patch(commit, id, path, diff_context, repo);
 		if (err == 0)
@@ -3140,9 +3209,9 @@ print_commit(struct got_commit_object *commit, struct 
 
 static const struct got_error *
 print_commits(struct got_object_id *root_id, struct got_object_id *end_id,
-    struct got_repository *repo, const char *path, int show_patch,
-    const char *search_pattern, int diff_context, int limit, int log_branches,
-    int reverse_display_order, struct got_reflist_head *refs)
+    struct got_repository *repo, const char *path, int show_changed_paths,
+    int show_patch, const char *search_pattern, int diff_context, int limit,
+    int log_branches, int reverse_display_order, struct got_reflist_head *refs)
 {
 	const struct got_error *err;
 	struct got_commit_graph *graph;
@@ -3151,8 +3220,11 @@ print_commits(struct got_object_id *root_id, struct go
 	struct got_object_id_queue reversed_commits;
 	struct got_object_qid *qid;
 	struct got_commit_object *commit;
+	struct got_pathlist_head changed_paths;
+	struct got_pathlist_entry *pe;
 
 	SIMPLEQ_INIT(&reversed_commits);
+	TAILQ_INIT(&changed_paths);
 
 	if (search_pattern && regcomp(&regex, search_pattern,
 	    REG_EXTENDED | REG_NOSUB | REG_NEWLINE))
@@ -3185,14 +3257,28 @@ print_commits(struct got_object_id *root_id, struct go
 		if (err)
 			break;
 
+		if (show_changed_paths) {
+			err = get_changed_paths(&changed_paths, commit, repo);
+			if (err)
+				break;
+		}
+
 		if (search_pattern) {
 			err = match_logmsg(&have_match, id, commit, &regex);
 			if (err) {
 				got_object_commit_close(commit);
 				break;
 			}
+			if (have_match == 0 && show_changed_paths)
+				match_changed_paths(&have_match,
+				    &changed_paths, &regex);
 			if (have_match == 0) {
 				got_object_commit_close(commit);
+				TAILQ_FOREACH(pe, &changed_paths, entry) {
+					free((char *)pe->path);
+					free(pe->data);
+				}
+				got_pathlist_free(&changed_paths);
 				continue;
 			}
 		}
@@ -3204,8 +3290,9 @@ print_commits(struct got_object_id *root_id, struct go
 			SIMPLEQ_INSERT_HEAD(&reversed_commits, qid, entry);
 			got_object_commit_close(commit);
 		} else {
-			err = print_commit(commit, id, repo, path, show_patch,
-			    diff_context, refs);
+			err = print_commit(commit, id, repo, path,
+			    show_changed_paths ? &changed_paths : NULL,
+			    show_patch, diff_context, refs);
 			got_object_commit_close(commit);
 			if (err)
 				break;
@@ -3213,6 +3300,12 @@ print_commits(struct got_object_id *root_id, struct go
 		if ((limit && --limit == 0) ||
 		    (end_id && got_object_id_cmp(id, end_id) == 0))
 			break;
+
+		TAILQ_FOREACH(pe, &changed_paths, entry) {
+			free((char *)pe->path);
+			free(pe->data);
+		}
+		got_pathlist_free(&changed_paths);
 	}
 	if (reverse_display_order) {
 		SIMPLEQ_FOREACH(qid, &reversed_commits, entry) {
@@ -3220,6 +3313,7 @@ print_commits(struct got_object_id *root_id, struct go
 			if (err)
 				break;
 			err = print_commit(commit, qid->id, repo, path,
+			    show_changed_paths ? &changed_paths : NULL,
 			    show_patch, diff_context, refs);
 			got_object_commit_close(commit);
 			if (err)
@@ -3232,6 +3326,11 @@ done:
 		SIMPLEQ_REMOVE_HEAD(&reversed_commits, entry);
 		got_object_qid_free(qid);
 	}
+	TAILQ_FOREACH(pe, &changed_paths, entry) {
+		free((char *)pe->path);
+		free(pe->data);
+	}
+	got_pathlist_free(&changed_paths);
 	if (search_pattern)
 		regfree(&regex);
 	got_commit_graph_close(graph);
@@ -3242,8 +3341,8 @@ __dead static void
 usage_log(void)
 {
 	fprintf(stderr, "usage: %s log [-b] [-c commit] [-C number] [ -l N ] "
-	    "[-p] [-x commit] [-s search-pattern] [-r repository-path] [-R] "
-	    "[path]\n", getprogname());
+	    "[-p] [-P] [-x commit] [-s search-pattern] [-r repository-path] "
+	    "[-R] [path]\n", getprogname());
 	exit(1);
 }
 
@@ -3322,7 +3421,7 @@ cmd_log(int argc, char *argv[])
 	const char *start_commit = NULL, *end_commit = NULL;
 	const char *search_pattern = NULL;
 	int diff_context = -1, ch;
-	int show_patch = 0, limit = 0, log_branches = 0;
+	int show_changed_paths = 0, show_patch = 0, limit = 0, log_branches = 0;
 	int reverse_display_order = 0;
 	const char *errstr;
 	struct got_reflist_head refs;
@@ -3338,11 +3437,14 @@ cmd_log(int argc, char *argv[])
 
 	limit = get_default_log_limit();
 
-	while ((ch = getopt(argc, argv, "bpc:C:l:r:Rs:x:")) != -1) {
+	while ((ch = getopt(argc, argv, "bpPc:C:l:r:Rs:x:")) != -1) {
 		switch (ch) {
 		case 'p':
 			show_patch = 1;
 			break;
+		case 'P':
+			show_changed_paths = 1;
+			break;
 		case 'c':
 			start_commit = optarg;
 			break;
@@ -3494,8 +3596,8 @@ cmd_log(int argc, char *argv[])
 	if (error)
 		goto done;
 
-	error = print_commits(start_id, end_id, repo, path, show_patch,
-	    search_pattern, diff_context, limit, log_branches,
+	error = print_commits(start_id, end_id, repo, path, show_changed_paths,
+	    show_patch, search_pattern, diff_context, limit, log_branches,
 	    reverse_display_order, &refs);
 done:
 	free(path);
blob - d7f3479403b5d8286740e682f101b99f5f2782b4
blob + fd8185fd7383c6da58510578d7d7f7217653f964
--- include/got_diff.h
+++ include/got_diff.h
@@ -79,6 +79,27 @@ const struct got_error *got_diff_tree(struct got_tree_
     struct got_repository *, got_diff_blob_cb cb, void *cb_arg, int);
 
 /*
+ * A pre-defined implementation of got_diff_blob_cb() which collects a list
+ * of file paths that differ between two trees.
+ * The caller must allocate and initialize a got_pathlist_head * argument.
+ * Data pointers of entries added to the path list will point to a struct
+ * got_diff_changed_path object.
+ * The caller is expected to free both the path and data pointers of all
+ * entries on the path list.
+ */
+struct got_diff_changed_path {
+	/*
+	 * The modification status of this path. It can be GOT_STATUS_ADD,
+	 * GOT_STATUS_DELETE, GOT_STATUS_MODIFY, or GOT_STATUS_MODE_CHANGE.
+	 */
+	int status;
+};
+const struct got_error *got_diff_tree_collect_changed_paths(void *,
+    struct got_blob_object *, struct got_blob_object *,
+    struct got_object_id *, struct got_object_id *,
+    const char *, const char *, mode_t, mode_t, struct got_repository *);
+
+/*
  * Diff two objects, assuming both objects are blobs. Two const char * diff
  * header labels may be provided which will be used to identify each blob in
  * the diff output. If a label is NULL, use the blob's SHA1 checksum instead.
blob - 7874be93a5e85387f051461ac0865dfb7e514500
blob + 152352cad8daa30cc23016c559df4f22a087bd74
--- lib/diff.c
+++ lib/diff.c
@@ -30,6 +30,8 @@
 #include "got_diff.h"
 #include "got_opentemp.h"
 #include "got_path.h"
+#include "got_cancel.h"
+#include "got_worktree.h"
 
 #include "got_lib_diff.h"
 #include "got_lib_delta.h"
@@ -591,6 +593,48 @@ diff_entry_new_old(struct got_tree_entry *te2,
 
 	return cb(cb_arg, NULL, NULL, NULL, &te2->id, NULL, label2, 0,
 	    te2->mode, repo);
+}
+
+const struct got_error *
+got_diff_tree_collect_changed_paths(void *arg, struct got_blob_object *blob1,
+    struct got_blob_object *blob2, struct got_object_id *id1,
+    struct got_object_id *id2, const char *label1, const char *label2,
+    mode_t mode1, mode_t mode2, struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_pathlist_head *paths = arg;
+	struct got_diff_changed_path *change = NULL;
+	char *path = NULL;
+
+	path = strdup(label2 ? label2 : label1);
+	if (path == NULL)
+		return got_error_from_errno("malloc");
+
+	change = malloc(sizeof(*change));
+	if (change == NULL) {
+		err = got_error_from_errno("malloc");
+		goto done;
+	}
+
+	change->status = GOT_STATUS_NO_CHANGE;
+	if (id1 == NULL)
+		change->status = GOT_STATUS_ADD;
+	else if (id2 == NULL)
+		change->status = GOT_STATUS_DELETE;
+	else {
+		if (got_object_id_cmp(id1, id2) != 0)
+			change->status = GOT_STATUS_MODIFY;
+		else if (mode1 != mode2)
+			change->status = GOT_STATUS_MODE_CHANGE;
+	}
+
+	err = got_pathlist_insert(NULL, paths, path, change);
+done:
+	if (err) {
+		free(path);
+		free(change);
+	}
+	return err;
 }
 
 const struct got_error *
blob - b05b0f42244e812865a92ed40ef8eba754724e6c
blob + d88d0fa11e29a8dc0efae3812238dcf959a1891b
--- regress/cmdline/log.sh
+++ regress/cmdline/log.sh
@@ -646,6 +646,50 @@ function test_log_in_worktree_different_repo {
 	test_done "$testroot" "0"
 }
 
+function test_log_changed_paths {
+	local testroot=`test_init log_changed_paths`
+	local commit_id0=`git_show_head $testroot/repo`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	echo "modified alpha" > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null)
+	local commit_id1=`git_show_head $testroot/repo`
+
+	(cd $testroot/wt && got rm beta >/dev/null)
+	(cd $testroot/wt && chmod +x epsilon/zeta >/dev/null)
+	(cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null)
+	local commit_id2=`git_show_head $testroot/repo`
+
+	echo "new file" > $testroot/wt/new
+	(cd $testroot/wt && got add new >/dev/null)
+	(cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null)
+	local commit_id3=`git_show_head $testroot/repo`
+
+	(cd $testroot/wt && got log -P | grep '^ [MDmA]' > $testroot/stdout)
+
+	echo " A  new" > $testroot/stdout.expected
+	echo " D  beta" >> $testroot/stdout.expected
+	echo " m  epsilon/zeta" >> $testroot/stdout.expected
+	echo " M  alpha" >> $testroot/stdout.expected
+	echo " A  alpha" >> $testroot/stdout.expected
+	echo " A  beta" >> $testroot/stdout.expected
+	echo " A  epsilon/zeta" >> $testroot/stdout.expected
+	echo " A  gamma/delta" >> $testroot/stdout.expected
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+	fi
+	test_done "$testroot" "$ret"
+}
+
 run_test test_log_in_repo
 run_test test_log_in_bare_repo
 run_test test_log_in_worktree
@@ -657,3 +701,4 @@ run_test test_log_nonexistent_path
 run_test test_log_end_at_commit
 run_test test_log_reverse_display
 run_test test_log_in_worktree_different_repo
+run_test test_log_changed_paths
blob - 59e36d0a3c8e1f763e1bb8cc6e6290a98b85ab53
blob + de21c04b04cf92a585fa3fd019dce29f5c235ff7
--- tog/tog.c
+++ tog/tog.c
@@ -2894,6 +2894,49 @@ get_datestr(time_t *time, char *datebuf)
 }
 
 static const struct got_error *
+get_changed_paths(struct got_pathlist_head *paths,
+    struct got_commit_object *commit, struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id *tree_id1 = NULL, *tree_id2 = NULL;
+	struct got_tree_object *tree1 = NULL, *tree2 = NULL;
+	struct got_object_qid *qid;
+
+	qid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit));
+	if (qid != NULL) {
+		struct got_commit_object *pcommit;
+		err = got_object_open_as_commit(&pcommit, repo,
+		    qid->id);
+		if (err)
+			return err;
+
+		tree_id1 = got_object_commit_get_tree_id(pcommit);
+		got_object_commit_close(pcommit);
+
+	}
+
+	if (tree_id1) {
+		err = got_object_open_as_tree(&tree1, repo, tree_id1);
+		if (err)
+			goto done;
+	}
+
+	tree_id2 = got_object_commit_get_tree_id(commit);
+	err = got_object_open_as_tree(&tree2, repo, tree_id2);
+	if (err)
+		goto done;
+
+	err = got_diff_tree(tree1, tree2, "", "", repo,
+	    got_diff_tree_collect_changed_paths, paths, 0);
+done:
+	if (tree1)
+		got_object_tree_close(tree1);
+	if (tree2)
+		got_object_tree_close(tree2);
+	return err;
+}
+
+static const struct got_error *
 write_commit_info(struct got_object_id *commit_id,
     struct got_reflist_head *refs, struct got_repository *repo, FILE *outfile)
 {
@@ -2904,7 +2947,11 @@ write_commit_info(struct got_object_id *commit_id,
 	time_t committer_time;
 	const char *author, *committer;
 	char *refs_str = NULL;
+	struct got_pathlist_head changed_paths;
+	struct got_pathlist_entry *pe;
 
+	TAILQ_INIT(&changed_paths);
+
 	if (refs) {
 		err = build_refs_str(&refs_str, refs, commit_id, repo);
 		if (err)
@@ -2951,7 +2998,18 @@ write_commit_info(struct got_object_id *commit_id,
 		err = got_error_from_errno("fprintf");
 		goto done;
 	}
+	err = get_changed_paths(&changed_paths, commit, repo);
+	if (err)
+		goto done;
+	TAILQ_FOREACH(pe, &changed_paths, entry) {
+		struct got_diff_changed_path *cp = pe->data;
+		fprintf(outfile, "%c  %s\n", cp->status, pe->path);
+		free((char *)pe->path);
+		free(pe->data);
+	}
+	fputc('\n', outfile);
 done:
+	got_pathlist_free(&changed_paths);
 	free(id_str);
 	free(logmsg);
 	free(refs_str);
@@ -3089,7 +3147,7 @@ create_diff(struct tog_diff_view_state *s)
 			goto done;
 		/* Show commit info if we're diffing to a parent/root commit. */
 		if (s->id1 == NULL) {
-			err =write_commit_info(s->id2, s->refs, s->repo, s->f);
+			err = write_commit_info(s->id2, s->refs, s->repo, s->f);
 			if (err)
 				goto done;
 		} else {
@@ -3282,7 +3340,8 @@ open_diff_view(struct tog_view *view, struct got_objec
 		}
 
 		err = add_color(&s->colors,
-		    "^(commit|(blob|file) [-+] )", TOG_COLOR_DIFF_META,
+		    "^(commit|(blob|file) [-+] |[MDmA]  [^ ])",
+		    TOG_COLOR_DIFF_META,
 		    get_color_value("TOG_COLOR_DIFF_META"));
 		if (err) {
 			free_colors(&s->colors);