From: Stefan Sperling Subject: Re: changed paths in 'got log' and tog To: Matthieu Herrb , gameoftrees@openbsd.org, matthieu@openbsd.org Date: Tue, 5 May 2020 09:55:35 +0200 On Tue, May 05, 2020 at 09:37:58AM +0200, Stefan Sperling wrote: > Thank you for catching this. I forgot to clear the list of paths collected > for the current commit when the current commit failed to match. So while > Fixed in the new version of my patch below: I found another problem: Performance was worse than expected because diffing of file content was accidentally enabled during 'log -P'. In this version of the patch we only diff tree entry IDs and skip diffing file content, which is a lot faster and especially noticeable when searching. diff refs/heads/master refs/heads/changed-paths blob - b6a8a5486fe953fc59245fdd11ce1796d3f9b0a1 blob + 37523a3f4cb248caf256b0e5a47510f04058befc --- got/got.1 +++ got/got.1 @@ -683,7 +683,7 @@ in a pattern. .It Cm st Short alias for .Cm status . -.It Cm log Oo Fl b Oc Oo Fl c Ar commit Oc Oo Fl C Ar number Oc Oo Fl l Ar N Oc Oo Fl p Oc Oo Fl s Ar search-pattern Oc Oo Fl r Ar repository-path Oc Oo Fl R Oc Oo Fl x Ar commit Oc Op Ar path +.It Cm log Oo Fl b Oc Oo Fl c Ar commit Oc Oo Fl C Ar number Oc Oo Fl l Ar N Oc Oo Fl p Oc Oo Fl P Oc Oo Fl s Ar search-pattern Oc Oo Fl r Ar repository-path Oc Oo Fl R Oc Oo Fl x Ar commit Oc Op Ar path Display history of a repository. If a .Ar path @@ -729,10 +729,22 @@ Display the patch of modifications made in each commit If a .Ar path is specified, only show the patch of modifications at or within this path. +.It Fl P +Display the list of file paths changed in each commit, using the following +status codes: +.Bl -column YXZ description +.It M Ta modified file +.It D Ta file was deleted +.It A Ta new file was added +.It m Ta modified file modes (executable bit only) +.El .It Fl s Ar search-pattern If specified, show only commits with a log message matched by the extended regular expression .Ar search-pattern . +When used together with +.Fl P +then the file paths changed by a commit can be matched as well. Regular expression syntax is documented in .Xr re_format 7 . .It Fl r Ar repository-path blob - 874e65af899a466219bd8c3a9b1e4a2c30a94d62 blob + 59658b35760c486d0d715c2ec24cb57826fc7739 --- got/got.c +++ got/got.c @@ -2892,6 +2892,49 @@ done: } static const struct got_error * +get_changed_paths(struct got_pathlist_head *paths, + struct got_commit_object *commit, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object_id *tree_id1 = NULL, *tree_id2 = NULL; + struct got_tree_object *tree1 = NULL, *tree2 = NULL; + struct got_object_qid *qid; + + qid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit)); + if (qid != NULL) { + struct got_commit_object *pcommit; + err = got_object_open_as_commit(&pcommit, repo, + qid->id); + if (err) + return err; + + tree_id1 = got_object_commit_get_tree_id(pcommit); + got_object_commit_close(pcommit); + + } + + if (tree_id1) { + err = got_object_open_as_tree(&tree1, repo, tree_id1); + if (err) + goto done; + } + + tree_id2 = got_object_commit_get_tree_id(commit); + err = got_object_open_as_tree(&tree2, repo, tree_id2); + if (err) + goto done; + + err = got_diff_tree(tree1, tree2, "", "", repo, + got_diff_tree_collect_changed_paths, paths, 0); +done: + if (tree1) + got_object_tree_close(tree1); + if (tree2) + got_object_tree_close(tree2); + return err; +} + +static const struct got_error * print_patch(struct got_commit_object *commit, struct got_object_id *id, const char *path, int diff_context, struct got_repository *repo) { @@ -3020,11 +3063,29 @@ done: return err; } +static void +match_changed_paths(int *have_match, struct got_pathlist_head *changed_paths, + regex_t *regex) +{ + regmatch_t regmatch; + struct got_pathlist_entry *pe; + + *have_match = 0; + + TAILQ_FOREACH(pe, changed_paths, entry) { + if (regexec(regex, pe->path, 1, ®match, 0) == 0) { + *have_match = 1; + break; + } + } +} + #define GOT_COMMIT_SEP_STR "-----------------------------------------------\n" static const struct got_error * print_commit(struct got_commit_object *commit, struct got_object_id *id, - struct got_repository *repo, const char *path, int show_patch, + struct got_repository *repo, const char *path, + struct got_pathlist_head *changed_paths, int show_patch, int diff_context, struct got_reflist_head *refs) { const struct got_error *err = NULL; @@ -3127,6 +3188,14 @@ print_commit(struct got_commit_object *commit, struct } while (line); free(logmsg0); + if (changed_paths) { + struct got_pathlist_entry *pe; + TAILQ_FOREACH(pe, changed_paths, entry) { + struct got_diff_changed_path *cp = pe->data; + printf(" %c %s\n", cp->status, pe->path); + } + printf("\n"); + } if (show_patch) { err = print_patch(commit, id, path, diff_context, repo); if (err == 0) @@ -3140,9 +3209,9 @@ print_commit(struct got_commit_object *commit, struct static const struct got_error * print_commits(struct got_object_id *root_id, struct got_object_id *end_id, - struct got_repository *repo, const char *path, int show_patch, - const char *search_pattern, int diff_context, int limit, int log_branches, - int reverse_display_order, struct got_reflist_head *refs) + struct got_repository *repo, const char *path, int show_changed_paths, + int show_patch, const char *search_pattern, int diff_context, int limit, + int log_branches, int reverse_display_order, struct got_reflist_head *refs) { const struct got_error *err; struct got_commit_graph *graph; @@ -3151,8 +3220,11 @@ print_commits(struct got_object_id *root_id, struct go struct got_object_id_queue reversed_commits; struct got_object_qid *qid; struct got_commit_object *commit; + struct got_pathlist_head changed_paths; + struct got_pathlist_entry *pe; SIMPLEQ_INIT(&reversed_commits); + TAILQ_INIT(&changed_paths); if (search_pattern && regcomp(®ex, search_pattern, REG_EXTENDED | REG_NOSUB | REG_NEWLINE)) @@ -3185,14 +3257,28 @@ print_commits(struct got_object_id *root_id, struct go if (err) break; + if (show_changed_paths) { + err = get_changed_paths(&changed_paths, commit, repo); + if (err) + break; + } + if (search_pattern) { err = match_logmsg(&have_match, id, commit, ®ex); if (err) { got_object_commit_close(commit); break; } + if (have_match == 0 && show_changed_paths) + match_changed_paths(&have_match, + &changed_paths, ®ex); if (have_match == 0) { got_object_commit_close(commit); + TAILQ_FOREACH(pe, &changed_paths, entry) { + free((char *)pe->path); + free(pe->data); + } + got_pathlist_free(&changed_paths); continue; } } @@ -3204,8 +3290,9 @@ print_commits(struct got_object_id *root_id, struct go SIMPLEQ_INSERT_HEAD(&reversed_commits, qid, entry); got_object_commit_close(commit); } else { - err = print_commit(commit, id, repo, path, show_patch, - diff_context, refs); + err = print_commit(commit, id, repo, path, + show_changed_paths ? &changed_paths : NULL, + show_patch, diff_context, refs); got_object_commit_close(commit); if (err) break; @@ -3213,6 +3300,12 @@ print_commits(struct got_object_id *root_id, struct go if ((limit && --limit == 0) || (end_id && got_object_id_cmp(id, end_id) == 0)) break; + + TAILQ_FOREACH(pe, &changed_paths, entry) { + free((char *)pe->path); + free(pe->data); + } + got_pathlist_free(&changed_paths); } if (reverse_display_order) { SIMPLEQ_FOREACH(qid, &reversed_commits, entry) { @@ -3220,6 +3313,7 @@ print_commits(struct got_object_id *root_id, struct go if (err) break; err = print_commit(commit, qid->id, repo, path, + show_changed_paths ? &changed_paths : NULL, show_patch, diff_context, refs); got_object_commit_close(commit); if (err) @@ -3232,6 +3326,11 @@ done: SIMPLEQ_REMOVE_HEAD(&reversed_commits, entry); got_object_qid_free(qid); } + TAILQ_FOREACH(pe, &changed_paths, entry) { + free((char *)pe->path); + free(pe->data); + } + got_pathlist_free(&changed_paths); if (search_pattern) regfree(®ex); got_commit_graph_close(graph); @@ -3242,8 +3341,8 @@ __dead static void usage_log(void) { fprintf(stderr, "usage: %s log [-b] [-c commit] [-C number] [ -l N ] " - "[-p] [-x commit] [-s search-pattern] [-r repository-path] [-R] " - "[path]\n", getprogname()); + "[-p] [-P] [-x commit] [-s search-pattern] [-r repository-path] " + "[-R] [path]\n", getprogname()); exit(1); } @@ -3322,7 +3421,7 @@ cmd_log(int argc, char *argv[]) const char *start_commit = NULL, *end_commit = NULL; const char *search_pattern = NULL; int diff_context = -1, ch; - int show_patch = 0, limit = 0, log_branches = 0; + int show_changed_paths = 0, show_patch = 0, limit = 0, log_branches = 0; int reverse_display_order = 0; const char *errstr; struct got_reflist_head refs; @@ -3338,11 +3437,14 @@ cmd_log(int argc, char *argv[]) limit = get_default_log_limit(); - while ((ch = getopt(argc, argv, "bpc:C:l:r:Rs:x:")) != -1) { + while ((ch = getopt(argc, argv, "bpPc:C:l:r:Rs:x:")) != -1) { switch (ch) { case 'p': show_patch = 1; break; + case 'P': + show_changed_paths = 1; + break; case 'c': start_commit = optarg; break; @@ -3494,8 +3596,8 @@ cmd_log(int argc, char *argv[]) if (error) goto done; - error = print_commits(start_id, end_id, repo, path, show_patch, - search_pattern, diff_context, limit, log_branches, + error = print_commits(start_id, end_id, repo, path, show_changed_paths, + show_patch, search_pattern, diff_context, limit, log_branches, reverse_display_order, &refs); done: free(path); blob - d7f3479403b5d8286740e682f101b99f5f2782b4 blob + fd8185fd7383c6da58510578d7d7f7217653f964 --- include/got_diff.h +++ include/got_diff.h @@ -79,6 +79,27 @@ const struct got_error *got_diff_tree(struct got_tree_ struct got_repository *, got_diff_blob_cb cb, void *cb_arg, int); /* + * A pre-defined implementation of got_diff_blob_cb() which collects a list + * of file paths that differ between two trees. + * The caller must allocate and initialize a got_pathlist_head * argument. + * Data pointers of entries added to the path list will point to a struct + * got_diff_changed_path object. + * The caller is expected to free both the path and data pointers of all + * entries on the path list. + */ +struct got_diff_changed_path { + /* + * The modification status of this path. It can be GOT_STATUS_ADD, + * GOT_STATUS_DELETE, GOT_STATUS_MODIFY, or GOT_STATUS_MODE_CHANGE. + */ + int status; +}; +const struct got_error *got_diff_tree_collect_changed_paths(void *, + struct got_blob_object *, struct got_blob_object *, + struct got_object_id *, struct got_object_id *, + const char *, const char *, mode_t, mode_t, struct got_repository *); + +/* * Diff two objects, assuming both objects are blobs. Two const char * diff * header labels may be provided which will be used to identify each blob in * the diff output. If a label is NULL, use the blob's SHA1 checksum instead. blob - 7874be93a5e85387f051461ac0865dfb7e514500 blob + 152352cad8daa30cc23016c559df4f22a087bd74 --- lib/diff.c +++ lib/diff.c @@ -30,6 +30,8 @@ #include "got_diff.h" #include "got_opentemp.h" #include "got_path.h" +#include "got_cancel.h" +#include "got_worktree.h" #include "got_lib_diff.h" #include "got_lib_delta.h" @@ -591,6 +593,48 @@ diff_entry_new_old(struct got_tree_entry *te2, return cb(cb_arg, NULL, NULL, NULL, &te2->id, NULL, label2, 0, te2->mode, repo); +} + +const struct got_error * +got_diff_tree_collect_changed_paths(void *arg, struct got_blob_object *blob1, + struct got_blob_object *blob2, struct got_object_id *id1, + struct got_object_id *id2, const char *label1, const char *label2, + mode_t mode1, mode_t mode2, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_pathlist_head *paths = arg; + struct got_diff_changed_path *change = NULL; + char *path = NULL; + + path = strdup(label2 ? label2 : label1); + if (path == NULL) + return got_error_from_errno("malloc"); + + change = malloc(sizeof(*change)); + if (change == NULL) { + err = got_error_from_errno("malloc"); + goto done; + } + + change->status = GOT_STATUS_NO_CHANGE; + if (id1 == NULL) + change->status = GOT_STATUS_ADD; + else if (id2 == NULL) + change->status = GOT_STATUS_DELETE; + else { + if (got_object_id_cmp(id1, id2) != 0) + change->status = GOT_STATUS_MODIFY; + else if (mode1 != mode2) + change->status = GOT_STATUS_MODE_CHANGE; + } + + err = got_pathlist_insert(NULL, paths, path, change); +done: + if (err) { + free(path); + free(change); + } + return err; } const struct got_error * blob - b05b0f42244e812865a92ed40ef8eba754724e6c blob + d88d0fa11e29a8dc0efae3812238dcf959a1891b --- regress/cmdline/log.sh +++ regress/cmdline/log.sh @@ -646,6 +646,50 @@ function test_log_in_worktree_different_repo { test_done "$testroot" "0" } +function test_log_changed_paths { + local testroot=`test_init log_changed_paths` + local commit_id0=`git_show_head $testroot/repo` + + got checkout $testroot/repo $testroot/wt > /dev/null + ret="$?" + if [ "$ret" != "0" ]; then + test_done "$testroot" "$ret" + return 1 + fi + + echo "modified alpha" > $testroot/wt/alpha + (cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null) + local commit_id1=`git_show_head $testroot/repo` + + (cd $testroot/wt && got rm beta >/dev/null) + (cd $testroot/wt && chmod +x epsilon/zeta >/dev/null) + (cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null) + local commit_id2=`git_show_head $testroot/repo` + + echo "new file" > $testroot/wt/new + (cd $testroot/wt && got add new >/dev/null) + (cd $testroot/wt && got commit -m 'test log_changed_paths' > /dev/null) + local commit_id3=`git_show_head $testroot/repo` + + (cd $testroot/wt && got log -P | grep '^ [MDmA]' > $testroot/stdout) + + echo " A new" > $testroot/stdout.expected + echo " D beta" >> $testroot/stdout.expected + echo " m epsilon/zeta" >> $testroot/stdout.expected + echo " M alpha" >> $testroot/stdout.expected + echo " A alpha" >> $testroot/stdout.expected + echo " A beta" >> $testroot/stdout.expected + echo " A epsilon/zeta" >> $testroot/stdout.expected + echo " A gamma/delta" >> $testroot/stdout.expected + + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + fi + test_done "$testroot" "$ret" +} + run_test test_log_in_repo run_test test_log_in_bare_repo run_test test_log_in_worktree @@ -657,3 +701,4 @@ run_test test_log_nonexistent_path run_test test_log_end_at_commit run_test test_log_reverse_display run_test test_log_in_worktree_different_repo +run_test test_log_changed_paths blob - 59e36d0a3c8e1f763e1bb8cc6e6290a98b85ab53 blob + de21c04b04cf92a585fa3fd019dce29f5c235ff7 --- tog/tog.c +++ tog/tog.c @@ -2894,6 +2894,49 @@ get_datestr(time_t *time, char *datebuf) } static const struct got_error * +get_changed_paths(struct got_pathlist_head *paths, + struct got_commit_object *commit, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object_id *tree_id1 = NULL, *tree_id2 = NULL; + struct got_tree_object *tree1 = NULL, *tree2 = NULL; + struct got_object_qid *qid; + + qid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit)); + if (qid != NULL) { + struct got_commit_object *pcommit; + err = got_object_open_as_commit(&pcommit, repo, + qid->id); + if (err) + return err; + + tree_id1 = got_object_commit_get_tree_id(pcommit); + got_object_commit_close(pcommit); + + } + + if (tree_id1) { + err = got_object_open_as_tree(&tree1, repo, tree_id1); + if (err) + goto done; + } + + tree_id2 = got_object_commit_get_tree_id(commit); + err = got_object_open_as_tree(&tree2, repo, tree_id2); + if (err) + goto done; + + err = got_diff_tree(tree1, tree2, "", "", repo, + got_diff_tree_collect_changed_paths, paths, 0); +done: + if (tree1) + got_object_tree_close(tree1); + if (tree2) + got_object_tree_close(tree2); + return err; +} + +static const struct got_error * write_commit_info(struct got_object_id *commit_id, struct got_reflist_head *refs, struct got_repository *repo, FILE *outfile) { @@ -2904,7 +2947,11 @@ write_commit_info(struct got_object_id *commit_id, time_t committer_time; const char *author, *committer; char *refs_str = NULL; + struct got_pathlist_head changed_paths; + struct got_pathlist_entry *pe; + TAILQ_INIT(&changed_paths); + if (refs) { err = build_refs_str(&refs_str, refs, commit_id, repo); if (err) @@ -2951,7 +2998,18 @@ write_commit_info(struct got_object_id *commit_id, err = got_error_from_errno("fprintf"); goto done; } + err = get_changed_paths(&changed_paths, commit, repo); + if (err) + goto done; + TAILQ_FOREACH(pe, &changed_paths, entry) { + struct got_diff_changed_path *cp = pe->data; + fprintf(outfile, "%c %s\n", cp->status, pe->path); + free((char *)pe->path); + free(pe->data); + } + fputc('\n', outfile); done: + got_pathlist_free(&changed_paths); free(id_str); free(logmsg); free(refs_str); @@ -3089,7 +3147,7 @@ create_diff(struct tog_diff_view_state *s) goto done; /* Show commit info if we're diffing to a parent/root commit. */ if (s->id1 == NULL) { - err =write_commit_info(s->id2, s->refs, s->repo, s->f); + err = write_commit_info(s->id2, s->refs, s->repo, s->f); if (err) goto done; } else { @@ -3282,7 +3340,8 @@ open_diff_view(struct tog_view *view, struct got_objec } err = add_color(&s->colors, - "^(commit|(blob|file) [-+] )", TOG_COLOR_DIFF_META, + "^(commit|(blob|file) [-+] |[MDmA] [^ ])", + TOG_COLOR_DIFF_META, get_color_value("TOG_COLOR_DIFF_META")); if (err) { free_colors(&s->colors);