"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
speed up got patch
To:
gameoftrees@openbsd.org
Date:
Fri, 13 May 2022 13:01:14 +0200

Download raw body.

Thread
  • Omar Polo:

    speed up got patch

got patch is quite slow: it takes more than three seconds to apply the
nds update diff.  While `got patch' is not the most critical part to
speed up, we're lagging behind patch(1) by quite a bit (patch takes a
tenth of a second) and it seems easy to get a decent speedup by avoiding
unnecessary work.

Profiling shows that ~60% of the time is spent in
got_worktree_schedule_add.  Unlike other parts of got that can build a
pathlist and just call the schedule function once patchfiles have
additions, modifications and removal interleaved so I have to call the
schedule function once per every patch processed.

The worktree schedule functions are quite heavy because they open the
fileindex and sync the changes afterwards.  Opening, syncing and closing
the fileindex in a loop once per every patch wastes a lot of time for no
real gain.

I have a couple of ideas on how to improve it.

The first, implemented in diff below, is to add two worktree schedule
functions specific for patch (got_worktree_patch_schedule_add/rm) that
operate on an already opened fileindex and deal with a single file at a
time.  This allows to open fileindex the start of got_patch (which is
already done btw), reuse it for the patch operation and close it (+sync)
at the end.

An alternative is to accumulate the added and removed files in two
pathlists and then schedule everything in one (actually two) calls to
the schedule functions but I don't really like it because it doesn't
keep the 'got patch' output in sync with the order of the patches.

Diff belows cuts the time to apply the nsd update by more than 50%.  It
seems hard to do better in patch right now: 54% of the runtime is taken
by open_fileindex and 21% by syncing and releasing the fileindex,
apply_patch is a mere 4%.

P.S.: I don't particularly like the got_patch_state name I came up with
      but don't have any better ideas.

diff 6d9c73d72e43db5dfe560cade0a61eed638b45d0 /home/op/w/got
blob - 83c0f25c125f10f982a164a3a028f7554654e8c4
file + include/got_worktree.h
--- include/got_worktree.h
+++ include/got_worktree.h
@@ -545,11 +545,16 @@ got_worktree_path_info(struct got_worktree *, struct g
 /* References pointing at pre-histedit commit backups. */
 #define GOT_WORKTREE_HISTEDIT_BACKUP_REF_PREFIX "refs/got/backup/histedit"
 
+struct got_patch_state {
+	struct got_fileindex	*fileindex;
+	char			*fileindex_path;
+};
+
 /*
  * Prepare for applying a patch.
  */
 const struct got_error *
-got_worktree_patch_prepare(struct got_fileindex **, struct got_worktree *);
+got_worktree_patch_prepare(struct got_patch_state *, struct got_worktree *);
 
 /*
  * Lookup paths for the "old" and "new" file before patching and check their
@@ -557,8 +562,18 @@ got_worktree_patch_prepare(struct got_fileindex **, st
  */
 const struct got_error *
 got_worktree_patch_check_path(const char *, const char *, char **, char **,
-    struct got_worktree *, struct got_repository *, struct got_fileindex *);
+    struct got_worktree *, struct got_repository *, struct got_patch_state *);
 
+const struct got_error *
+got_worktree_patch_schedule_add(const char *, struct got_repository *,
+    struct got_worktree *, struct got_patch_state *,
+    got_worktree_checkout_cb, void *);
+
+const struct got_error *
+got_worktree_patch_schedule_rm(const char *, struct got_repository *,
+    struct got_worktree *, struct got_patch_state *,
+    got_worktree_delete_cb, void *);
+
 /* Complete the patch operation. */
 const struct got_error *
-got_worktree_patch_complete(struct got_fileindex *);
+got_worktree_patch_complete(struct got_patch_state *);
blob - 9a66825918efbd6b934add0ed0636e397fb31edd
file + lib/patch.c
--- lib/patch.c
+++ lib/patch.c
@@ -575,8 +575,9 @@ patch_add(void *arg, unsigned char status, const char 
 
 static const struct got_error *
 apply_patch(struct got_worktree *worktree, struct got_repository *repo,
-    const char *old, const char *new, struct got_patch *p, int nop,
-    struct patch_args *pa, got_cancel_cb cancel_cb, void *cancel_arg)
+    struct got_patch_state *state, const char *old, const char *new,
+    struct got_patch *p, int nop, struct patch_args *pa,
+    got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
 	struct got_pathlist_head oldpaths, newpaths;
@@ -629,8 +630,8 @@ apply_patch(struct got_worktree *worktree, struct got_
 		goto done;
 
 	if (p->old != NULL && p->new == NULL) {
-		err = got_worktree_schedule_delete(worktree, &oldpaths,
-		    0, NULL, patch_delete, pa, repo, 0, 0);
+		err = got_worktree_patch_schedule_rm(old, repo, worktree,
+		    state, patch_delete, pa);
 		goto done;
 	}
 
@@ -660,16 +661,16 @@ apply_patch(struct got_worktree *worktree, struct got_
 	}
 
 	if (file_renamed) {
-		err = got_worktree_schedule_delete(worktree, &oldpaths,
-		    0, NULL, patch_delete, pa, repo, 0, 0);
+		err = got_worktree_patch_schedule_rm(old, repo, worktree,
+		    state, patch_delete, pa);
 		if (err == NULL)
-			err = got_worktree_schedule_add(worktree, &newpaths,
-			    patch_add, pa, repo, 1);
+			err = got_worktree_patch_schedule_add(new, repo,
+			    worktree, state, patch_add, pa);
 		if (err)
 			unlink(newpath);
 	} else if (p->old == NULL) {
-		err = got_worktree_schedule_add(worktree, &newpaths,
-		    patch_add, pa, repo, 1);
+		err = got_worktree_patch_schedule_add(new, repo, worktree,
+		    state, patch_add, pa);
 		if (err)
 			unlink(newpath);
 	} else
@@ -722,14 +723,16 @@ got_patch(int fd, struct got_worktree *worktree, struc
     int nop, int strip, int reverse, got_patch_progress_cb progress_cb,
     void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg)
 {
-	const struct got_error *err = NULL;
-	struct got_fileindex *fileindex = NULL;
+	const struct got_error *err = NULL, *complete_err;
+	struct got_patch_state state;
 	char *oldpath, *newpath;
 	struct imsgbuf *ibuf;
 	int imsg_fds[2] = {-1, -1};
 	int done = 0, failed = 0;
 	pid_t pid;
 
+	memset(&state, 0, sizeof(state));
+
 	ibuf = calloc(1, sizeof(*ibuf));
 	if (ibuf == NULL) {
 		err = got_error_from_errno("calloc");
@@ -763,7 +766,7 @@ got_patch(int fd, struct got_worktree *worktree, struc
 	if (err)
 		goto done;
 
-	err = got_worktree_patch_prepare(&fileindex, worktree);
+	err = got_worktree_patch_prepare(&state, worktree);
 	if (err)
 		goto done;
 
@@ -783,9 +786,9 @@ got_patch(int fd, struct got_worktree *worktree, struc
 			reverse_patch(&p);
 
 		err = got_worktree_patch_check_path(p.old, p.new, &oldpath,
-		    &newpath, worktree, repo, fileindex);
+		    &newpath, worktree, repo, &state);
 		if (err == NULL)
-			err = apply_patch(worktree, repo, oldpath, newpath,
+			err = apply_patch(worktree, repo, &state, oldpath, newpath,
 			    &p, nop, &pa, cancel_cb, cancel_arg);
 		if (err != NULL) {
 			failed = 1;
@@ -808,8 +811,9 @@ got_patch(int fd, struct got_worktree *worktree, struc
 	}
 
 done:
-	if (fileindex)
-		got_worktree_patch_complete(fileindex);
+	complete_err = got_worktree_patch_complete(&state);
+	if (complete_err && err == NULL)
+		err = complete_err;
 	if (fd != -1 && close(fd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
 	if (ibuf != NULL)
blob - bbe95ae655a519c5a9d4dc6eac25f7ace38c7452
file + lib/worktree.c
--- lib/worktree.c
+++ lib/worktree.c
@@ -8857,21 +8857,16 @@ patch_can_edit(const char *path, unsigned char status,
 }
 
 const struct got_error *
-got_worktree_patch_prepare(struct got_fileindex **fileindex,
+got_worktree_patch_prepare(struct got_patch_state *s,
     struct got_worktree *worktree)
 {
-	const struct got_error *err;
-	char *fileindex_path = NULL;
-
-	err = open_fileindex(fileindex, &fileindex_path, worktree);
-	free(fileindex_path);
-	return err;
+	return open_fileindex(&s->fileindex, &s->fileindex_path, worktree);
 }
 
 const struct got_error *
 got_worktree_patch_check_path(const char *old, const char *new,
     char **oldpath, char **newpath, struct got_worktree *worktree,
-    struct got_repository *repo, struct got_fileindex *fileindex)
+    struct got_repository *repo, struct got_patch_state *s)
 {
 	const struct got_error *err = NULL;
 	int file_renamed = 0;
@@ -8882,12 +8877,12 @@ got_worktree_patch_check_path(const char *old, const c
 	*newpath = NULL;
 
 	err = patch_check_path(old != NULL ? old : new, oldpath,
-	    &status_old, &staged_status_old, fileindex, worktree, repo);
+	    &status_old, &staged_status_old, s->fileindex, worktree, repo);
 	if (err)
 		goto done;
 
 	err = patch_check_path(new != NULL ? new : old, newpath,
-	    &status_new, &staged_status_new, fileindex, worktree, repo);
+	    &status_new, &staged_status_new, s->fileindex, worktree, repo);
 	if (err)
 		goto done;
 
@@ -8916,8 +8911,55 @@ done:
 }
 
 const struct got_error *
-got_worktree_patch_complete(struct got_fileindex *fileindex)
+got_worktree_patch_schedule_add(const char *path, struct got_repository *repo,
+    struct got_worktree *worktree, struct got_patch_state *s,
+    got_worktree_checkout_cb progress_cb, void *progress_arg)
 {
-	got_fileindex_free(fileindex);
-	return NULL;
+	struct schedule_addition_args saa;
+
+	memset(&saa, 0, sizeof(saa));
+	saa.worktree = worktree;
+	saa.fileindex = s->fileindex;
+	saa.progress_cb = progress_cb;
+	saa.progress_arg = progress_arg;
+	saa.repo = repo;
+
+	return worktree_status(worktree, path, s->fileindex, repo,
+	    schedule_addition, &saa, NULL, NULL, 1, 0);
 }
+
+const struct got_error *
+got_worktree_patch_schedule_rm(const char *path, struct got_repository *repo,
+    struct got_worktree *worktree, struct got_patch_state *s,
+    got_worktree_delete_cb progress_cb, void *progress_arg)
+{
+	struct schedule_deletion_args sda;
+
+	memset(&sda, 0, sizeof(sda));
+	sda.worktree = worktree;
+	sda.fileindex = s->fileindex;
+	sda.progress_cb = progress_cb;
+	sda.progress_arg = progress_arg;
+	sda.repo = repo;
+	sda.delete_local_mods = 0;
+	sda.keep_on_disk = 0;
+	sda.ignore_missing_paths = 0;
+	sda.status_codes = NULL;
+
+	return worktree_status(worktree, path, s->fileindex, repo,
+	    schedule_for_deletion, &sda, NULL, NULL, 1, 1);
+}
+
+const struct got_error *
+got_worktree_patch_complete(struct got_patch_state *s)
+{
+	const struct got_error *err = NULL;
+
+	if (s->fileindex) {
+		err = sync_fileindex(s->fileindex, s->fileindex_path);
+		got_fileindex_free(s->fileindex);
+	}
+
+	free(s->fileindex_path);
+	return err;
+}