From: Omar Polo Subject: speed up got patch To: gameoftrees@openbsd.org Date: Fri, 13 May 2022 13:01:14 +0200 got patch is quite slow: it takes more than three seconds to apply the nds update diff. While `got patch' is not the most critical part to speed up, we're lagging behind patch(1) by quite a bit (patch takes a tenth of a second) and it seems easy to get a decent speedup by avoiding unnecessary work. Profiling shows that ~60% of the time is spent in got_worktree_schedule_add. Unlike other parts of got that can build a pathlist and just call the schedule function once patchfiles have additions, modifications and removal interleaved so I have to call the schedule function once per every patch processed. The worktree schedule functions are quite heavy because they open the fileindex and sync the changes afterwards. Opening, syncing and closing the fileindex in a loop once per every patch wastes a lot of time for no real gain. I have a couple of ideas on how to improve it. The first, implemented in diff below, is to add two worktree schedule functions specific for patch (got_worktree_patch_schedule_add/rm) that operate on an already opened fileindex and deal with a single file at a time. This allows to open fileindex the start of got_patch (which is already done btw), reuse it for the patch operation and close it (+sync) at the end. An alternative is to accumulate the added and removed files in two pathlists and then schedule everything in one (actually two) calls to the schedule functions but I don't really like it because it doesn't keep the 'got patch' output in sync with the order of the patches. Diff belows cuts the time to apply the nsd update by more than 50%. It seems hard to do better in patch right now: 54% of the runtime is taken by open_fileindex and 21% by syncing and releasing the fileindex, apply_patch is a mere 4%. P.S.: I don't particularly like the got_patch_state name I came up with but don't have any better ideas. diff 6d9c73d72e43db5dfe560cade0a61eed638b45d0 /home/op/w/got blob - 83c0f25c125f10f982a164a3a028f7554654e8c4 file + include/got_worktree.h --- include/got_worktree.h +++ include/got_worktree.h @@ -545,11 +545,16 @@ got_worktree_path_info(struct got_worktree *, struct g /* References pointing at pre-histedit commit backups. */ #define GOT_WORKTREE_HISTEDIT_BACKUP_REF_PREFIX "refs/got/backup/histedit" +struct got_patch_state { + struct got_fileindex *fileindex; + char *fileindex_path; +}; + /* * Prepare for applying a patch. */ const struct got_error * -got_worktree_patch_prepare(struct got_fileindex **, struct got_worktree *); +got_worktree_patch_prepare(struct got_patch_state *, struct got_worktree *); /* * Lookup paths for the "old" and "new" file before patching and check their @@ -557,8 +562,18 @@ got_worktree_patch_prepare(struct got_fileindex **, st */ const struct got_error * got_worktree_patch_check_path(const char *, const char *, char **, char **, - struct got_worktree *, struct got_repository *, struct got_fileindex *); + struct got_worktree *, struct got_repository *, struct got_patch_state *); +const struct got_error * +got_worktree_patch_schedule_add(const char *, struct got_repository *, + struct got_worktree *, struct got_patch_state *, + got_worktree_checkout_cb, void *); + +const struct got_error * +got_worktree_patch_schedule_rm(const char *, struct got_repository *, + struct got_worktree *, struct got_patch_state *, + got_worktree_delete_cb, void *); + /* Complete the patch operation. */ const struct got_error * -got_worktree_patch_complete(struct got_fileindex *); +got_worktree_patch_complete(struct got_patch_state *); blob - 9a66825918efbd6b934add0ed0636e397fb31edd file + lib/patch.c --- lib/patch.c +++ lib/patch.c @@ -575,8 +575,9 @@ patch_add(void *arg, unsigned char status, const char static const struct got_error * apply_patch(struct got_worktree *worktree, struct got_repository *repo, - const char *old, const char *new, struct got_patch *p, int nop, - struct patch_args *pa, got_cancel_cb cancel_cb, void *cancel_arg) + struct got_patch_state *state, const char *old, const char *new, + struct got_patch *p, int nop, struct patch_args *pa, + got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; struct got_pathlist_head oldpaths, newpaths; @@ -629,8 +630,8 @@ apply_patch(struct got_worktree *worktree, struct got_ goto done; if (p->old != NULL && p->new == NULL) { - err = got_worktree_schedule_delete(worktree, &oldpaths, - 0, NULL, patch_delete, pa, repo, 0, 0); + err = got_worktree_patch_schedule_rm(old, repo, worktree, + state, patch_delete, pa); goto done; } @@ -660,16 +661,16 @@ apply_patch(struct got_worktree *worktree, struct got_ } if (file_renamed) { - err = got_worktree_schedule_delete(worktree, &oldpaths, - 0, NULL, patch_delete, pa, repo, 0, 0); + err = got_worktree_patch_schedule_rm(old, repo, worktree, + state, patch_delete, pa); if (err == NULL) - err = got_worktree_schedule_add(worktree, &newpaths, - patch_add, pa, repo, 1); + err = got_worktree_patch_schedule_add(new, repo, + worktree, state, patch_add, pa); if (err) unlink(newpath); } else if (p->old == NULL) { - err = got_worktree_schedule_add(worktree, &newpaths, - patch_add, pa, repo, 1); + err = got_worktree_patch_schedule_add(new, repo, worktree, + state, patch_add, pa); if (err) unlink(newpath); } else @@ -722,14 +723,16 @@ got_patch(int fd, struct got_worktree *worktree, struc int nop, int strip, int reverse, got_patch_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg) { - const struct got_error *err = NULL; - struct got_fileindex *fileindex = NULL; + const struct got_error *err = NULL, *complete_err; + struct got_patch_state state; char *oldpath, *newpath; struct imsgbuf *ibuf; int imsg_fds[2] = {-1, -1}; int done = 0, failed = 0; pid_t pid; + memset(&state, 0, sizeof(state)); + ibuf = calloc(1, sizeof(*ibuf)); if (ibuf == NULL) { err = got_error_from_errno("calloc"); @@ -763,7 +766,7 @@ got_patch(int fd, struct got_worktree *worktree, struc if (err) goto done; - err = got_worktree_patch_prepare(&fileindex, worktree); + err = got_worktree_patch_prepare(&state, worktree); if (err) goto done; @@ -783,9 +786,9 @@ got_patch(int fd, struct got_worktree *worktree, struc reverse_patch(&p); err = got_worktree_patch_check_path(p.old, p.new, &oldpath, - &newpath, worktree, repo, fileindex); + &newpath, worktree, repo, &state); if (err == NULL) - err = apply_patch(worktree, repo, oldpath, newpath, + err = apply_patch(worktree, repo, &state, oldpath, newpath, &p, nop, &pa, cancel_cb, cancel_arg); if (err != NULL) { failed = 1; @@ -808,8 +811,9 @@ got_patch(int fd, struct got_worktree *worktree, struc } done: - if (fileindex) - got_worktree_patch_complete(fileindex); + complete_err = got_worktree_patch_complete(&state); + if (complete_err && err == NULL) + err = complete_err; if (fd != -1 && close(fd) == -1 && err == NULL) err = got_error_from_errno("close"); if (ibuf != NULL) blob - bbe95ae655a519c5a9d4dc6eac25f7ace38c7452 file + lib/worktree.c --- lib/worktree.c +++ lib/worktree.c @@ -8857,21 +8857,16 @@ patch_can_edit(const char *path, unsigned char status, } const struct got_error * -got_worktree_patch_prepare(struct got_fileindex **fileindex, +got_worktree_patch_prepare(struct got_patch_state *s, struct got_worktree *worktree) { - const struct got_error *err; - char *fileindex_path = NULL; - - err = open_fileindex(fileindex, &fileindex_path, worktree); - free(fileindex_path); - return err; + return open_fileindex(&s->fileindex, &s->fileindex_path, worktree); } const struct got_error * got_worktree_patch_check_path(const char *old, const char *new, char **oldpath, char **newpath, struct got_worktree *worktree, - struct got_repository *repo, struct got_fileindex *fileindex) + struct got_repository *repo, struct got_patch_state *s) { const struct got_error *err = NULL; int file_renamed = 0; @@ -8882,12 +8877,12 @@ got_worktree_patch_check_path(const char *old, const c *newpath = NULL; err = patch_check_path(old != NULL ? old : new, oldpath, - &status_old, &staged_status_old, fileindex, worktree, repo); + &status_old, &staged_status_old, s->fileindex, worktree, repo); if (err) goto done; err = patch_check_path(new != NULL ? new : old, newpath, - &status_new, &staged_status_new, fileindex, worktree, repo); + &status_new, &staged_status_new, s->fileindex, worktree, repo); if (err) goto done; @@ -8916,8 +8911,55 @@ done: } const struct got_error * -got_worktree_patch_complete(struct got_fileindex *fileindex) +got_worktree_patch_schedule_add(const char *path, struct got_repository *repo, + struct got_worktree *worktree, struct got_patch_state *s, + got_worktree_checkout_cb progress_cb, void *progress_arg) { - got_fileindex_free(fileindex); - return NULL; + struct schedule_addition_args saa; + + memset(&saa, 0, sizeof(saa)); + saa.worktree = worktree; + saa.fileindex = s->fileindex; + saa.progress_cb = progress_cb; + saa.progress_arg = progress_arg; + saa.repo = repo; + + return worktree_status(worktree, path, s->fileindex, repo, + schedule_addition, &saa, NULL, NULL, 1, 0); } + +const struct got_error * +got_worktree_patch_schedule_rm(const char *path, struct got_repository *repo, + struct got_worktree *worktree, struct got_patch_state *s, + got_worktree_delete_cb progress_cb, void *progress_arg) +{ + struct schedule_deletion_args sda; + + memset(&sda, 0, sizeof(sda)); + sda.worktree = worktree; + sda.fileindex = s->fileindex; + sda.progress_cb = progress_cb; + sda.progress_arg = progress_arg; + sda.repo = repo; + sda.delete_local_mods = 0; + sda.keep_on_disk = 0; + sda.ignore_missing_paths = 0; + sda.status_codes = NULL; + + return worktree_status(worktree, path, s->fileindex, repo, + schedule_for_deletion, &sda, NULL, NULL, 1, 1); +} + +const struct got_error * +got_worktree_patch_complete(struct got_patch_state *s) +{ + const struct got_error *err = NULL; + + if (s->fileindex) { + err = sync_fileindex(s->fileindex, s->fileindex_path); + got_fileindex_free(s->fileindex); + } + + free(s->fileindex_path); + return err; +}