From: Stefan Sperling Subject: Re: teach gotadmin cleanup to remove redundant packfiles To: Omar Polo Cc: gameoftrees@openbsd.org Date: Mon, 19 Jun 2023 15:26:44 +0200 On Mon, Jun 19, 2023 at 12:46:10PM +0200, Omar Polo wrote: > i forgot about the _prepare()/_complete() naming. Here's hopefully a > better diff Yeah, let's go with this. ok > diff /home/op/w/got > commit - 664d62f9f2469ee2bc583ab0bbc90e56bf2d560e > path + /home/op/w/got > blob - b374606457024caef32ca71ee66bd3057552e4b1 > file + gotadmin/gotadmin.c > --- gotadmin/gotadmin.c > +++ gotadmin/gotadmin.c > @@ -1236,6 +1236,7 @@ cmd_cleanup(int argc, char *argv[]) > struct got_repository *repo = NULL; > int ch, dry_run = 0, npacked = 0, verbosity = 0; > int remove_lonely_packidx = 0, ignore_mtime = 0; > + struct got_lockfile *lock = NULL; > struct got_cleanup_progress_arg cpa; > struct got_lonely_packidx_progress_arg lpa; > off_t loose_before, loose_after; > @@ -1307,6 +1308,10 @@ cmd_cleanup(int argc, char *argv[]) > goto done; > } > > + error = got_repo_cleanup_prepare(repo, &lock); > + if (error) > + goto done; > + > if (remove_lonely_packidx) { > memset(&lpa, 0, sizeof(lpa)); > lpa.dry_run = dry_run; > @@ -1381,6 +1386,7 @@ done: > } > > done: > + got_repo_cleanup_complete(repo, lock); > if (repo) > got_repo_close(repo); > if (pack_fds) { > blob - 57bb3f85a90ec6fdf48dc338be1a61ccab5a133c > file + include/got_repository_admin.h > --- include/got_repository_admin.h > +++ include/got_repository_admin.h > @@ -14,6 +14,8 @@ > * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > */ > > +struct got_lockfile; > + > /* A callback function which gets invoked with progress information to print. */ > typedef const struct got_error *(*got_pack_progress_cb)(void *arg, > int ncolored, int nfound, int ntrees, off_t packfile_size, int ncommits, > @@ -68,6 +70,18 @@ got_repo_list_pack(FILE *packfile, struct got_object_i > struct got_repository *repo, got_pack_list_cb list_cb, void *list_arg, > got_cancel_cb cancel_cb, void *cancel_arg); > > +/* > + * Prepare for removing loose objects or redundant packfiles. > + * > + * These functions do the necessary locking in order to avoid > + * concurrent operation to irremediably damage the repository. > + */ > +const struct got_error * > +got_repo_cleanup_prepare(struct got_repository *, struct got_lockfile **); > + > +const struct got_error * > +got_repo_cleanup_complete(struct got_repository *, struct got_lockfile *); > + > /* A callback function which gets invoked with cleanup information to print. */ > typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg, > int nloose, int ncommits, int npurged, int nredundant); > blob - 8220c815a61bbe282585b0a67e7b39f74ad2a9a4 > file + lib/got_lib_repository.h > --- lib/got_lib_repository.h > +++ lib/got_lib_repository.h > @@ -134,6 +134,9 @@ struct got_repository { > > /* Settings read from got.conf. */ > struct got_gotconfig *gotconfig; > + > + /* cleanup lockfile */ > + struct got_lockfile *cleanup_lock; > }; > > const struct got_error*got_repo_cache_object(struct got_repository *, > blob - e1a1870147bfefcb7e3caaaedf16d6b92e78a5c1 > file + lib/lockfile.c > --- lib/lockfile.c > +++ lib/lockfile.c > @@ -55,11 +55,11 @@ got_lockfile_lock(struct got_lockfile **lf, const char > do { > if (dir_fd != -1) { > (*lf)->fd = openat(dir_fd, (*lf)->path, > - O_RDONLY | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC, > + O_RDWR | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC, > GOT_DEFAULT_FILE_MODE); > } else { > (*lf)->fd = open((*lf)->path, > - O_RDONLY | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC, > + O_RDWR | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC, > GOT_DEFAULT_FILE_MODE); > } > if ((*lf)->fd != -1) > blob - 213bc1e83451612e1e2ba0be6bf664e3f4f44050 > file + lib/repository_admin.c > --- lib/repository_admin.c > +++ lib/repository_admin.c > @@ -613,6 +613,42 @@ static const struct got_error * > return err; > } > > +const struct got_error * > +got_repo_cleanup_prepare(struct got_repository *repo, > + struct got_lockfile **lk) > +{ > + const struct got_error *err; > + char myname[HOST_NAME_MAX + 1]; > + > + if (gethostname(myname, sizeof(myname)) == -1) > + return got_error_from_errno("gethostname"); > + > + err = got_lockfile_lock(lk, "gc.pid", got_repo_get_fd(repo)); > + if (err) > + return err; > + > + /* > + * Git uses these info to provide some verbiage when finds a > + * lock during `git gc --force' so don't try too hard to avoid > + * short writes and don't care if a race happens between the > + * lockfile creation and the write itself. > + */ > + if (dprintf((*lk)->fd, "%d %s", getpid(), myname) < 0) > + return got_error_from_errno("dprintf"); > + > + return NULL; > +} > + > +const struct got_error * > +got_repo_cleanup_complete(struct got_repository *repo, > + struct got_lockfile *lk) > +{ > + if (lk == NULL) > + return NULL; > + > + return got_lockfile_unlock(lk, got_repo_get_fd(repo)); > +} > + > static const struct got_error * > report_cleanup_progress(got_cleanup_progress_cb progress_cb, > void *progress_arg, struct got_ratelimit *rl, > @@ -1419,21 +1455,6 @@ static const struct got_error * > return err; > } > > -static const struct got_error * > -remove_packidx(int dir_fd, const char *relpath) > -{ > - const struct got_error *err, *unlock_err; > - struct got_lockfile *lf; > - > - err = got_lockfile_lock(&lf, relpath, dir_fd); > - if (err) > - return err; > - if (unlinkat(dir_fd, relpath, 0) == -1) > - err = got_error_from_errno("unlinkat"); > - unlock_err = got_lockfile_unlock(lf, dir_fd); > - return err ? err : unlock_err; > -} > - > const struct got_error * > got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run, > got_lonely_packidx_progress_cb progress_cb, void *progress_arg, > @@ -1491,9 +1512,10 @@ got_repo_remove_lonely_packidx(struct got_repository * > } > > if (!dry_run) { > - err = remove_packidx(packdir_fd, dent->d_name); > - if (err) > + if (unlinkat(packdir_fd, dent->d_name, 0) == -1) { > + err = got_error_from_errno("unlinkat"); > goto done; > + } > } > if (progress_cb) { > char *path; > >