Download raw body.
teach gotadmin cleanup to remove redundant packfiles
On Mon, Jun 19, 2023 at 12:46:10PM +0200, Omar Polo wrote:
> i forgot about the _prepare()/_complete() naming. Here's hopefully a
> better diff
Yeah, let's go with this. ok
> diff /home/op/w/got
> commit - 664d62f9f2469ee2bc583ab0bbc90e56bf2d560e
> path + /home/op/w/got
> blob - b374606457024caef32ca71ee66bd3057552e4b1
> file + gotadmin/gotadmin.c
> --- gotadmin/gotadmin.c
> +++ gotadmin/gotadmin.c
> @@ -1236,6 +1236,7 @@ cmd_cleanup(int argc, char *argv[])
> struct got_repository *repo = NULL;
> int ch, dry_run = 0, npacked = 0, verbosity = 0;
> int remove_lonely_packidx = 0, ignore_mtime = 0;
> + struct got_lockfile *lock = NULL;
> struct got_cleanup_progress_arg cpa;
> struct got_lonely_packidx_progress_arg lpa;
> off_t loose_before, loose_after;
> @@ -1307,6 +1308,10 @@ cmd_cleanup(int argc, char *argv[])
> goto done;
> }
>
> + error = got_repo_cleanup_prepare(repo, &lock);
> + if (error)
> + goto done;
> +
> if (remove_lonely_packidx) {
> memset(&lpa, 0, sizeof(lpa));
> lpa.dry_run = dry_run;
> @@ -1381,6 +1386,7 @@ done:
> }
>
> done:
> + got_repo_cleanup_complete(repo, lock);
> if (repo)
> got_repo_close(repo);
> if (pack_fds) {
> blob - 57bb3f85a90ec6fdf48dc338be1a61ccab5a133c
> file + include/got_repository_admin.h
> --- include/got_repository_admin.h
> +++ include/got_repository_admin.h
> @@ -14,6 +14,8 @@
> * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> */
>
> +struct got_lockfile;
> +
> /* A callback function which gets invoked with progress information to print. */
> typedef const struct got_error *(*got_pack_progress_cb)(void *arg,
> int ncolored, int nfound, int ntrees, off_t packfile_size, int ncommits,
> @@ -68,6 +70,18 @@ got_repo_list_pack(FILE *packfile, struct got_object_i
> struct got_repository *repo, got_pack_list_cb list_cb, void *list_arg,
> got_cancel_cb cancel_cb, void *cancel_arg);
>
> +/*
> + * Prepare for removing loose objects or redundant packfiles.
> + *
> + * These functions do the necessary locking in order to avoid
> + * concurrent operation to irremediably damage the repository.
> + */
> +const struct got_error *
> +got_repo_cleanup_prepare(struct got_repository *, struct got_lockfile **);
> +
> +const struct got_error *
> +got_repo_cleanup_complete(struct got_repository *, struct got_lockfile *);
> +
> /* A callback function which gets invoked with cleanup information to print. */
> typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg,
> int nloose, int ncommits, int npurged, int nredundant);
> blob - 8220c815a61bbe282585b0a67e7b39f74ad2a9a4
> file + lib/got_lib_repository.h
> --- lib/got_lib_repository.h
> +++ lib/got_lib_repository.h
> @@ -134,6 +134,9 @@ struct got_repository {
>
> /* Settings read from got.conf. */
> struct got_gotconfig *gotconfig;
> +
> + /* cleanup lockfile */
> + struct got_lockfile *cleanup_lock;
> };
>
> const struct got_error*got_repo_cache_object(struct got_repository *,
> blob - e1a1870147bfefcb7e3caaaedf16d6b92e78a5c1
> file + lib/lockfile.c
> --- lib/lockfile.c
> +++ lib/lockfile.c
> @@ -55,11 +55,11 @@ got_lockfile_lock(struct got_lockfile **lf, const char
> do {
> if (dir_fd != -1) {
> (*lf)->fd = openat(dir_fd, (*lf)->path,
> - O_RDONLY | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC,
> + O_RDWR | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC,
> GOT_DEFAULT_FILE_MODE);
> } else {
> (*lf)->fd = open((*lf)->path,
> - O_RDONLY | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC,
> + O_RDWR | O_CREAT | O_EXCL | O_EXLOCK | O_CLOEXEC,
> GOT_DEFAULT_FILE_MODE);
> }
> if ((*lf)->fd != -1)
> blob - 213bc1e83451612e1e2ba0be6bf664e3f4f44050
> file + lib/repository_admin.c
> --- lib/repository_admin.c
> +++ lib/repository_admin.c
> @@ -613,6 +613,42 @@ static const struct got_error *
> return err;
> }
>
> +const struct got_error *
> +got_repo_cleanup_prepare(struct got_repository *repo,
> + struct got_lockfile **lk)
> +{
> + const struct got_error *err;
> + char myname[HOST_NAME_MAX + 1];
> +
> + if (gethostname(myname, sizeof(myname)) == -1)
> + return got_error_from_errno("gethostname");
> +
> + err = got_lockfile_lock(lk, "gc.pid", got_repo_get_fd(repo));
> + if (err)
> + return err;
> +
> + /*
> + * Git uses these info to provide some verbiage when finds a
> + * lock during `git gc --force' so don't try too hard to avoid
> + * short writes and don't care if a race happens between the
> + * lockfile creation and the write itself.
> + */
> + if (dprintf((*lk)->fd, "%d %s", getpid(), myname) < 0)
> + return got_error_from_errno("dprintf");
> +
> + return NULL;
> +}
> +
> +const struct got_error *
> +got_repo_cleanup_complete(struct got_repository *repo,
> + struct got_lockfile *lk)
> +{
> + if (lk == NULL)
> + return NULL;
> +
> + return got_lockfile_unlock(lk, got_repo_get_fd(repo));
> +}
> +
> static const struct got_error *
> report_cleanup_progress(got_cleanup_progress_cb progress_cb,
> void *progress_arg, struct got_ratelimit *rl,
> @@ -1419,21 +1455,6 @@ static const struct got_error *
> return err;
> }
>
> -static const struct got_error *
> -remove_packidx(int dir_fd, const char *relpath)
> -{
> - const struct got_error *err, *unlock_err;
> - struct got_lockfile *lf;
> -
> - err = got_lockfile_lock(&lf, relpath, dir_fd);
> - if (err)
> - return err;
> - if (unlinkat(dir_fd, relpath, 0) == -1)
> - err = got_error_from_errno("unlinkat");
> - unlock_err = got_lockfile_unlock(lf, dir_fd);
> - return err ? err : unlock_err;
> -}
> -
> const struct got_error *
> got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run,
> got_lonely_packidx_progress_cb progress_cb, void *progress_arg,
> @@ -1491,9 +1512,10 @@ got_repo_remove_lonely_packidx(struct got_repository *
> }
>
> if (!dry_run) {
> - err = remove_packidx(packdir_fd, dent->d_name);
> - if (err)
> + if (unlinkat(packdir_fd, dent->d_name, 0) == -1) {
> + err = got_error_from_errno("unlinkat");
> goto done;
> + }
> }
> if (progress_cb) {
> char *path;
>
>
teach gotadmin cleanup to remove redundant packfiles