Download raw body.
close race between 'gotadmin cleanup' and 'got commit'
This patch should allow 'got commit' to succeed even while 'gotamin cleanup' is running in parallel on the same repository. There is an inherent race when deleting loose object files while new objects are being added to the repository. New objects will not be referenced before a new reference is created or an existing reference is modified. I have decided to use the modification timestamp of the youngest reference in the repository as a guideline. This avoids issues when the system clock does not agree with timestamps stored in the repository. ok? diff refs/heads/main refs/heads/ref-mtime blob - 7447f80c0b353f5e721127e74978c14924336573 blob + 55073892c008a0924a723fa71265758d5fa587a2 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -181,7 +181,7 @@ and a break-down of the number of objects per object t .It Cm ls Short alias for .Cm listpack . -.It Cm cleanup Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc +.It Cm cleanup Oo Fl a Oc Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc Purge unreferenced loose objects from the repository and display the amount of disk space which has been freed as a result. .Pp @@ -258,6 +258,13 @@ The options for .Cm gotadmin cleanup are as follows: .Bl -tag -width Ds +.It Fl a +Delete all loose objects. +By default, objects which are newer than an implementation-defined +modification timestamp are kept on disk to prevent race conditions +with other commands that add new objects to the repository while +.Cm gotadmin cleanup +is running. .It Fl p Instead of purging unreferenced loose objects, remove any pack index files which do not have a corresponding pack file. blob - 810a5cc02697494c98449bfb5e56bfdb7b79349e blob + 928fcd8b030f65a1702ac22c29886c7438444548 --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -897,7 +897,7 @@ done: __dead static void usage_cleanup(void) { - fprintf(stderr, "usage: %s cleanup [-p] [-n] [-r repository-path] " + fprintf(stderr, "usage: %s cleanup [-a] [-p] [-n] [-r repository-path] " "[-q]\n", getprogname()); exit(1); } @@ -989,7 +989,7 @@ cmd_cleanup(int argc, char *argv[]) char *cwd = NULL, *repo_path = NULL; struct got_repository *repo = NULL; int ch, dry_run = 0, npacked = 0, verbosity = 0; - int remove_lonely_packidx = 0; + int remove_lonely_packidx = 0, ignore_mtime = 0; struct got_cleanup_progress_arg cpa; struct got_lonely_packidx_progress_arg lpa; off_t size_before, size_after; @@ -999,8 +999,11 @@ cmd_cleanup(int argc, char *argv[]) char **extensions; int nextensions, i; - while ((ch = getopt(argc, argv, "pr:nq")) != -1) { + while ((ch = getopt(argc, argv, "apr:nq")) != -1) { switch (ch) { + case 'a': + ignore_mtime = 1; + break; case 'p': remove_lonely_packidx = 1; break; @@ -1071,7 +1074,7 @@ cmd_cleanup(int argc, char *argv[]) cpa.dry_run = dry_run; cpa.verbosity = verbosity; error = got_repo_purge_unreferenced_loose_objects(repo, - &size_before, &size_after, &npacked, dry_run, + &size_before, &size_after, &npacked, dry_run, ignore_mtime, cleanup_progress, &cpa, check_cancelled, NULL); if (cpa.printed_something) printf("\n"); blob - f666e84083ffb2ad05eab02a4fe4cc4a06853221 blob + 89ec5aabb1b27cad61902560dc7e83551e00665b --- include/got_reference.h +++ include/got_reference.h @@ -59,6 +59,9 @@ const char *got_ref_get_name(struct got_reference *); /* Get the name of the reference which a symoblic reference points at. */ const char *got_ref_get_symref_target(struct got_reference *); +/* Get the last modification timestamp of the reference. */ +time_t got_ref_get_mtime(struct got_reference *); + /* * Create a duplicate copy of a reference. * The caller must dispose of this copy with got_ref_close(). blob - d1e9be3e619d56a5e9e4569edfdc2cf9071ae49c blob + 8bc7ea591673f80b94917ac36991936b0fc46038 --- include/got_repository_admin.h +++ include/got_repository_admin.h @@ -76,6 +76,8 @@ typedef const struct got_error *(*got_cleanup_progress * Walk objects reachable via references to determine whether any loose * objects can be removed from disk. Do remove such objects from disk * unless the dry_run parameter is set. + * Do not remove objects with a modification timestamp above an + * implementation-defined timestamp threshold, unless ignore_mtime is set. * Return the disk space size occupied by loose objects before and after * the operation. * Return the number of loose objects which are also stored in a pack file. @@ -83,7 +85,7 @@ typedef const struct got_error *(*got_cleanup_progress const struct got_error * got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, off_t *size_before, off_t *size_after, int *npacked, int dry_run, - got_cleanup_progress_cb progress_cb, void *progress_arg, + int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg); /* A callback function which gets invoked with cleanup information to print. */ blob - ee703d1487de86d26b7373621514084605d208eb blob + 1f7e7d1033f43108af0c239bd50be641dccfc47d --- lib/reference.c +++ lib/reference.c @@ -87,11 +87,12 @@ struct got_reference { } ref; struct got_lockfile *lf; + time_t mtime; }; static const struct got_error * alloc_ref(struct got_reference **ref, const char *name, - struct got_object_id *id, int flags) + struct got_object_id *id, int flags, time_t mtime) { const struct got_error *err = NULL; @@ -102,6 +103,7 @@ alloc_ref(struct got_reference **ref, const char *name memcpy((*ref)->ref.ref.sha1, id->sha1, sizeof((*ref)->ref.ref.sha1)); (*ref)->flags = flags; (*ref)->ref.ref.name = strdup(name); + (*ref)->mtime = mtime; if ((*ref)->ref.ref.name == NULL) { err = got_error_from_errno("strdup"); got_ref_close(*ref); @@ -147,7 +149,8 @@ parse_symref(struct got_reference **ref, const char *n } static const struct got_error * -parse_ref_line(struct got_reference **ref, const char *name, const char *line) +parse_ref_line(struct got_reference **ref, const char *name, const char *line, + time_t mtime) { struct got_object_id id; @@ -159,7 +162,7 @@ parse_ref_line(struct got_reference **ref, const char if (!got_parse_sha1_digest(id.sha1, line)) return got_error(GOT_ERR_BAD_REF_DATA); - return alloc_ref(ref, name, &id, 0); + return alloc_ref(ref, name, &id, 0, mtime); } static const struct got_error * @@ -172,6 +175,7 @@ parse_ref_file(struct got_reference **ref, const char size_t linesize = 0; ssize_t linelen; struct got_lockfile *lf = NULL; + struct stat sb; if (lock) { err = got_lockfile_lock(&lf, abspath, -1); @@ -192,6 +196,10 @@ parse_ref_file(struct got_reference **ref, const char got_lockfile_unlock(lf, -1); return err; } + if (fstat(fileno(f), &sb) == -1) { + err = got_error_from_errno2("fstat", abspath); + goto done; + } linelen = getline(&line, &linesize, f); if (linelen == -1) { @@ -214,7 +222,7 @@ parse_ref_file(struct got_reference **ref, const char linelen--; } - err = parse_ref_line(ref, absname, line); + err = parse_ref_line(ref, absname, line, sb.st_mtime); if (lock) { if (err) got_lockfile_unlock(lf, -1); @@ -315,7 +323,7 @@ got_ref_alloc(struct got_reference **ref, const char * if (!is_valid_ref_name(name)) return got_error_path(name, GOT_ERR_BAD_REF_NAME); - return alloc_ref(ref, name, id, 0); + return alloc_ref(ref, name, id, 0, 0); } const struct got_error * @@ -330,7 +338,7 @@ got_ref_alloc_symref(struct got_reference **ref, const static const struct got_error * parse_packed_ref_line(struct got_reference **ref, const char *abs_refname, - const char *line) + const char *line, time_t mtime) { struct got_object_id id; const char *name; @@ -350,12 +358,12 @@ parse_packed_ref_line(struct got_reference **ref, cons } else name = line + SHA1_DIGEST_STRING_LENGTH; - return alloc_ref(ref, name, &id, GOT_REF_IS_PACKED); + return alloc_ref(ref, name, &id, GOT_REF_IS_PACKED, mtime); } static const struct got_error * open_packed_ref(struct got_reference **ref, FILE *f, const char **subdirs, - int nsubdirs, const char *refname) + int nsubdirs, const char *refname, time_t mtime) { const struct got_error *err = NULL; char *abs_refname; @@ -383,7 +391,8 @@ open_packed_ref(struct got_reference **ref, FILE *f, c asprintf(&abs_refname, "refs/%s/%s", subdirs[i], refname) == -1) return got_error_from_errno("asprintf"); - err = parse_packed_ref_line(ref, abs_refname, line); + err = parse_packed_ref_line(ref, abs_refname, line, + mtime); if (!ref_is_absolute) free(abs_refname); if (err || *ref != NULL) @@ -486,8 +495,14 @@ got_ref_open(struct got_reference **ref, struct got_re f = fopen(packed_refs_path, "rb"); free(packed_refs_path); if (f != NULL) { + struct stat sb; + if (fstat(fileno(f), &sb) == -1) { + err = got_error_from_errno2("fstat", + packed_refs_path); + goto done; + } err = open_packed_ref(ref, f, subdirs, nitems(subdirs), - refname); + refname, sb.st_mtime); if (!err) { if (fclose(f) == EOF) { err = got_error_from_errno("fclose"); @@ -670,6 +685,12 @@ got_ref_get_symref_target(struct got_reference *ref) return NULL; } +time_t +got_ref_get_mtime(struct got_reference *ref) +{ + return ref->mtime; +} + const struct got_error * got_ref_cmp_by_name(void *arg, int *cmp, struct got_reference *re1, struct got_reference* re2) @@ -1013,6 +1034,12 @@ got_ref_list(struct got_reflist_head *refs, struct got if (f) { size_t linesize = 0; ssize_t linelen; + struct stat sb; + + if (fstat(fileno(f), &sb) == -1) { + err = got_error_from_errno2("fstat", packed_refs_path); + goto done; + } for (;;) { linelen = getline(&line, &linesize, f); if (linelen == -1) { @@ -1023,7 +1050,8 @@ got_ref_list(struct got_reflist_head *refs, struct got } if (linelen > 0 && line[linelen - 1] == '\n') line[linelen - 1] = '\0'; - err = parse_packed_ref_line(&ref, NULL, line); + err = parse_packed_ref_line(&ref, NULL, line, + sb.st_mtime); if (err) goto done; if (ref) { @@ -1201,6 +1229,12 @@ got_ref_write(struct got_reference *ref, struct got_re } free(tmppath); tmppath = NULL; + + if (stat(path, &sb) == -1) { + err = got_error_from_errno2("stat", path); + goto done; + } + ref->mtime = sb.st_mtime; done: if (ref->lf == NULL && lf) unlock_err = got_lockfile_unlock(lf, -1); @@ -1268,7 +1302,7 @@ delete_packed_ref(struct got_reference *delref, struct } if (linelen > 0 && line[linelen - 1] == '\n') line[linelen - 1] = '\0'; - err = parse_packed_ref_line(&ref, NULL, line); + err = parse_packed_ref_line(&ref, NULL, line, 0); if (err) goto done; if (ref == NULL) blob - f315741522c813f2f4e3d56e2caa15969cd6200d blob + deb76e2c5553e4ba47753fc3862764b88217ec6a --- lib/repository_admin.c +++ lib/repository_admin.c @@ -1028,6 +1028,8 @@ struct purge_loose_object_arg { int npurged; off_t size_purged; int dry_run; + time_t max_mtime; + int ignore_mtime; }; static const struct got_error * @@ -1053,21 +1055,29 @@ purge_loose_object(struct got_object_id *id, void *dat goto done; } - if (!a->dry_run) { - err = got_lockfile_lock(&lf, path, -1); - if (err) - goto done; - if (unlink(path) == -1) { - err = got_error_from_errno2("unlink", path); - goto done; + /* + * Do not delete objects which are younger than our maximum + * modification time threshold. This prevents a race where + * new objects which are being added to the repository + * concurrently would be deleted. + */ + if (a->ignore_mtime || sb.st_mtime <= a->max_mtime) { + if (!a->dry_run) { + err = got_lockfile_lock(&lf, path, -1); + if (err) + goto done; + if (unlink(path) == -1) { + err = got_error_from_errno2("unlink", path); + goto done; + } } - } - a->npurged++; - a->size_purged += sb.st_size; - if (a->progress_cb) { - err = a->progress_cb(a->progress_arg, a->nloose, - a->ncommits, a->npurged); + a->npurged++; + a->size_purged += sb.st_size; + if (a->progress_cb) { + err = a->progress_cb(a->progress_arg, a->nloose, + a->ncommits, a->npurged); + } } done: if (fd != -1 && close(fd) == -1 && err == NULL) @@ -1081,7 +1091,7 @@ done: const struct got_error * got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, off_t *size_before, off_t *size_after, int *npacked, int dry_run, - got_cleanup_progress_cb progress_cb, void *progress_arg, + int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; @@ -1090,7 +1100,9 @@ got_repo_purge_unreferenced_loose_objects(struct got_r struct got_object_id **referenced_ids; int i, nreferenced, nloose, ncommits = 0; struct got_reflist_head refs; + struct got_reflist_entry *re; struct purge_loose_object_arg arg; + time_t max_mtime = 0; TAILQ_INIT(&refs); @@ -1117,6 +1129,19 @@ got_repo_purge_unreferenced_loose_objects(struct got_r err = got_ref_list(&refs, repo, "", got_ref_cmp_by_name, NULL); if (err) goto done; + if (!ignore_mtime) { + TAILQ_FOREACH(re, &refs, entry) { + time_t mtime = got_ref_get_mtime(re->ref); + if (mtime > max_mtime) + max_mtime = mtime; + } + /* + * For safety, keep objects created within 10 minutes + * before the youngest reference was created. + */ + if (max_mtime >= 600) + max_mtime -= 600; + } err = get_reflist_object_ids(&referenced_ids, &nreferenced, (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG), @@ -1149,6 +1174,8 @@ got_repo_purge_unreferenced_loose_objects(struct got_r arg.size_purged = 0; arg.ncommits = ncommits; arg.dry_run = dry_run; + arg.max_mtime = max_mtime; + arg.ignore_mtime = ignore_mtime; err = got_object_idset_for_each(loose_ids, purge_loose_object, &arg); if (err) goto done; blob - 72a0111c612df2b974ee3335c72a8d724d58f0df blob + b403a1396b48b2fdd75dda910d436150bee4efbb --- regress/cmdline/cleanup.sh +++ regress/cmdline/cleanup.sh @@ -72,7 +72,7 @@ test_cleanup_unreferenced_loose_objects() { # cleanup -n should not remove any objects ls -R $testroot/repo/.git/objects > $testroot/objects-before - gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -n -q -r $testroot/repo > $testroot/stdout echo -n > $testroot/stdout.expected cmp -s $testroot/stdout.expected $testroot/stdout ret="$?" @@ -91,7 +91,7 @@ test_cleanup_unreferenced_loose_objects() { fi # cleanup should remove loose objects that belonged to the branch - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -169,7 +169,7 @@ test_cleanup_redundant_loose_objects() { # cleanup -n should not remove any objects ls -R $testroot/repo/.git/objects > $testroot/objects-before - gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -n -q -r $testroot/repo > $testroot/stdout echo -n > $testroot/stdout.expected cmp -s $testroot/stdout.expected $testroot/stdout ret="$?" @@ -196,7 +196,7 @@ test_cleanup_redundant_loose_objects() { fi # cleanup should remove all loose objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -244,7 +244,7 @@ test_cleanup_precious_objects() { (cd $testroot/repo && git config extensions.preciousObjects true) # cleanup should now refuse to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" == "0" ]; then @@ -294,7 +294,7 @@ test_cleanup_missing_pack_file() { rm $testroot/repo/.git/objects/pack/pack-$packname # cleanup should now refuse to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" == "0" ]; then @@ -317,7 +317,7 @@ test_cleanup_missing_pack_file() { return 1 fi - gotadmin cleanup -r $testroot/repo -p -n > $testroot/stdout + gotadmin cleanup -a -r $testroot/repo -p -n > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -334,7 +334,7 @@ test_cleanup_missing_pack_file() { return 1 fi - gotadmin cleanup -r $testroot/repo -p > $testroot/stdout + gotadmin cleanup -a -r $testroot/repo -p > $testroot/stdout ret="$?" if [ "$ret" != "0" ]; then echo "gotadmin cleanup failed unexpectedly" >&2 @@ -351,7 +351,7 @@ test_cleanup_missing_pack_file() { fi # cleanup should now attempt to purge objects - gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + gotadmin cleanup -a -q -r $testroot/repo > $testroot/stdout \ 2> $testroot/stderr ret="$?" if [ "$ret" != "0" ]; then
close race between 'gotadmin cleanup' and 'got commit'