"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: error out when searching a pack index which lacks a corresponding pack file
To:
"Todd C. Miller" <Todd.Miller@sudo.ws>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 6 Jul 2021 10:52:46 +0200

Download raw body.

Thread
On Sun, Jul 04, 2021 at 12:13:12PM -0600, Todd C. Miller wrote:
> On Sun, 04 Jul 2021 20:11:22 +0200, Stefan Sperling wrote:
> 
> > You can simply remove the orphaned .idx files with rm(1).
> > If that isn't obvious, perhaps the error message should suggest doing that?
> 
> I think that would be helpful.
> 
>  - todd
> 

Here is a new patch which also adds 'gotadmin cleanup -p' to deal with
lonely pack index files.

ok?

diff c2cf8015b6da213404a3d87a14ff228d1ca99c50 4ddb4bcafddc519c82552201a57e26c0b4d6e0f0
blob - 1290b7007bf14c58dc88fc618969d59fe988b94a
blob + 7447f80c0b353f5e721127e74978c14924336573
--- gotadmin/gotadmin.1
+++ gotadmin/gotadmin.1
@@ -181,7 +181,7 @@ and a break-down of the number of objects per object t
 .It Cm ls
 Short alias for
 .Cm listpack .
-.It Cm cleanup Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc
+.It Cm cleanup Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc
 Purge unreferenced loose objects from the repository and display
 the amount of disk space which has been freed as a result.
 .Pp
@@ -238,6 +238,16 @@ will only purge corresponding objects once such refere
 deleted with
 .Cm got ref -d .
 .Pp
+Some Git repositories contain pack index files which lack a corresponding
+pack file, which is an inconsistent repository state.
+In such cases,
+.Cm gotadmin cleanup -p -n
+will display a list of affected pack index files.
+Whenever possible the missing pack files should be restored.
+If restoring missing pack files is not possible then affected pack index
+files can be removed with
+.Cm gotadmin cleanup -p .
+.Pp
 The
 .Dq preciousObjects
 Git extension is intended to prevent the removal of objects from a repository.
@@ -248,9 +258,12 @@ The options for
 .Cm gotadmin cleanup
 are as follows:
 .Bl -tag -width Ds
+.It Fl p
+Instead of purging unreferenced loose objects, remove any pack index files
+which do not have a corresponding pack file.
 .It Fl n
 Display the usual progress output and summary information but do not actually
-purge any objects.
+remove any files from disk.
 .It Fl r Ar repository-path
 Use the repository at the specified path.
 If not specified, assume the repository is located at or above the current
blob - 3a8fe2211096a986460483094705912532891e8e
blob + 810a5cc02697494c98449bfb5e56bfdb7b79349e
--- gotadmin/gotadmin.c
+++ gotadmin/gotadmin.c
@@ -897,8 +897,8 @@ done:
 __dead static void
 usage_cleanup(void)
 {
-	fprintf(stderr, "usage: %s cleanup [-n] [-r repository-path] [-q]\n",
-	    getprogname());
+	fprintf(stderr, "usage: %s cleanup [-p] [-n] [-r repository-path] "
+	    "[-q]\n", getprogname());
 	exit(1);
 }
 
@@ -959,14 +959,39 @@ cleanup_progress(void *arg, int nloose, int ncommits, 
 	return NULL;
 }
 
+struct got_lonely_packidx_progress_arg {
+	int verbosity;
+	int printed_something;
+	int dry_run;
+};
+
 static const struct got_error *
+lonely_packidx_progress(void *arg, const char *path)
+{
+	struct got_lonely_packidx_progress_arg *a = arg;
+
+	if (a->verbosity < 0)
+		return NULL;
+
+	if (a->dry_run)
+		printf("%s could be removed\n", path);
+	else
+		printf("%s removed\n", path);
+
+	a->printed_something = 1;
+	return NULL;
+}
+
+static const struct got_error *
 cmd_cleanup(int argc, char *argv[])
 {
 	const struct got_error *error = NULL;
 	char *cwd = NULL, *repo_path = NULL;
 	struct got_repository *repo = NULL;
 	int ch, dry_run = 0, npacked = 0, verbosity = 0;
+	int remove_lonely_packidx = 0;
 	struct got_cleanup_progress_arg cpa;
+	struct got_lonely_packidx_progress_arg lpa;
 	off_t size_before, size_after;
 	char scaled_before[FMT_SCALED_STRSIZE];
 	char scaled_after[FMT_SCALED_STRSIZE];
@@ -974,8 +999,11 @@ cmd_cleanup(int argc, char *argv[])
 	char **extensions;
 	int nextensions, i;
 
-	while ((ch = getopt(argc, argv, "r:nq")) != -1) {
+	while ((ch = getopt(argc, argv, "pr:nq")) != -1) {
 		switch (ch) {
+		case 'p':
+			remove_lonely_packidx = 1;
+			break;
 		case 'r':
 			repo_path = realpath(optarg, NULL);
 			if (repo_path == NULL)
@@ -1028,6 +1056,15 @@ cmd_cleanup(int argc, char *argv[])
 		}
 	}
 
+	if (remove_lonely_packidx) {
+		memset(&lpa, 0, sizeof(lpa));
+		lpa.dry_run = dry_run;
+		lpa.verbosity = verbosity;
+		error = got_repo_remove_lonely_packidx(repo, dry_run,
+		    lonely_packidx_progress, &lpa, check_cancelled, NULL);
+		goto done;
+	}
+
 	memset(&cpa, 0, sizeof(cpa));
 	cpa.last_ncommits = -1;
 	cpa.last_npurged = -1;
blob - 851d4ad4128c5bcac0cd5616088fb5063844a83f
blob + 48165fc4dfb98dc55aa9bb49ceea9ff107e8fc26
--- include/got_error.h
+++ include/got_error.h
@@ -146,6 +146,7 @@
 #define GOT_ERR_BAD_SYMLINK	129
 #define GOT_ERR_GIT_REPO_EXT	130
 #define GOT_ERR_CANNOT_PACK	131
+#define GOT_ERR_LONELY_PACKIDX	132
 
 static const struct got_error {
 	int code;
@@ -299,6 +300,7 @@ static const struct got_error {
 	    "version control" },
 	{ GOT_ERR_GIT_REPO_EXT, "unsupported repository format extension" },
 	{ GOT_ERR_CANNOT_PACK, "not enough objects to pack" },
+	{ GOT_ERR_LONELY_PACKIDX, "pack index has no corresponding pack file" },
 };
 
 /*
blob - 99fa6426ad538680697e303f69032b8c33f9266a
blob + d1e9be3e619d56a5e9e4569edfdc2cf9071ae49c
--- include/got_repository_admin.h
+++ include/got_repository_admin.h
@@ -85,3 +85,13 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
     off_t *size_before, off_t *size_after, int *npacked, int dry_run,
     got_cleanup_progress_cb progress_cb, void *progress_arg,
     got_cancel_cb cancel_cb, void *cancel_arg);
+
+/* A callback function which gets invoked with cleanup information to print. */
+typedef const struct got_error *(*got_lonely_packidx_progress_cb)(void *arg,
+    const char *path);
+
+/* Remove pack index files which do not have a corresponding pack file. */
+const struct got_error *
+got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run,
+    got_lonely_packidx_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg);
blob - e26649f47637e11dd9477fd50158d147bac28aad
blob + 7f21b68ba60596d4c90ebe7b6ea9618f08bb68f4
--- lib/got_lib_repository.h
+++ lib/got_lib_repository.h
@@ -93,6 +93,7 @@ const struct got_error*got_repo_cache_tag(struct got_r
     struct got_object_id *, struct got_tag_object *);
 struct got_tag_object *got_repo_get_cached_tag(struct got_repository *,
     struct got_object_id *);
+int got_repo_is_packidx_filename(const char *, size_t);
 const struct got_error *got_repo_search_packidx(struct got_packidx **, int *,
     struct got_repository *, struct got_object_id *);
 const struct got_error *got_repo_cache_pack(struct got_pack **,
blob - 9bb2b1a4ab231495841a9af5bbea8d25a8d485fa
blob + 9d93bee5d0a3def590556db70197db9b3a2b4651
--- lib/pack.c
+++ lib/pack.c
@@ -332,34 +332,55 @@ got_packidx_open(struct got_packidx **packidx,
     int dir_fd, const char *relpath, int verify)
 {
 	const struct got_error *err = NULL;
-	struct got_packidx *p;
+	struct got_packidx *p = NULL;
+	char *pack_relpath;
 	struct stat sb;
 
 	*packidx = NULL;
 
+	err = got_packidx_get_packfile_path(&pack_relpath, relpath);
+	if (err)
+		return err;
+
+	/*
+	 * Ensure that a corresponding pack file exists.
+	 * Some Git repositories have this problem. Git seems to ignore
+	 * the existence of lonely pack index files but we do not.
+	 */
+	if (fstatat(dir_fd, pack_relpath, &sb, 0) == -1) {
+		if (errno == ENOENT) {
+			err = got_error_fmt(GOT_ERR_LONELY_PACKIDX,
+			    "%s", relpath);
+		} else
+			err = got_error_from_errno2("fstatat", pack_relpath);
+		goto done;
+	}
+
 	p = calloc(1, sizeof(*p));
-	if (p == NULL)
-		return got_error_from_errno("calloc");
+	if (p == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
 
 	p->fd = openat(dir_fd, relpath, O_RDONLY | O_NOFOLLOW);
 	if (p->fd == -1) {
 		err = got_error_from_errno2("openat", relpath);
 		free(p);
-		return err;
+		goto done;
 	}
 
 	if (fstat(p->fd, &sb) != 0) {
 		err = got_error_from_errno2("fstat", relpath);
 		close(p->fd);
 		free(p);
-		return err;
+		goto done;
 	}
 	p->len = sb.st_size;
 	if (p->len < sizeof(p->hdr)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		close(p->fd);
 		free(p);
-		return err;
+		goto done;
 	}
 
 	p->path_packidx = strdup(relpath);
@@ -381,11 +402,12 @@ got_packidx_open(struct got_packidx **packidx,
 
 	err = got_packidx_init_hdr(p, verify);
 done:
-	if (err)
-		got_packidx_close(p);
-	else
+	if (err) {
+		if (p)
+			got_packidx_close(p);
+	} else
 		*packidx = p;
-
+	free(pack_relpath);
 	return err;
 }
 
blob - 15c0782632559de89351d64aec03314766fc7a79
blob + 3d857e3a7784f358729bd0b93b349fa77eaa1c10
--- lib/repository.c
+++ lib/repository.c
@@ -938,8 +938,8 @@ cache_packidx(struct got_repository *repo, struct got_
 	return NULL;
 }
 
-static int
-is_packidx_filename(const char *name, size_t len)
+int
+got_repo_is_packidx_filename(const char *name, size_t len)
 {
 	if (len != GOT_PACKIDX_NAMELEN)
 		return 0;
@@ -1008,7 +1008,7 @@ got_repo_search_packidx(struct got_packidx **packidx, 
 	while ((dent = readdir(packdir)) != NULL) {
 		int is_cached = 0;
 
-		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
+		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
 			continue;
 
 		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
@@ -1276,7 +1276,7 @@ match_packed_object(struct got_object_id **unique_id,
 		struct got_packidx *packidx;
 		struct got_object_qid *qid;
 
-		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
+		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
 			continue;
 
 		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
@@ -1952,7 +1952,7 @@ got_repo_get_packfile_info(int *npackfiles, int *nobje
 	}
 
 	while ((dent = readdir(packdir)) != NULL) {
-		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
+		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
 			continue;
 
 		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
blob - c5017d50e0b3fa7053f7d9159a76ee0467cf926a
blob + 42e4e1a923ac6b0970e0e49aaa75dcb4458deb0c
--- lib/repository_admin.c
+++ lib/repository_admin.c
@@ -1157,3 +1157,103 @@ done:
 	got_object_idset_free(traversed_ids);
 	return err;
 }
+
+static const struct got_error *
+remove_packidx(int dir_fd, const char *relpath)
+{
+	const struct got_error *err, *unlock_err;
+	struct got_lockfile *lf;
+
+	err = got_lockfile_lock(&lf, relpath, dir_fd);
+	if (err)
+		return err;
+	if (unlinkat(dir_fd, relpath, 0) == -1)
+		err = got_error_from_errno("unlinkat");
+	unlock_err = got_lockfile_unlock(lf, dir_fd);
+	return err ? err : unlock_err;
+}
+
+const struct got_error *
+got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run,
+    got_lonely_packidx_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err;
+	DIR *packdir = NULL;
+	struct dirent *dent;
+	char *pack_relpath = NULL;
+	int packdir_fd;
+	struct stat sb;
+
+	packdir_fd = openat(got_repo_get_fd(repo),
+	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
+	if (packdir_fd == -1) {
+		if (errno == ENOENT)
+			return NULL;
+		return got_error_from_errno_fmt("openat: %s/%s",
+		    got_repo_get_path_git_dir(repo),
+		    GOT_OBJECTS_PACK_DIR);
+	}
+
+	packdir = fdopendir(packdir_fd);
+	if (packdir == NULL) {
+		err = got_error_from_errno("fdopendir");
+		goto done;
+	}
+
+	while ((dent = readdir(packdir)) != NULL) {
+		if (cancel_cb) {
+			err = cancel_cb(cancel_arg);
+			if (err)
+				goto done;
+		}
+
+		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
+			continue;
+
+		err = got_packidx_get_packfile_path(&pack_relpath,
+		    dent->d_name);
+		if (err)
+			goto done;
+
+		if (fstatat(packdir_fd, pack_relpath, &sb, 0) != -1) {
+			free(pack_relpath);
+			pack_relpath = NULL;
+			continue;
+		}
+		if (errno != ENOENT) {
+			err = got_error_from_errno_fmt("fstatat: %s/%s/%s",
+			    got_repo_get_path_git_dir(repo),
+			    GOT_OBJECTS_PACK_DIR,
+			    pack_relpath);
+			goto done;
+		}
+
+		if (!dry_run) {
+			err = remove_packidx(packdir_fd, dent->d_name);
+			if (err)
+				goto done;
+		}
+		if (progress_cb) {
+			char *path;
+			if (asprintf(&path, "%s/%s/%s",
+			    got_repo_get_path_git_dir(repo),
+			    GOT_OBJECTS_PACK_DIR,
+			    dent->d_name) == -1) {
+				err = got_error_from_errno("asprintf");
+				goto done;
+			}
+			err = progress_cb(progress_arg, path);
+			free(path);
+			if (err)
+				goto done;
+		}
+		free(pack_relpath);
+		pack_relpath = NULL;
+	}
+done:
+	if (packdir && closedir(packdir) != 0 && err == NULL)
+		err = got_error_from_errno("closedir");
+	free(pack_relpath);
+	return err;
+}
blob - b39f8ac5681099dac17303be8a1288dc93f8b2f1
blob + aa49076f7354432812d339a4a4f80a670dbbcacc
--- regress/cmdline/cleanup.sh
+++ regress/cmdline/cleanup.sh
@@ -264,7 +264,103 @@ test_cleanup_precious_objects() {
 	test_done "$testroot" "$ret"
 }
 
+test_cleanup_missing_pack_file() {
+	local testroot=`test_init cleanup_missing_pack_file`
+
+	# no pack files should exist yet
+	ls $testroot/repo/.git/objects/pack/ > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	gotadmin pack -r $testroot/repo > $testroot/stdout
+	packname=`grep ^Wrote $testroot/stdout | cut -d ' ' -f2`
+	packhash=`echo $packname | sed -e 's:^objects/pack/pack-::' \
+		-e 's/.pack$//'`
+
+	# Some freshly cloned Git repositories suffer from lonely pack index
+	# files. Remove the pack file we just wrote to simulate this issue.
+	rm $testroot/repo/.git/objects/pack/pack-$packname
+
+	# cleanup should now refuse to purge objects
+	gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \
+		2> $testroot/stderr
+	ret="$?"
+	if [ "$ret" == "0" ]; then
+		echo "gotadmin cleanup succeeded unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	echo -n "gotadmin: objects/pack/pack-${packhash}.idx: " \
+		> $testroot/stderr.expected
+	echo "pack index has no corresponding pack file" \
+		>> $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	gotadmin cleanup -r $testroot/repo -p -n > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "gotadmin cleanup failed unexpectedly" >&2
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	packidx_path=$testroot/repo/.git/objects/pack/pack-${packhash}.idx
+	echo "$packidx_path could be removed" > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	gotadmin cleanup -r $testroot/repo -p > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "gotadmin cleanup failed unexpectedly" >&2
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo "$packidx_path removed" > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	# cleanup should now attempt to purge objects
+	gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \
+		2> $testroot/stderr
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "gotadmin cleanup failed unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+	test_done "$testroot" "$ret"
+}
+
 test_parseargs "$@"
 run_test test_cleanup_unreferenced_loose_objects
 run_test test_cleanup_redundant_loose_objects
 run_test test_cleanup_precious_objects
+run_test test_cleanup_missing_pack_file