"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
error out when searching a pack index which lacks a corresponding pack file
To:
gameoftrees@openbsd.org
Date:
Sun, 4 Jul 2021 12:40:28 +0200

Download raw body.

Thread
Some Git repository clones created by Git can somehow end up with pack index
files that lack a corresponding pack file. This came up on the #gameoftrees
IRC channel recently where the repository was cloned by git clone --bare
from a https:// URL.

Git seems to silently ignore this problem. I think we should be treating
this as a repository corruption issue because a lonely index should never
appear intentionally.
A pack file without an index is valid, but an index without a pack file can
never be valid. When removing a pack file the corresponding index should
always be removed first before the pack file gets removed.

What happens now is that Got searches the pack index for an object ID,
and if this ID is found we will eventually error out when we try to
open the corresponding pack file: "foo.pack: No such file or directory".

With this patch we detect the problem earlier, before the pack index is
searched. And we provide a more specific (albeit much longer) error message:

 pack index objects/pack/pack-${packhash}.idx has no corresponding pack file:
 bad pack index file

ok?

diff 9cbca905d8eed2582382eb9c570e0fb8c3837916 cb491a6e72fe7e672232c86fdec1a3402a41d3fb
blob - 5f8dfb856fa6b279f5a7d058b7334047642919af
blob + 15ffdf9b08e4cbaeaf08bb3c1b31ec280c049cde
--- lib/pack.c
+++ lib/pack.c
@@ -327,39 +327,86 @@ done:
 	return err;
 }
 
+static const struct got_error *
+get_packfile_path(char **path_packfile, const char *path_packidx)
+{
+	size_t size;
+
+	/* Packfile path contains ".pack" instead of ".idx", so add one byte. */
+	size = strlen(path_packidx) + 2;
+	if (size < GOT_PACKFILE_NAMELEN + 1)
+		return got_error_path(path_packidx, GOT_ERR_BAD_PATH);
+
+	*path_packfile = malloc(size);
+	if (*path_packfile == NULL)
+		return got_error_from_errno("malloc");
+
+	/* Copy up to and excluding ".idx". */
+	if (strlcpy(*path_packfile, path_packidx,
+	    size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	return NULL;
+}
+
 const struct got_error *
 got_packidx_open(struct got_packidx **packidx,
     int dir_fd, const char *relpath, int verify)
 {
 	const struct got_error *err = NULL;
-	struct got_packidx *p;
+	struct got_packidx *p = NULL;
+	char *pack_relpath;
 	struct stat sb;
 
 	*packidx = NULL;
 
+	err = get_packfile_path(&pack_relpath, relpath);
+	if (err)
+		return err;
+
+	/*
+	 * Ensure that a corresponding pack file exists.
+	 * Some Git repositories have this problem. Git seems to ignore
+	 * the existence of lonely pack index files but we do not.
+	 */
+	if (fstatat(dir_fd, pack_relpath, &sb, 0) == -1) {
+		if (errno == ENOENT) {
+			err = got_error_fmt(GOT_ERR_BAD_PACKIDX,
+			    "pack index %s has no corresponding pack file",
+			    relpath);
+		} else
+			err = got_error_from_errno2("fstatat", pack_relpath);
+		goto done;
+	}
+
 	p = calloc(1, sizeof(*p));
-	if (p == NULL)
-		return got_error_from_errno("calloc");
+	if (p == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
 
 	p->fd = openat(dir_fd, relpath, O_RDONLY | O_NOFOLLOW);
 	if (p->fd == -1) {
 		err = got_error_from_errno2("openat", relpath);
 		free(p);
-		return err;
+		goto done;
 	}
 
 	if (fstat(p->fd, &sb) != 0) {
 		err = got_error_from_errno2("fstat", relpath);
 		close(p->fd);
 		free(p);
-		return err;
+		goto done;
 	}
 	p->len = sb.st_size;
 	if (p->len < sizeof(p->hdr)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		close(p->fd);
 		free(p);
-		return err;
+		goto done;
 	}
 
 	p->path_packidx = strdup(relpath);
@@ -381,11 +428,12 @@ got_packidx_open(struct got_packidx **packidx,
 
 	err = got_packidx_init_hdr(p, verify);
 done:
-	if (err)
-		got_packidx_close(p);
-	else
+	if (err) {
+		if (p)
+			got_packidx_close(p);
+	} else
 		*packidx = p;
-
+	free(pack_relpath);
 	return err;
 }
 
@@ -418,26 +466,7 @@ got_packidx_close(struct got_packidx *packidx)
 const struct got_error *
 got_packidx_get_packfile_path(char **path_packfile, struct got_packidx *packidx)
 {
-	size_t size;
-
-	/* Packfile path contains ".pack" instead of ".idx", so add one byte. */
-	size = strlen(packidx->path_packidx) + 2;
-	if (size < GOT_PACKFILE_NAMELEN + 1)
-		return got_error_path(packidx->path_packidx, GOT_ERR_BAD_PATH);
-
-	*path_packfile = malloc(size);
-	if (*path_packfile == NULL)
-		return got_error_from_errno("malloc");
-
-	/* Copy up to and excluding ".idx". */
-	if (strlcpy(*path_packfile, packidx->path_packidx,
-	    size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
-		return got_error(GOT_ERR_NO_SPACE);
-
-	if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
-		return got_error(GOT_ERR_NO_SPACE);
-
-	return NULL;
+	return get_packfile_path(path_packfile, packidx->path_packidx);
 }
 
 off_t
blob - b55cfe1aebef1d3a2fc7fc8cc6f5858e4ffa124b
blob + 56b044beb6e0f4a53bac6bd0bbc61ea64c40909e
--- regress/cmdline/cleanup.sh
+++ regress/cmdline/cleanup.sh
@@ -261,7 +261,58 @@ test_cleanup_precious_objects() {
 	test_done "$testroot" "$ret"
 }
 
+test_cleanup_missing_pack_file() {
+	local testroot=`test_init cleanup_missing_pack_file`
+
+	# no pack files should exist yet
+	ls $testroot/repo/.git/objects/pack/ > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	gotadmin pack -r $testroot/repo > $testroot/stdout
+	packname=`grep ^Wrote $testroot/stdout | cut -d ' ' -f2`
+	packhash=`echo $packname | sed -e 's:^objects/pack/pack-::' \
+		-e 's/.pack$//'`
+
+	# Some freshly cloned Git repositories suffer from lonely pack index
+	# files. Remove the pack file we just wrote to simulate this issue.
+	rm $testroot/repo/.git/objects/pack/pack-$packname
+
+	# cleanup should now refuse to purge objects
+	gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \
+		2> $testroot/stderr
+	ret="$?"
+	if [ "$ret" == "0" ]; then
+		echo "gotadmin cleanup succeeded unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	echo -n "gotadmin: pack index objects/pack/pack-${packhash}.idx " \
+		> $testroot/stderr.expected
+	echo "has no corresponding pack file: bad pack index file" \
+		>> $testroot/stderr.expected
+	cmp -s $testroot/stderr.expected $testroot/stderr
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stderr.expected $testroot/stderr
+	fi
+	test_done "$testroot" "$ret"
+}
+
 test_parseargs "$@"
 run_test test_cleanup_unreferenced_loose_objects
 run_test test_cleanup_redundant_loose_objects
 run_test test_cleanup_precious_objects
+run_test test_cleanup_missing_pack_file