"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
fix support for pack files larger than 2GB
To:
gameoftrees@openbsd.org
Date:
Fri, 8 Nov 2019 23:29:29 +0200

Download raw body.

Thread
mpi@ tried to browse a clone of a NetBSD Git repository from GitHub.
This failed with:  tog: pack index file checksum error

Our support for pack files larger than 2GB is broken.

$ du -hs /mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.*
93.5M	/mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.idx
3.6G	/mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.pack

The regular offset table in the pack index consists of entries which
are 32 bits wide. For objects stored within the first 2GB of the pack
file, these entries store the object's offset verbatim. For objects stored
beyond the 2GB range, these entries have the high bit set and instead
specify an index into a secondary "large offsets" table, which uses
64-bit wide items to represent the actual offsets beyond 2GB.

There are two problems:

1) Our check for size > 2GB is performed on the .idx file's size,
instead of on the .pack file's size as it should have been.

2) The code assumes that the large offset table has the same number of
entries as the main offset table. But this is only true if every entry in
the main offset table is an index into the secondary table.

To fix both issues we can count the number of entries in the main offset
table which refer to the large offset table, and size the large offset
table accordingly.
This patch allows me to browse the repository.

ok?

diff 70015d7a0e09198dfe1d24d340818d8769ff6ab8 /home/stsp/src/got
blob - 7e7c69769bca88c55404098a3ff10d7603f937df
file + lib/got_lib_pack.h
--- lib/got_lib_pack.h
+++ lib/got_lib_pack.h
@@ -89,6 +89,7 @@ struct got_packidx {
 	int fd;
 	uint8_t *map;
 	size_t len;
+	size_t nlargeobj;
 	struct got_packidx_v2_hdr hdr; /* convenient pointers into map */
 };
 
blob - b6f49c5941537ece4825344a137f3cbe7ac37e24
file + lib/pack.c
--- lib/pack.c
+++ lib/pack.c
@@ -75,6 +75,7 @@ got_packidx_init_hdr(struct got_packidx *p, int verify
 	uint8_t sha1[SHA1_DIGEST_LENGTH];
 	size_t nobj, len_fanout, len_ids, offset, remain;
 	ssize_t n;
+	int i;
 
 	SHA1Init(&ctx);
 
@@ -255,35 +256,41 @@ got_packidx_init_hdr(struct got_packidx *p, int verify
 	offset += nobj * sizeof(*h->offsets);
 
 	/* Large file offsets are contained only in files > 2GB. */
-	if (p->len <= 0x80000000)
+	for (i = 0; i < nobj; i++) {
+		uint32_t o = betoh32(h->offsets[i]);
+		if (o & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX)
+			p->nlargeobj++;
+	}
+	if (p->nlargeobj == 0)
 		goto checksum;
 
-	if (remain < nobj * sizeof(*h->large_offsets)) {
+	if (remain < p->nlargeobj * sizeof(*h->large_offsets)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		goto done;
 	}
 	if (p->map)
 		h->large_offsets = (uint64_t *)((uint8_t*)(p->map + offset));
 	else {
-		h->large_offsets = malloc(nobj * sizeof(*h->large_offsets));
+		h->large_offsets = malloc(p->nlargeobj *
+		    sizeof(*h->large_offsets));
 		if (h->large_offsets == NULL) {
 			err = got_error_from_errno("malloc");
 			goto done;
 		}
 		n = read(p->fd, h->large_offsets,
-		    nobj * sizeof(*h->large_offsets));
+		    p->nlargeobj * sizeof(*h->large_offsets));
 		if (n < 0)
 			err = got_error_from_errno("read");
-		else if (n != nobj * sizeof(*h->large_offsets)) {
+		else if (n != p->nlargeobj * sizeof(*h->large_offsets)) {
 			err = got_error(GOT_ERR_BAD_PACKIDX);
 			goto done;
 		}
 	}
 	if (verify)
 		SHA1Update(&ctx, (uint8_t*)h->large_offsets,
-		    nobj * sizeof(*h->large_offsets));
-	remain -= nobj * sizeof(*h->large_offsets);
-	offset += nobj * sizeof(*h->large_offsets);
+		    p->nlargeobj * sizeof(*h->large_offsets));
+	remain -= p->nlargeobj * sizeof(*h->large_offsets);
+	offset += p->nlargeobj * sizeof(*h->large_offsets);
 
 checksum:
 	if (remain < sizeof(*h->trailer)) {
@@ -408,12 +415,11 @@ got_packidx_close(struct got_packidx *packidx)
 static off_t
 get_object_offset(struct got_packidx *packidx, int idx)
 {
-	uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]);
 	uint32_t offset = betoh32(packidx->hdr.offsets[idx]);
 	if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
 		uint64_t loffset;
 		idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
-		if (idx < 0 || idx > totobj ||
+		if (idx < 0 || idx >= packidx->nlargeobj ||
 		    packidx->hdr.large_offsets == NULL)
 			return -1;
 		loffset = betoh64(packidx->hdr.large_offsets[idx]);