From: Stefan Sperling
Subject: fix support for pack files larger than 2GB
To: gameoftrees@openbsd.org
Date: Fri, 8 Nov 2019 23:29:29 +0200

mpi@ tried to browse a clone of a NetBSD Got repository from GitHub.
This failed with:

  tog: pack index file checksum error

Our support for pack files larger than 2GB is broken.

$ du -hs /mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.*
93.5M /mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.idx
3.6G /mnt/netbsd/.git/objects/pack/pack-21b791c37f7f51a9c6bda1daf396e3c0e9068041.pack

The regular offset table in the pack index consists of entries which
are 32 bits wide. These entries store offsets of objects stored within
the first 2GB of the pack file verbatim. For objects stored beyond the
2GB range, these entries have the high bit set and specify another
index into a secondary "large offsets" table, which uses 64-bit wide
items to represent the actual offsets beyond 2GB.

There are two problems:

1) Our check for size > 2GB is performed on the .idx file's size,
   instead of on the .pack file's size as it should have been.

2) The code assumes that the large offset table has the same size as
   the main offset table. But this is only true if all entries in the
   main index table are indices into the secondary table.

To fix both issues we can count the number of large offsets in the main
index and size the large offset table accordingly.

This patch allows me to browse the repository.

ok?

diff 70015d7a0e09198dfe1d24d340818d8769ff6ab8 /home/stsp/src/got blob - 7e7c69769bca88c55404098a3ff10d7603f937df file + lib/got_lib_pack.h --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -89,6 +89,7 @@ struct got_packidx { int fd; uint8_t *map; size_t len; + size_t nlargeobj; struct got_packidx_v2_hdr hdr; /* convenient pointers into map */ }; blob - b6f49c5941537ece4825344a137f3cbe7ac37e24 file + lib/pack.c --- lib/pack.c +++ lib/pack.c @@ -75,6 +75,7 @@ got_packidx_init_hdr(struct got_packidx *p, int verify uint8_t sha1[SHA1_DIGEST_LENGTH]; size_t nobj, len_fanout, len_ids, offset, remain; ssize_t n; + int i; SHA1Init(&ctx); @@ -255,35 +256,41 @@ got_packidx_init_hdr(struct got_packidx *p, int verify offset += nobj * sizeof(*h->offsets); /* Large file offsets are contained only in files > 2GB. */ - if (p->len <= 0x80000000) + for (i = 0; i < nobj; i++) { + uint32_t o = betoh32(h->offsets[i]); + if (o & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) + p->nlargeobj++; + } + if (p->nlargeobj == 0) goto checksum; - if (remain < nobj * sizeof(*h->large_offsets)) { + if (remain < p->nlargeobj * sizeof(*h->large_offsets)) { err = got_error(GOT_ERR_BAD_PACKIDX); goto done; } if (p->map) h->large_offsets = (uint64_t *)((uint8_t*)(p->map + offset)); else { - h->large_offsets = malloc(nobj * sizeof(*h->large_offsets)); + h->large_offsets = malloc(p->nlargeobj * + sizeof(*h->large_offsets)); if (h->large_offsets == NULL) { err = got_error_from_errno("malloc"); goto done; } n = read(p->fd, h->large_offsets, - nobj * sizeof(*h->large_offsets)); + p->nlargeobj * sizeof(*h->large_offsets)); if (n < 0) err = got_error_from_errno("read"); - else if (n != nobj * sizeof(*h->large_offsets)) { + else if (n != p->nlargeobj * sizeof(*h->large_offsets)) { err = got_error(GOT_ERR_BAD_PACKIDX); goto done; } } if (verify) SHA1Update(&ctx, (uint8_t*)h->large_offsets, - nobj * sizeof(*h->large_offsets)); - remain -= nobj * sizeof(*h->large_offsets); - offset += nobj * sizeof(*h->large_offsets); + 
p->nlargeobj * sizeof(*h->large_offsets)); + remain -= p->nlargeobj * sizeof(*h->large_offsets); + offset += p->nlargeobj * sizeof(*h->large_offsets); checksum: if (remain < sizeof(*h->trailer)) { @@ -408,12 +415,11 @@ got_packidx_close(struct got_packidx *packidx) static off_t get_object_offset(struct got_packidx *packidx, int idx) { - uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]); uint32_t offset = betoh32(packidx->hdr.offsets[idx]); if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) { uint64_t loffset; idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK; - if (idx < 0 || idx > totobj || + if (idx < 0 || idx >= packidx->nlargeobj || packidx->hdr.large_offsets == NULL) return -1; loffset = betoh64(packidx->hdr.large_offsets[idx]);