From: Stefan Sperling Subject: track pack file size as off_t and size_t To: gameoftrees@openbsd.org Date: Thu, 17 Dec 2020 17:59:39 +0100 Insprired by Ed Maste's size_t changes, this patch attempts to tidy up our tracking of the size of a pack file. Note that pack files are expected to become very large. It doesn't affect repositories I usually work with, but pack files above 4 GB certainly exist. One problem is that we're currently storing the pack file size in a size_t variable. This could be problematic if a large pack file is used on an i386 machine, for instance. With the patch below, we keep separate variables for the on-disk and memory-mapped sizes which fixes off_t/size_t confusion in some places. Furthermore, we only attempt to map files which will fit into a size_t. ok? diff e8f0f2f60d1eb5651a01b13607ca04014f092753 ea75a347556ace162546e9c7f6141c791f40bee7 blob - 39aa4333097646e6b90b09899df9869839a65c43 blob + b65dd0c1423c98241b8fe716b7a1c4c586f9b58a --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -19,7 +19,8 @@ struct got_pack { char *path_packfile; int fd; uint8_t *map; - size_t filesize; + off_t filesize; + size_t mapsize; struct got_privsep_child *privsep_child; struct got_delta_cache *delta_cache; }; blob - b46583a6e3ce2fe952e1c30726026d492c7f5734 blob + b6d955fd156a0173b16ff02c22be3ad38055f2e9 --- lib/pack.c +++ lib/pack.c @@ -538,7 +538,7 @@ got_pack_close(struct got_pack *pack) const struct got_error *err = NULL; err = got_pack_stop_privsep_child(pack); - if (pack->map && munmap(pack->map, pack->filesize) == -1 && !err) + if (pack->map && munmap(pack->map, pack->mapsize) == -1 && !err) err = got_error_from_errno("munmap"); if (pack->fd != -1 && close(pack->fd) != 0 && err == NULL) err = got_error_from_errno("close"); @@ -546,6 +546,7 @@ got_pack_close(struct got_pack *pack) free(pack->path_packfile); pack->path_packfile = NULL; pack->filesize = 0; + pack->mapsize = 0; if (pack->delta_cache) { got_delta_cache_free(pack->delta_cache); pack->delta_cache = NULL; @@ -700,7 +701,7 @@ got_pack_parse_offset_delta(off_t *base_offset, size_t static const struct got_error * read_delta_data(uint8_t **delta_buf, size_t *delta_len, - size_t delta_data_offset, struct got_pack *pack) + off_t delta_data_offset, struct got_pack *pack) { const struct got_error *err = NULL; @@ -708,8 +709,8 @@ read_delta_data(uint8_t **delta_buf, size_t *delta_len if (delta_data_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); err = got_inflate_to_mem_mmap(delta_buf, delta_len, - NULL, NULL, pack->map, delta_data_offset, - pack->filesize - delta_data_offset); + NULL, NULL, pack->map, (size_t)delta_data_offset, + pack->mapsize - (size_t)delta_data_offset); } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); @@ -796,7 +797,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru size_t base_tslen; off_t delta_data_offset; - if (delta_offset + tslen >= pack->filesize) + if (delta_offset + (off_t)tslen >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); if (pack->map) { @@ -1084,7 +1085,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, mapoff = (size_t)delta_data_offset; err = got_inflate_to_file_mmap( &base_bufsz, NULL, NULL, pack->map, - mapoff, pack->filesize - mapoff, + mapoff, pack->mapsize - mapoff, base_file); } else err = got_inflate_to_file_fd( @@ -1096,7 +1097,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, err = got_inflate_to_mem_mmap(&base_buf, &base_bufsz, NULL, NULL, pack->map, mapoff, - pack->filesize - mapoff); + pack->mapsize - mapoff); } else err = got_inflate_to_mem_fd(&base_buf, &base_bufsz, NULL, NULL, max_size, @@ -1219,7 +1220,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz SIMPLEQ_FOREACH(delta, &deltas->entries, entry) { int cached = 1; if (n == 0) { - size_t delta_data_offset; + off_t delta_data_offset; /* Plain object types are the delta base. */ if (delta->type != GOT_OBJ_TYPE_COMMIT && @@ -1239,7 +1240,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz size_t mapoff = (size_t)delta_data_offset; err = got_inflate_to_mem_mmap(&base_buf, &base_bufsz, NULL, NULL, pack->map, - mapoff, pack->filesize - mapoff); + mapoff, pack->mapsize - mapoff); } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) { @@ -1335,7 +1336,7 @@ got_packfile_extract_object(struct got_pack *pack, str if (pack->map) { size_t mapoff = (size_t)obj->pack_offset; err = got_inflate_to_file_mmap(&obj->size, NULL, NULL, - pack->map, mapoff, pack->filesize - mapoff, + pack->map, mapoff, pack->mapsize - mapoff, outfile); } else { if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) blob - 7a2d2999eee396614f65ab978214ffc035a1f4c2 blob + a3699f34e7df3a124c349bdb340fada674bffe45 --- lib/repository.c +++ lib/repository.c @@ -1128,18 +1128,25 @@ got_repo_cache_pack(struct got_pack **packp, struct go goto done; } pack->filesize = sb.st_size; + if (pack->filesize <= SIZE_MAX) + pack->mapsize = pack->filesize; + else + pack->mapsize = 0; pack->privsep_child = NULL; #ifndef GOT_PACK_NO_MMAP - pack->map = mmap(NULL, pack->filesize, PROT_READ, MAP_PRIVATE, - pack->fd, 0); - if (pack->map == MAP_FAILED) { - if (errno != ENOMEM) { - err = got_error_from_errno("mmap"); - goto done; + if (pack->mapsize > 0) { + pack->map = mmap(NULL, pack->mapsize, PROT_READ, MAP_PRIVATE, + pack->fd, 0); + if (pack->map == MAP_FAILED) { + if (errno != ENOMEM) { + err = got_error_from_errno("mmap"); + goto done; + } + pack->map = NULL; /* fall back to read(2) */ + pack->mapsize = 0; } - pack->map = NULL; /* fall back to read(2) */ } #endif done: blob - bcef0951dbb771b4b70ab05125138f52e08c7b7a blob + 4670d0eff730b5875135247003683d94a88712e0 --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -265,7 +265,7 @@ read_packed_object(struct got_pack *pack, struct got_i case GOT_OBJ_TYPE_REF_DELTA: memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH); if (pack->map) { - if (mapoff + SHA1_DIGEST_LENGTH >= pack->filesize) { + if (mapoff + SHA1_DIGEST_LENGTH >= pack->mapsize) { err = got_error(GOT_ERR_BAD_PACKFILE); break; } @@ -636,7 +636,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp int p_resolved = 0, last_p_resolved = -1; /* Require that pack file header and SHA1 trailer are present. */ - if (pack->filesize < sizeof(hdr) + SHA1_DIGEST_LENGTH) + if (pack->filesize < (off_t)(sizeof(hdr) + SHA1_DIGEST_LENGTH)) return got_error_msg(GOT_ERR_BAD_PACKFILE, "short pack file"); @@ -798,7 +798,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp /* Verify the SHA1 checksum stored at the end of the pack file. */ if (pack->map) { memcpy(pack_sha1_expected, pack->map + - pack->filesize - SHA1_DIGEST_LENGTH, + pack->mapsize - SHA1_DIGEST_LENGTH, SHA1_DIGEST_LENGTH); } else { ssize_t n; @@ -1070,7 +1070,9 @@ main(int argc, char **argv) err = got_error_from_errno("lseek"); goto done; } - pack.filesize = packfile_size; /* XXX off_t vs size_t */ + pack.filesize = packfile_size; + if (pack.filesize <= SIZE_MAX) + pack.mapsize = pack.filesize; if (lseek(pack.fd, 0, SEEK_SET) == -1) { err = got_error_from_errno("lseek"); @@ -1078,10 +1080,18 @@ main(int argc, char **argv) } #ifndef GOT_PACK_NO_MMAP - pack.map = mmap(NULL, pack.filesize, PROT_READ, MAP_PRIVATE, - pack.fd, 0); - if (pack.map == MAP_FAILED) - pack.map = NULL; /* fall back to read(2) */ + if (pack.mapsize > 0) { + pack.map = mmap(NULL, pack.mapsize, PROT_READ, MAP_PRIVATE, + pack.fd, 0); + if (pack.map == MAP_FAILED) { + if (errno != ENOMEM) { + err = got_error_from_errno("mmap"); + goto done; + } + pack.map = NULL; /* fall back to read(2) */ + pack.mapsize = 0; + } + } #endif err = index_pack(&pack, idxfd, tmpfiles[0], tmpfiles[1], tmpfiles[2], pack_hash, &ibuf);