"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
track pack file size as off_t and size_t
To:
gameoftrees@openbsd.org
Date:
Thu, 17 Dec 2020 17:59:39 +0100

Download raw body.

Thread
Insprired by Ed Maste's size_t changes, this patch attempts to tidy
up our tracking of the size of a pack file.

Note that pack files are expected to become very large. It doesn't affect
repositories I usually work with, but pack files above 4 GB certainly exist.

One problem is that we're currently storing the pack file size in a size_t
variable. This could be problematic if a large pack file is used on an i386
machine, for instance.
 
With the patch below, we keep separate variables for the on-disk and
memory-mapped sizes which fixes off_t/size_t confusion in some places.
Furthermore, we only attempt to map files which will fit into a size_t.

ok?

diff e8f0f2f60d1eb5651a01b13607ca04014f092753 ea75a347556ace162546e9c7f6141c791f40bee7
blob - 39aa4333097646e6b90b09899df9869839a65c43
blob + b65dd0c1423c98241b8fe716b7a1c4c586f9b58a
--- lib/got_lib_pack.h
+++ lib/got_lib_pack.h
@@ -19,7 +19,8 @@ struct got_pack {
 	char *path_packfile;
 	int fd;
 	uint8_t *map;
-	size_t filesize;
+	off_t filesize;
+	size_t mapsize;
 	struct got_privsep_child *privsep_child;
 	struct got_delta_cache *delta_cache;
 };
blob - b46583a6e3ce2fe952e1c30726026d492c7f5734
blob + b6d955fd156a0173b16ff02c22be3ad38055f2e9
--- lib/pack.c
+++ lib/pack.c
@@ -538,7 +538,7 @@ got_pack_close(struct got_pack *pack)
 	const struct got_error *err = NULL;
 
 	err = got_pack_stop_privsep_child(pack);
-	if (pack->map && munmap(pack->map, pack->filesize) == -1 && !err)
+	if (pack->map && munmap(pack->map, pack->mapsize) == -1 && !err)
 		err = got_error_from_errno("munmap");
 	if (pack->fd != -1 && close(pack->fd) != 0 && err == NULL)
 		err = got_error_from_errno("close");
@@ -546,6 +546,7 @@ got_pack_close(struct got_pack *pack)
 	free(pack->path_packfile);
 	pack->path_packfile = NULL;
 	pack->filesize = 0;
+	pack->mapsize = 0;
 	if (pack->delta_cache) {
 		got_delta_cache_free(pack->delta_cache);
 		pack->delta_cache = NULL;
@@ -700,7 +701,7 @@ got_pack_parse_offset_delta(off_t *base_offset, size_t
 
 static const struct got_error *
 read_delta_data(uint8_t **delta_buf, size_t *delta_len,
-    size_t delta_data_offset, struct got_pack *pack)
+    off_t delta_data_offset, struct got_pack *pack)
 {
 	const struct got_error *err = NULL;
 
@@ -708,8 +709,8 @@ read_delta_data(uint8_t **delta_buf, size_t *delta_len
 		if (delta_data_offset >= pack->filesize)
 			return got_error(GOT_ERR_PACK_OFFSET);
 		err = got_inflate_to_mem_mmap(delta_buf, delta_len,
-		    NULL, NULL, pack->map, delta_data_offset,
-		    pack->filesize - delta_data_offset);
+		    NULL, NULL, pack->map, (size_t)delta_data_offset,
+		    pack->mapsize - (size_t)delta_data_offset);
 	} else {
 		if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1)
 			return got_error_from_errno("lseek");
@@ -796,7 +797,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru
 	size_t base_tslen;
 	off_t delta_data_offset;
 
-	if (delta_offset + tslen >= pack->filesize)
+	if (delta_offset + (off_t)tslen >= pack->filesize)
 		return got_error(GOT_ERR_PACK_OFFSET);
 
 	if (pack->map) {
@@ -1084,7 +1085,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
 					mapoff = (size_t)delta_data_offset;
 					err = got_inflate_to_file_mmap(
 					    &base_bufsz, NULL, NULL, pack->map,
-					    mapoff, pack->filesize - mapoff,
+					    mapoff, pack->mapsize - mapoff,
 					    base_file);
 				} else
 					err = got_inflate_to_file_fd(
@@ -1096,7 +1097,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
 					err = got_inflate_to_mem_mmap(&base_buf,
 					    &base_bufsz, NULL, NULL,
 					    pack->map, mapoff,
-					    pack->filesize - mapoff);
+					    pack->mapsize - mapoff);
 				} else
 					err = got_inflate_to_mem_fd(&base_buf,
 					    &base_bufsz, NULL, NULL, max_size,
@@ -1219,7 +1220,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
 	SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
 		int cached = 1;
 		if (n == 0) {
-			size_t delta_data_offset;
+			off_t delta_data_offset;
 
 			/* Plain object types are the delta base. */
 			if (delta->type != GOT_OBJ_TYPE_COMMIT &&
@@ -1239,7 +1240,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
 				size_t mapoff = (size_t)delta_data_offset;
 				err = got_inflate_to_mem_mmap(&base_buf,
 				    &base_bufsz, NULL, NULL, pack->map,
-				    mapoff, pack->filesize - mapoff);
+				    mapoff, pack->mapsize - mapoff);
 			} else {
 				if (lseek(pack->fd, delta_data_offset, SEEK_SET)
 				    == -1) {
@@ -1335,7 +1336,7 @@ got_packfile_extract_object(struct got_pack *pack, str
 		if (pack->map) {
 			size_t mapoff = (size_t)obj->pack_offset;
 			err = got_inflate_to_file_mmap(&obj->size, NULL, NULL,
-			    pack->map, mapoff, pack->filesize - mapoff,
+			    pack->map, mapoff, pack->mapsize - mapoff,
 			    outfile);
 		} else {
 			if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1)
blob - 7a2d2999eee396614f65ab978214ffc035a1f4c2
blob + a3699f34e7df3a124c349bdb340fada674bffe45
--- lib/repository.c
+++ lib/repository.c
@@ -1128,18 +1128,25 @@ got_repo_cache_pack(struct got_pack **packp, struct go
 		goto done;
 	}
 	pack->filesize = sb.st_size;
+	if (pack->filesize <= SIZE_MAX)
+		pack->mapsize = pack->filesize;
+	else
+		pack->mapsize = 0;
 
 	pack->privsep_child = NULL;
 
 #ifndef GOT_PACK_NO_MMAP
-	pack->map = mmap(NULL, pack->filesize, PROT_READ, MAP_PRIVATE,
-	    pack->fd, 0);
-	if (pack->map == MAP_FAILED) {
-		if (errno != ENOMEM) {
-			err = got_error_from_errno("mmap");
-			goto done;
+	if (pack->mapsize > 0) {
+		pack->map = mmap(NULL, pack->mapsize, PROT_READ, MAP_PRIVATE,
+		    pack->fd, 0);
+		if (pack->map == MAP_FAILED) {
+			if (errno != ENOMEM) {
+				err = got_error_from_errno("mmap");
+				goto done;
+			}
+			pack->map = NULL; /* fall back to read(2) */
+			pack->mapsize = 0;
 		}
-		pack->map = NULL; /* fall back to read(2) */
 	}
 #endif
 done:
blob - bcef0951dbb771b4b70ab05125138f52e08c7b7a
blob + 4670d0eff730b5875135247003683d94a88712e0
--- libexec/got-index-pack/got-index-pack.c
+++ libexec/got-index-pack/got-index-pack.c
@@ -265,7 +265,7 @@ read_packed_object(struct got_pack *pack, struct got_i
 	case GOT_OBJ_TYPE_REF_DELTA:
 		memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH);
 		if (pack->map) {
-			if (mapoff + SHA1_DIGEST_LENGTH >= pack->filesize) {
+			if (mapoff + SHA1_DIGEST_LENGTH >= pack->mapsize) {
 				err = got_error(GOT_ERR_BAD_PACKFILE);
 				break;
 			}
@@ -636,7 +636,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp
 	int p_resolved = 0, last_p_resolved = -1;
 
 	/* Require that pack file header and SHA1 trailer are present. */
-	if (pack->filesize < sizeof(hdr) + SHA1_DIGEST_LENGTH)
+	if (pack->filesize < (off_t)(sizeof(hdr) + SHA1_DIGEST_LENGTH))
 		return got_error_msg(GOT_ERR_BAD_PACKFILE,
 		    "short pack file");
 
@@ -798,7 +798,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp
 	/* Verify the SHA1 checksum stored at the end of the pack file. */
 	if (pack->map) {
 		memcpy(pack_sha1_expected, pack->map +
-		    pack->filesize - SHA1_DIGEST_LENGTH,
+		    pack->mapsize - SHA1_DIGEST_LENGTH,
 		    SHA1_DIGEST_LENGTH);
 	} else {
 		ssize_t n;
@@ -1070,7 +1070,9 @@ main(int argc, char **argv)
 		err = got_error_from_errno("lseek");
 		goto done;
 	}
-	pack.filesize = packfile_size; /* XXX off_t vs size_t */
+	pack.filesize = packfile_size;
+	if (pack.filesize <= SIZE_MAX)
+		pack.mapsize = pack.filesize;
 
 	if (lseek(pack.fd, 0, SEEK_SET) == -1) {
 		err = got_error_from_errno("lseek");
@@ -1078,10 +1080,18 @@ main(int argc, char **argv)
 	}
 
 #ifndef GOT_PACK_NO_MMAP
-	pack.map = mmap(NULL, pack.filesize, PROT_READ, MAP_PRIVATE,
-	    pack.fd, 0);
-	if (pack.map == MAP_FAILED)
-		pack.map = NULL; /* fall back to read(2) */
+	if (pack.mapsize > 0) {
+		pack.map = mmap(NULL, pack.mapsize, PROT_READ, MAP_PRIVATE,
+		    pack.fd, 0);
+		if (pack.map == MAP_FAILED) {
+			if (errno != ENOMEM) {
+				err = got_error_from_errno("mmap");
+				goto done;
+			}
+			pack.map = NULL; /* fall back to read(2) */
+			pack.mapsize = 0;
+		}
+	}
 #endif
 	err = index_pack(&pack, idxfd, tmpfiles[0], tmpfiles[1], tmpfiles[2],
 	    pack_hash, &ibuf);