"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: convert got_pack filesize to off_t
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 25 Oct 2022 09:42:39 +0200

Download raw body.

Thread
On 2022/10/24 22:04:19 +0200, Omar Polo <op@omarpolo.com> wrote:
> here's the last piece: this converts the filesize field of the struct
> got_pack from size_t to off_t.  To test this, i've created a repo with
> a few big, randomly-generated files until `git repack -a -d' gave me a
> packfile bigger than 4G (on amd64), then copied the repo to an i386
> and saw got fail to handle it.  (actually i went a bit overboard and
> generated 6GB of packfile.)
> 
> 	$ uname -a
> 	OpenBSD chiaki.home.lab 7.2 GENERIC.MP#417 i386
> 	$ /usr/local/bin/got -V
> 	got 0.76
> 	$ /usr/local/bin/got log -P | grep ^commit
> 	commit 5f7afb519822a72570b428b62cc62bf44c378226 (main)
> 	commit f3021112535346c8ff1cb63ce7a0e8968daf9302
> 	commit 23e8d7613b956eb768032cca4ff04fa963ecf087
> 	commit 35c4c26bcab6652c4c72e6166855230ab1ad5a0a
> 	got-read-pack: bad offset in pack file
> 	got: bad offset in pack file
> 	$ got log -P | grep ^commit
> 	commit 5f7afb519822a72570b428b62cc62bf44c378226 (main)
> 	commit f3021112535346c8ff1cb63ce7a0e8968daf9302
> 	commit 23e8d7613b956eb768032cca4ff04fa963ecf087
> 	commit 35c4c26bcab6652c4c72e6166855230ab1ad5a0a
> 	commit f3d677f2833ff4bad7abc4d6461124f128d895f7
> 	commit 8a2fe22648044f8e20a4f43a370844c1196f96b1
> 	commit a702fb23dd465bfc9538c28fb43cbacc010ae668
> 	commit 47bb96a8ca78d72f0adf89d94057aa6c4fde0c14
> 	commit 4de1d9c2deec76f8f591c5c63a8d4b067de88d6b
> 	commit 3a8e0626f15c337852b97ecf952ade40e48b4059
> 	commit 55ccb88a64862a72bbf671099ce50e12b6c91920
> 
> This pushes the size_t truncation to inflate and the delta cache
> boundaries.  Will follow-up with fixes for those too.

Here's a rebased version after the recent changes.  One thing that i
forgot to mention in previous email is that I'm not sure whether
should i add a range check in got_pack_close when calling munmap;
there's nothing we can do reached that point if pack->filesize
overflows size_t, but maybe we could set an error nevertheless?
yesterday i was thinking it was superfluous, but laying out this mail
i'm starting to think that it's not a bad idea maybe.

diff 1c28a36116110db5de40e6edf09887651f3ca37b 168966b95eb8474962f08f48a9a86c9b7237e53e
commit - 1c28a36116110db5de40e6edf09887651f3ca37b
commit + 168966b95eb8474962f08f48a9a86c9b7237e53e
blob - 51a31d0b8608b9351cb8b63bfa93e3d5ab4f87f8
blob + 7b157b7e62f061b3d5da454b7ae3cb321862e407
--- gotd/repo_write.c
+++ gotd/repo_write.c
@@ -999,15 +999,10 @@ recv_packfile(struct repo_write_client **client, struc
 
 	log_debug("pack data received");
 
-	/* XXX size_t vs off_t, both should be off_t */
-	if (pack_filesize >= SIZE_MAX) {
-		err = got_error_msg(GOT_ERR_BAD_PACKFILE,
-		    "pack file too large");
-		goto done;
-	}
 	pack->filesize = pack_filesize;
 
-	log_debug("begin indexing pack (%zu bytes in size)", pack->filesize);
+	log_debug("begin indexing pack (%lld bytes in size)",
+	    (long long)pack->filesize);
 	err = got_pack_index(pack, (*client)->packidx_fd,
 	    tempfiles[0], tempfiles[1], tempfiles[2], (*client)->pack_sha1,
 	    pack_index_progress, NULL, &rl);
blob - 38d0cdc9b37e656d62eca1ed1def96ff6e7e2ef5
blob + 00518379da8d1ea10fa247bbd516547bf00019e1
--- lib/got_lib_pack.h
+++ lib/got_lib_pack.h
@@ -25,7 +25,7 @@ struct got_pack {
 	char *path_packfile;
 	int fd;
 	uint8_t *map;
-	size_t filesize;
+	off_t filesize;
 	struct got_pack_privsep_child *privsep_child;
 	int basefd;
 	int accumfd;
blob - 1d66ed4d59e710d6c7937f42faafc674ea675e66
blob + f84d920be3d90fdc2c9b6db77113efd0d7d87c62
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -514,7 +514,7 @@ struct got_imsg_pack {
 /* Structure for GOT_IMSG_PACK. */
 struct got_imsg_pack {
 	char path_packfile[PATH_MAX];
-	size_t filesize;
+	off_t filesize;
 	/* Additionally, a file desciptor is passed via imsg. */
 } __attribute__((__packed__));
 
blob - b043d4d76bcee830e606273e62636fe996c1d756
blob + 123ebe4d37d59bba57a079e82027e85467e28fbd
--- lib/pack.c
+++ lib/pack.c
@@ -819,6 +819,7 @@ got_pack_close(struct got_pack *pack)
 	const struct got_error *err = NULL;
 
 	err = pack_stop_privsep_child(pack);
+	/* XXX: check that pack->filesize < SIZE_MAX ? */
 	if (pack->map && munmap(pack->map, pack->filesize) == -1 && !err)
 		err = got_error_from_errno("munmap");
 	if (pack->fd != -1 && close(pack->fd) == -1 && err == NULL)
@@ -856,6 +857,12 @@ got_pack_parse_object_type_and_size(uint8_t *type, uin
 		return got_error(GOT_ERR_PACK_OFFSET);
 
 	if (pack->map) {
+		if (offset > SIZE_MAX) {
+			return got_error_fmt(GOT_ERR_PACK_OFFSET,
+			    "offset %lld overflows size_t",
+			    (long long)offset);
+		}
+
 		mapoff = (size_t)offset;
 	} else {
 		if (lseek(pack->fd, offset, SEEK_SET) == -1)
@@ -935,6 +942,13 @@ parse_negative_offset(int64_t *offset, size_t *len, st
 
 		if (pack->map) {
 			size_t mapoff;
+
+			if (delta_offset + *len > SIZE_MAX) {
+				return got_error_fmt(GOT_ERR_PACK_OFFSET,
+				    "mapoff %lld would overflow size_t",
+				    delta_offset + *len);
+			}
+
 			mapoff = (size_t)delta_offset + *len;
 			if (mapoff + sizeof(offN) >= pack->filesize)
 				return got_error(GOT_ERR_PACK_OFFSET);
@@ -1063,7 +1077,7 @@ resolve_offset_delta(struct got_delta_chain *deltas,
 	}
 
 	err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
-	    delta_data_offset);
+	    delta_data_offset);	/* XXX: off_t vs size_t! */
 	if (err)
 		return err;
 
@@ -1085,7 +1099,15 @@ got_pack_parse_ref_delta(struct got_object_id *id,
     struct got_pack *pack, off_t delta_offset, int tslen)
 {
 	if (pack->map) {
-		size_t mapoff = delta_offset + tslen;
+		size_t mapoff;
+
+		if (delta_offset + tslen > SIZE_MAX) {
+			return got_error_fmt(GOT_ERR_PACK_OFFSET,
+			    "mapoff %lld would overflow size_t",
+			    delta_offset + tslen);
+		}
+
+		mapoff = delta_offset + tslen;
 		if (mapoff + sizeof(*id) >= pack->filesize)
 			return got_error(GOT_ERR_PACK_OFFSET);
 		memcpy(id, pack->map + mapoff, sizeof(*id));
@@ -1130,7 +1152,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru
 	}
 
 	err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
-	    delta_data_offset);
+	    delta_data_offset);	/* XXX: off_t vs size_t */
 	if (err)
 		return err;
 
@@ -1397,7 +1419,16 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
 				max_size = delta->size;
 			if (max_size > max_bufsize) {
 				if (pack->map) {
-					mapoff = (size_t)delta_data_offset;
+					if (delta_data_offset > SIZE_MAX) {
+						return got_error_fmt(
+						    GOT_ERR_RANGE,
+						    "delta offset %lld "
+						    "overflows size_t",
+						    (long long)
+						    delta_data_offset);
+					}
+
+					mapoff = delta_data_offset;
 					err = got_inflate_to_file_mmap(
 					    &base_bufsz, NULL, NULL, pack->map,
 					    mapoff, pack->filesize - mapoff,
@@ -1414,7 +1445,16 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
 				}
 				accum_bufsz = max_size;
 				if (pack->map) {
-					mapoff = (size_t)delta_data_offset;
+					if (delta_data_offset > SIZE_MAX) {
+						return got_error_fmt(
+						    GOT_ERR_RANGE,
+						    "delta offset %lld "
+						    "overflows size_t",
+						    (long long)
+						    delta_data_offset);
+					}
+
+					mapoff = delta_data_offset;
 					err = got_inflate_to_mem_mmap(&base_buf,
 					    &base_bufsz, NULL, NULL,
 					    pack->map, mapoff,
@@ -1570,7 +1610,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
 		uint64_t base_size, result_size = 0;
 		int cached = 1;
 		if (n == 0) {
-			size_t delta_data_offset;
+			off_t delta_data_offset;
 
 			/* Plain object types are the delta base. */
 			if (delta->type != GOT_OBJ_TYPE_COMMIT &&
@@ -1591,7 +1631,16 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
 				max_size = delta->size;
 
 			if (pack->map) {
-				size_t mapoff = (size_t)delta_data_offset;
+				size_t mapoff;
+
+				if (delta_data_offset > SIZE_MAX) {
+					return got_error_fmt(GOT_ERR_RANGE,
+					    "delta %lld offset would "
+					    "overflow size_t",
+					    (long long)delta_data_offset);
+				}
+
+				mapoff = delta_data_offset;
 				err = got_inflate_to_mem_mmap(&base_buf,
 				    &base_bufsz, NULL, NULL, pack->map,
 				    mapoff, pack->filesize - mapoff);
@@ -1709,7 +1758,15 @@ got_packfile_extract_object(struct got_pack *pack, str
 			return got_error(GOT_ERR_PACK_OFFSET);
 
 		if (pack->map) {
-			size_t mapoff = (size_t)obj->pack_offset;
+			size_t mapoff;
+
+			if (obj->pack_offset > SIZE_MAX) {
+				return got_error_fmt(GOT_ERR_RANGE,
+				    "pack offset %lld would overflow size_t",
+				    (long long)obj->pack_offset);
+			}
+
+			mapoff = obj->pack_offset;
 			err = got_inflate_to_file_mmap(&obj->size, NULL, NULL,
 			    pack->map, mapoff, pack->filesize - mapoff,
 			    outfile);
@@ -1739,7 +1796,15 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size
 		if (obj->pack_offset >= pack->filesize)
 			return got_error(GOT_ERR_PACK_OFFSET);
 		if (pack->map) {
-			size_t mapoff = (size_t)obj->pack_offset;
+			size_t mapoff;
+
+			if (obj->pack_offset > SIZE_MAX) {
+				return got_error_fmt(GOT_ERR_RANGE,
+				    "pack offset %lld would overflow size_t",
+				    (long long)obj->pack_offset);
+			}
+
+			mapoff = obj->pack_offset;
 			err = got_inflate_to_mem_mmap(buf, len, NULL, NULL,
 			    pack->map, mapoff, pack->filesize - mapoff);
 		} else {
blob - 07b401c3fd8743bb46cd007e16800e79aaa387dc
blob + 8d1f70836b3c957b385dad80e0bc73f6739b9089
--- lib/pack_index.c
+++ lib/pack_index.c
@@ -271,6 +271,12 @@ read_packed_object(struct got_pack *pack, struct got_i
 				err = got_error(GOT_ERR_BAD_PACKFILE);
 				break;
 			}
+			if (mapoff + SHA1_DIGEST_LENGTH > SIZE_MAX) {
+				err = got_error_fmt(GOT_ERR_RANGE,
+				    "mapoff %lld would overflow size_t",
+				    (long long)mapoff + SHA1_DIGEST_LENGTH);
+				break;
+			}
 			memcpy(obj->delta.ref.ref_id.sha1, pack->map + mapoff,
 			    SHA1_DIGEST_LENGTH);
 			obj->crc = crc32(obj->crc, pack->map + mapoff,
@@ -320,6 +326,14 @@ read_packed_object(struct got_pack *pack, struct got_i
 				break;
 			}
 
+			if (mapoff + obj->delta.ofs.base_offsetlen >
+			    SIZE_MAX) {
+				err = got_error_fmt(GOT_ERR_RANGE,
+				    "mapoff %lld would overflow size_t",
+				    (long long)mapoff
+				    + obj->delta.ofs.base_offsetlen);
+			}
+
 			obj->crc = crc32(obj->crc, pack->map + mapoff,
 			    obj->delta.ofs.base_offsetlen);
 			SHA1Update(pack_sha1_ctx, pack->map + mapoff,
@@ -797,6 +811,13 @@ got_pack_index(struct got_pack *pack, int idxfd, FILE 
 
 	/* Verify the SHA1 checksum stored at the end of the pack file. */
 	if (pack->map) {
+		if (pack->filesize > SIZE_MAX) {
+			err = got_error_fmt(GOT_ERR_RANGE,
+			    "filesize %lld overflows size_t",
+			    (long long)pack->filesize);
+			goto done;
+		}
+
 		memcpy(pack_sha1_expected, pack->map +
 		    pack->filesize - SHA1_DIGEST_LENGTH,
 		    SHA1_DIGEST_LENGTH);
blob - 7fda0c4e47f3539c4b13427d388cc26e749b75bb
blob + e44cf3100a855c7702d3a6f0abd5af9889166e60
--- libexec/got-index-pack/got-index-pack.c
+++ libexec/got-index-pack/got-index-pack.c
@@ -175,7 +175,7 @@ main(int argc, char **argv)
 		err = got_error_from_errno("lseek");
 		goto done;
 	}
-	pack.filesize = packfile_size; /* XXX off_t vs size_t */
+	pack.filesize = packfile_size;
 
 	if (lseek(pack.fd, 0, SEEK_SET) == -1) {
 		err = got_error_from_errno("lseek");