"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
skip large offsets in small pack files
To:
gameoftrees@openbsd.org
Date:
Wed, 14 Jul 2021 12:22:52 +0200

Download raw body.

Thread
Don't scan pack index offsets for large values in pack files < 2GB.

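This saves an iteration over the entire h->offsets array when opening
a pack index which should not contain large offsets in the first place.
The scan still runs when verification is requested; in that case, a pack
index which contains large offsets although its pack file is smaller than
2GB is rejected with GOT_ERR_BAD_PACKIDX.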

ok?

diff bb931e174c03ada6efb8fb5294d4a352bd12d901 739c7757611e77d1f83847e58167b8df51d19cb8
blob - 08e7b0ccff9dcc74255388ca93d624636feee5ab
blob + 5321690c142e70461340284dce43c453e354aafe
--- lib/got_lib_pack.h
+++ lib/got_lib_pack.h
@@ -169,7 +169,7 @@ struct got_packfile_obj_data {
 	} __attribute__((__packed__));
 } __attribute__((__packed__));
 
-const struct got_error *got_packidx_init_hdr(struct got_packidx *, int);
+const struct got_error *got_packidx_init_hdr(struct got_packidx *, int, off_t);
 const struct got_error *got_packidx_open(struct got_packidx **,
     int, const char *, int);
 const struct got_error *got_packidx_close(struct got_packidx *);
blob - e35958fa1bceef186dcfd71f9d4a132377b31766
blob + 6268cadc46c849d8772530b9e2fa38833090ea59
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -358,6 +358,7 @@ struct got_imsg_index_pack_progress {
 /* Structure for GOT_IMSG_PACKIDX. */
 struct got_imsg_packidx {
 	size_t len;
+	off_t packfile_size;
 	/* Additionally, a file desciptor is passed via imsg. */
 };
 
blob - 9d93bee5d0a3def590556db70197db9b3a2b4651
blob + b29f6d25ce45802ebffcccce90ca9975efdbf260
--- lib/pack.c
+++ lib/pack.c
@@ -69,7 +69,7 @@ verify_fanout_table(uint32_t *fanout_table)
 }
 
 const struct got_error *
-got_packidx_init_hdr(struct got_packidx *p, int verify)
+got_packidx_init_hdr(struct got_packidx *p, int verify, off_t packfile_size)
 {
 	const struct got_error *err = NULL;
 	struct got_packidx_v2_hdr *h;
@@ -258,13 +258,19 @@ got_packidx_init_hdr(struct got_packidx *p, int verify
 	offset += nobj * sizeof(*h->offsets);
 
 	/* Large file offsets are contained only in files > 2GB. */
-	for (i = 0; i < nobj; i++) {
-		uint32_t o = h->offsets[i];
-		if (o & htobe32(GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX))
-			p->nlargeobj++;
+	if (verify || packfile_size > 0x7fffffff) {
+		for (i = 0; i < nobj; i++) {
+			uint32_t o = h->offsets[i];
+			if (o & htobe32(GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX))
+				p->nlargeobj++;
+		}
 	}
 	if (p->nlargeobj == 0)
 		goto checksum;
+	else if (packfile_size <= 0x7fffffff) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		goto done;
+	}
 
 	if (remain < p->nlargeobj * sizeof(*h->large_offsets)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
@@ -334,7 +340,7 @@ got_packidx_open(struct got_packidx **packidx,
 	const struct got_error *err = NULL;
 	struct got_packidx *p = NULL;
 	char *pack_relpath;
-	struct stat sb;
+	struct stat idx_sb, pack_sb;
 
 	*packidx = NULL;
 
@@ -347,7 +353,7 @@ got_packidx_open(struct got_packidx **packidx,
 	 * Some Git repositories have this problem. Git seems to ignore
 	 * the existence of lonely pack index files but we do not.
 	 */
-	if (fstatat(dir_fd, pack_relpath, &sb, 0) == -1) {
+	if (fstatat(dir_fd, pack_relpath, &pack_sb, 0) == -1) {
 		if (errno == ENOENT) {
 			err = got_error_fmt(GOT_ERR_LONELY_PACKIDX,
 			    "%s", relpath);
@@ -369,13 +375,13 @@ got_packidx_open(struct got_packidx **packidx,
 		goto done;
 	}
 
-	if (fstat(p->fd, &sb) != 0) {
+	if (fstat(p->fd, &idx_sb) != 0) {
 		err = got_error_from_errno2("fstat", relpath);
 		close(p->fd);
 		free(p);
 		goto done;
 	}
-	p->len = sb.st_size;
+	p->len = idx_sb.st_size;
 	if (p->len < sizeof(p->hdr)) {
 		err = got_error(GOT_ERR_BAD_PACKIDX);
 		close(p->fd);
@@ -400,7 +406,7 @@ got_packidx_open(struct got_packidx **packidx,
 	}
 #endif
 
-	err = got_packidx_init_hdr(p, verify);
+	err = got_packidx_init_hdr(p, verify, pack_sb.st_size);
 done:
 	if (err) {
 		if (p)
blob - 9b4a7ee1895805282b36a747dfb244b5dd775ef8
blob + 274f3c3fcbe691037dcbe34aba718d4c708a4af5
--- lib/privsep.c
+++ lib/privsep.c
@@ -1737,6 +1737,7 @@ got_privsep_init_pack_child(struct imsgbuf *ibuf, stru
 	int fd;
 
 	ipackidx.len = packidx->len;
+	ipackidx.packfile_size = pack->filesize;
 	fd = dup(packidx->fd);
 	if (fd == -1)
 		return got_error_from_errno("dup");
blob - 2013a7ab842fd854267c057defa8cbe437fa7bc4
blob + 8fdda11dc81d93858240483333f00089526064e5
--- libexec/got-read-pack/got-read-pack.c
+++ libexec/got-read-pack/got-read-pack.c
@@ -899,7 +899,7 @@ receive_packidx(struct got_packidx **packidx, struct i
 	if (p->map == MAP_FAILED)
 		p->map = NULL; /* fall back to read(2) */
 #endif
-	err = got_packidx_init_hdr(p, 1);
+	err = got_packidx_init_hdr(p, 1, ipackidx.packfile_size);
 done:
 	if (err) {
 		if (imsg.fd != -1)