From: Stefan Sperling Subject: skip large offsets in small pack files To: gameoftrees@openbsd.org Date: Wed, 14 Jul 2021 12:22:52 +0200 Don't scan pack index offsets for large values in pack files < 2GB. This saves an iteration over the entire h->offsets array when opening a pack index which should not contain large offsets in the first place. ok? diff bb931e174c03ada6efb8fb5294d4a352bd12d901 739c7757611e77d1f83847e58167b8df51d19cb8 blob - 08e7b0ccff9dcc74255388ca93d624636feee5ab blob + 5321690c142e70461340284dce43c453e354aafe --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -169,7 +169,7 @@ struct got_packfile_obj_data { } __attribute__((__packed__)); } __attribute__((__packed__)); -const struct got_error *got_packidx_init_hdr(struct got_packidx *, int); +const struct got_error *got_packidx_init_hdr(struct got_packidx *, int, off_t); const struct got_error *got_packidx_open(struct got_packidx **, int, const char *, int); const struct got_error *got_packidx_close(struct got_packidx *); blob - e35958fa1bceef186dcfd71f9d4a132377b31766 blob + 6268cadc46c849d8772530b9e2fa38833090ea59 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -358,6 +358,7 @@ struct got_imsg_index_pack_progress { /* Structure for GOT_IMSG_PACKIDX. */ struct got_imsg_packidx { size_t len; + off_t packfile_size; /* Additionally, a file desciptor is passed via imsg. */ }; blob - 9d93bee5d0a3def590556db70197db9b3a2b4651 blob + b29f6d25ce45802ebffcccce90ca9975efdbf260 --- lib/pack.c +++ lib/pack.c @@ -69,7 +69,7 @@ verify_fanout_table(uint32_t *fanout_table) } const struct got_error * -got_packidx_init_hdr(struct got_packidx *p, int verify) +got_packidx_init_hdr(struct got_packidx *p, int verify, off_t packfile_size) { const struct got_error *err = NULL; struct got_packidx_v2_hdr *h; @@ -258,13 +258,19 @@ got_packidx_init_hdr(struct got_packidx *p, int verify offset += nobj * sizeof(*h->offsets); /* Large file offsets are contained only in files > 2GB. */ - for (i = 0; i < nobj; i++) { - uint32_t o = h->offsets[i]; - if (o & htobe32(GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX)) - p->nlargeobj++; + if (verify || packfile_size > 0x7fffffff) { + for (i = 0; i < nobj; i++) { + uint32_t o = h->offsets[i]; + if (o & htobe32(GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX)) + p->nlargeobj++; + } } if (p->nlargeobj == 0) goto checksum; + else if (packfile_size <= 0x7fffffff) { + err = got_error(GOT_ERR_BAD_PACKIDX); + goto done; + } if (remain < p->nlargeobj * sizeof(*h->large_offsets)) { err = got_error(GOT_ERR_BAD_PACKIDX); @@ -334,7 +340,7 @@ got_packidx_open(struct got_packidx **packidx, const struct got_error *err = NULL; struct got_packidx *p = NULL; char *pack_relpath; - struct stat sb; + struct stat idx_sb, pack_sb; *packidx = NULL; @@ -347,7 +353,7 @@ got_packidx_open(struct got_packidx **packidx, * Some Git repositories have this problem. Git seems to ignore * the existence of lonely pack index files but we do not. */ - if (fstatat(dir_fd, pack_relpath, &sb, 0) == -1) { + if (fstatat(dir_fd, pack_relpath, &pack_sb, 0) == -1) { if (errno == ENOENT) { err = got_error_fmt(GOT_ERR_LONELY_PACKIDX, "%s", relpath); @@ -369,13 +375,13 @@ got_packidx_open(struct got_packidx **packidx, goto done; } - if (fstat(p->fd, &sb) != 0) { + if (fstat(p->fd, &idx_sb) != 0) { err = got_error_from_errno2("fstat", relpath); close(p->fd); free(p); goto done; } - p->len = sb.st_size; + p->len = idx_sb.st_size; if (p->len < sizeof(p->hdr)) { err = got_error(GOT_ERR_BAD_PACKIDX); close(p->fd); @@ -400,7 +406,7 @@ got_packidx_open(struct got_packidx **packidx, } #endif - err = got_packidx_init_hdr(p, verify); + err = got_packidx_init_hdr(p, verify, pack_sb.st_size); done: if (err) { if (p) blob - 9b4a7ee1895805282b36a747dfb244b5dd775ef8 blob + 274f3c3fcbe691037dcbe34aba718d4c708a4af5 --- lib/privsep.c +++ lib/privsep.c @@ -1737,6 +1737,7 @@ got_privsep_init_pack_child(struct imsgbuf *ibuf, stru int fd; ipackidx.len = packidx->len; + ipackidx.packfile_size = pack->filesize; fd = dup(packidx->fd); if (fd == -1) return got_error_from_errno("dup"); blob - 2013a7ab842fd854267c057defa8cbe437fa7bc4 blob + 8fdda11dc81d93858240483333f00089526064e5 --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -899,7 +899,7 @@ receive_packidx(struct got_packidx **packidx, struct i if (p->map == MAP_FAILED) p->map = NULL; /* fall back to read(2) */ #endif - err = got_packidx_init_hdr(p, 1); + err = got_packidx_init_hdr(p, 1, ipackidx.packfile_size); done: if (err) { if (imsg.fd != -1)