From: Omar Polo Subject: Re: opaquify sorted_ids To: gameoftrees@openbsd.org Date: Thu, 11 Jul 2024 15:36:42 +0200 Omar Polo wrote: > This is needed for upcoming work on sha256 pack file handling. We can't > assume that the sorted_id field in pack files is made by > SHA1_DIGEST_LENGTH objects. This is probably not the prettiest but > works in practice, is it acceptable? I've committed the got_hash_digest{,_string}_length() functions because they're needed for other diffs that I otherwise can't send out. Here's an updated diff: diff /home/op/w/got commit - 020f73dba5df9e319726af7526dbb3b478d52b76 path + /home/op/w/got blob - 08acf40b3e797365f5cc41914e99e091f0f09fe7 file + lib/got_lib_pack.h --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -67,10 +67,6 @@ struct got_packidx_trailer { u_int8_t packidx_sha1[SHA1_DIGEST_LENGTH]; } __attribute__((__packed__)); -struct got_packidx_object_id { - u_int8_t sha1[SHA1_DIGEST_LENGTH]; -} __attribute__((__packed__)); - /* Ignore pack index version 1 which is no longer written by Git. */ #define GOT_PACKIDX_VERSION 2 @@ -90,8 +86,11 @@ struct got_packidx_v2_hdr { uint32_t *fanout_table; /* values are big endian */ #define GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS (0xff + 1) - /* Sorted SHA1 checksums for each object in the pack file. */ - struct got_packidx_object_id *sorted_ids; + /* + * Sorted hash digest for each object in the pack file. + * Exact size depends on the repository object format. + */ + void *sorted_ids; /* CRC32 of the packed representation of each object. 
*/ uint32_t *crc32; blob - 84302f45b8db014cc23483d23a273c03f7fa077c file + lib/pack.c --- lib/pack.c +++ lib/pack.c @@ -183,14 +183,13 @@ got_packidx_init_hdr(struct got_packidx *p, int verify remain -= len_fanout; nobj = be32toh(h->fanout_table[0xff]); - len_ids = nobj * sizeof(*h->sorted_ids); + len_ids = nobj * got_hash_digest_length(algo); if (len_ids <= nobj || len_ids > remain) { err = got_error(GOT_ERR_BAD_PACKIDX); goto done; } if (p->map) - h->sorted_ids = - (struct got_packidx_object_id *)((uint8_t*)(p->map + offset)); + h->sorted_ids = p->map + offset; else { h->sorted_ids = malloc(len_ids); if (h->sorted_ids == NULL) { @@ -493,17 +492,19 @@ got_packidx_get_object_idx(struct got_packidx *packidx u_int8_t id0 = id->sha1[0]; uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]); int left = 0, right = totobj - 1; + size_t idlen; if (id0 > 0) left = be32toh(packidx->hdr.fanout_table[id0 - 1]); + idlen = got_hash_digest_length(GOT_HASH_SHA1); while (left <= right) { - struct got_packidx_object_id *oid; + void *oid; int i, cmp; i = ((left + right) / 2); - oid = &packidx->hdr.sorted_ids[i]; - cmp = memcmp(id->sha1, oid->sha1, SHA1_DIGEST_LENGTH); + oid = packidx->hdr.sorted_ids + i * idlen; + cmp = memcmp(id->sha1, oid, idlen); if (cmp == 0) return i; else if (cmp > 0) @@ -653,13 +654,15 @@ got_packidx_get_object_id(struct got_object_id *id, struct got_packidx *packidx, int idx) { uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]); - struct got_packidx_object_id *oid; + void *oid; + size_t idlen; if (idx < 0 || idx >= totobj) return got_error(GOT_ERR_NO_OBJ); - oid = &packidx->hdr.sorted_ids[idx]; - memcpy(id->sha1, oid->sha1, SHA1_DIGEST_LENGTH); + idlen = got_hash_digest_length(GOT_HASH_SHA1); + oid = packidx->hdr.sorted_ids + idx * idlen; + memcpy(id->sha1, oid, idlen); return NULL; } @@ -671,8 +674,8 @@ got_packidx_match_id_str_prefix(struct got_object_id_q u_int8_t id0; uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]); char hex[3]; - 
size_t prefix_len = strlen(id_str_prefix); - struct got_packidx_object_id *oid; + size_t idlen, prefix_len = strlen(id_str_prefix); + uint8_t *oid; uint32_t i = 0; if (prefix_len < 2) @@ -686,29 +689,33 @@ got_packidx_match_id_str_prefix(struct got_object_id_q if (id0 > 0) i = be32toh(packidx->hdr.fanout_table[id0 - 1]); - oid = &packidx->hdr.sorted_ids[i]; - while (i < totobj && oid->sha1[0] == id0) { + + idlen = got_hash_digest_length(GOT_HASH_SHA1); + oid = packidx->hdr.sorted_ids + i * idlen; + while (i < totobj && oid[0] == id0) { char id_str[SHA1_DIGEST_STRING_LENGTH]; struct got_object_qid *qid; int cmp; - if (!got_sha1_digest_to_str(oid->sha1, id_str, sizeof(id_str))) + if (!got_sha1_digest_to_str(oid, id_str, sizeof(id_str))) return got_error(GOT_ERR_NO_SPACE); cmp = strncmp(id_str, id_str_prefix, prefix_len); if (cmp < 0) { - oid = &packidx->hdr.sorted_ids[++i]; + ++i; + oid = packidx->hdr.sorted_ids + i * idlen; continue; } else if (cmp > 0) break; err = got_object_qid_alloc_partial(&qid); if (err) - return err; - memcpy(qid->id.sha1, oid->sha1, SHA1_DIGEST_LENGTH); + break; + memcpy(qid->id.sha1, oid, idlen); STAILQ_INSERT_TAIL(matched_ids, qid, entry); - oid = &packidx->hdr.sorted_ids[++i]; + ++i; + oid = packidx->hdr.sorted_ids + i * idlen; } return NULL; blob - 8aa0ad08ca44ef4b71ccf60df2cf2b962c13da78 file + lib/pack_index.c --- lib/pack_index.c +++ lib/pack_index.c @@ -442,17 +442,18 @@ find_object_idx(struct got_packidx *packidx, uint8_t * uint32_t nindexed = be32toh(packidx->hdr.fanout_table[0xff]); int left = 0, right = nindexed - 1; int cmp = 0, i = 0; + void *oid; + size_t idlen; if (id0 > 0) left = be32toh(packidx->hdr.fanout_table[id0 - 1]); + idlen = got_hash_digest_length(GOT_HASH_SHA1); while (left <= right) { - struct got_packidx_object_id *oid; - i = ((left + right) / 2); - oid = &packidx->hdr.sorted_ids[i]; + oid = packidx->hdr.sorted_ids + i * idlen; - cmp = memcmp(sha1, oid->sha1, SHA1_DIGEST_LENGTH); + cmp = memcmp(sha1, oid, idlen); 
if (cmp == 0) return -1; /* object already indexed */ else if (cmp > 0) @@ -474,8 +475,11 @@ print_packidx(struct got_packidx *packidx) fprintf(stderr, "object IDs:\n"); for (i = 0; i < nindexed; i++) { char hex[SHA1_DIGEST_STRING_LENGTH]; - got_sha1_digest_to_str(packidx->hdr.sorted_ids[i].sha1, - hex, sizeof(hex)); + void *oid; + + oid = packidx->hdr.sorted_ids + + i * got_hash_digest_length(GOT_HASH_SHA1); + got_sha1_digest_to_str(oid, hex, sizeof(hex)); fprintf(stderr, "%s\n", hex); } fprintf(stderr, "\n"); @@ -503,10 +507,13 @@ static void add_indexed_object(struct got_packidx *packidx, uint32_t idx, struct got_indexed_object *obj) { + void *oid; int i; + size_t idlen; - memcpy(packidx->hdr.sorted_ids[idx].sha1, obj->id.sha1, - SHA1_DIGEST_LENGTH); + idlen = got_hash_digest_length(GOT_HASH_SHA1); + oid = packidx->hdr.sorted_ids + idx * idlen; + memcpy(oid, obj->id.sha1, idlen); packidx->hdr.crc32[idx] = htobe32(obj->crc); if (obj->off < GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) packidx->hdr.offsets[idx] = htobe32(obj->off); @@ -561,15 +568,17 @@ update_packidx(struct got_packidx *packidx, uint32_t n struct got_indexed_object *obj) { int idx; + size_t idlen; uint32_t nindexed = be32toh(packidx->hdr.fanout_table[0xff]); idx = find_object_idx(packidx, obj->id.sha1); if (idx == -1) return; /* object already indexed */ - memmove(&packidx->hdr.sorted_ids[idx + 1], - &packidx->hdr.sorted_ids[idx], - sizeof(struct got_packidx_object_id) * (nindexed - idx)); + idlen = got_hash_digest_length(GOT_HASH_SHA1); + memmove(packidx->hdr.sorted_ids + (idx + 1) * idlen, + packidx->hdr.sorted_ids + idx * idlen, + idlen * (nindexed - idx)); memmove(&packidx->hdr.offsets[idx + 1], &packidx->hdr.offsets[idx], sizeof(uint32_t) * (nindexed - idx)); @@ -671,7 +680,7 @@ got_pack_index(struct got_pack *pack, int idxfd, FILE goto done; } packidx.hdr.sorted_ids = calloc(nobj, - sizeof(struct got_packidx_object_id)); + got_hash_digest_length(GOT_HASH_SHA1)); if (packidx.hdr.sorted_ids == NULL) { 
err = got_error_from_errno("calloc"); goto done; @@ -914,7 +923,7 @@ got_pack_index(struct got_pack *pack, int idxfd, FILE if (err) goto done; err = got_pack_hwrite(idxfd, packidx.hdr.sorted_ids, - nobj * SHA1_DIGEST_LENGTH, &ctx); + nobj * got_hash_digest_length(GOT_HASH_SHA1), &ctx); if (err) goto done; err = got_pack_hwrite(idxfd, packidx.hdr.crc32, blob - 271a0e86825dde1ffd9062349f1b0ff19c086697 file + lib/repository.c --- lib/repository.c +++ lib/repository.c @@ -1231,7 +1231,7 @@ add_packidx_bloom_filter(struct got_repository *repo, { int i, nobjects = be32toh(packidx->hdr.fanout_table[0xff]); struct got_packidx_bloom_filter *bf; - size_t len; + size_t len, idlen; /* * Don't use bloom filters for very large pack index files. @@ -1266,12 +1266,14 @@ add_packidx_bloom_filter(struct got_repository *repo, } bf->path_len = len; + idlen = got_hash_digest_length(GOT_HASH_SHA1); + /* Minimum size supported by our bloom filter is 1000 entries. */ bloom_init(bf->bloom, nobjects < 1000 ? 1000 : nobjects, 0.1); for (i = 0; i < nobjects; i++) { - struct got_packidx_object_id *id; - id = &packidx->hdr.sorted_ids[i]; - bloom_add(bf->bloom, id->sha1, sizeof(id->sha1)); + void *oid; + oid = packidx->hdr.sorted_ids + i * idlen; + bloom_add(bf->bloom, oid, idlen); } RB_INSERT(got_packidx_bloom_filter_tree, blob - b0168b3397ef9e155f6ca23c6057126006b3f1f6 file + lib/repository_admin.c --- lib/repository_admin.c +++ lib/repository_admin.c @@ -532,6 +532,7 @@ got_repo_list_pack(FILE *packfile, struct got_object_i struct got_packidx *packidx = NULL; struct got_pack *pack = NULL; uint32_t nobj, i; + size_t idlen; err = got_object_id_str(&id_str, pack_hash); if (err) @@ -556,9 +557,10 @@ got_repo_list_pack(FILE *packfile, struct got_object_i if (err) goto done; + idlen = got_hash_digest_length(GOT_HASH_SHA1); nobj = be32toh(packidx->hdr.fanout_table[0xff]); for (i = 0; i < nobj; i++) { - struct got_packidx_object_id *oid; + void *oid; struct got_object_id id, base_id; off_t offset, 
base_offset = 0; uint8_t type; @@ -570,8 +572,9 @@ got_repo_list_pack(FILE *packfile, struct got_object_i if (err) break; } - oid = &packidx->hdr.sorted_ids[i]; - memcpy(id.sha1, oid->sha1, SHA1_DIGEST_LENGTH); + oid = packidx->hdr.sorted_ids + i * idlen; + memset(&id, 0, sizeof(id)); + memcpy(id.sha1, oid, idlen); offset = got_packidx_get_object_offset(packidx, i); if (offset == -1) { @@ -1269,9 +1272,9 @@ pack_is_redundant(int *redundant, struct got_repositor { const struct got_error *err; struct got_packidx *packidx; - struct got_packidx_object_id *pid; struct got_object_id id; - size_t i, nobjects; + void *pid; + size_t i, nobjects, idlen; *redundant = 1; @@ -1280,11 +1283,12 @@ pack_is_redundant(int *redundant, struct got_repositor return err; nobjects = be32toh(packidx->hdr.fanout_table[0xff]); + idlen = got_hash_digest_length(GOT_HASH_SHA1); for (i = 0; i < nobjects; ++i) { - pid = &packidx->hdr.sorted_ids[i]; + pid = packidx->hdr.sorted_ids + i * idlen; memset(&id, 0, sizeof(id)); - memcpy(&id.sha1, pid->sha1, sizeof(id.sha1)); + memcpy(&id.sha1, pid, idlen); if (got_object_idset_contains(idset, &id)) continue;