From: Stefan Sperling Subject: avoid one memcpy in got-read-pack enumerate_tree() To: gameoftrees@openbsd.org Date: Tue, 24 Feb 2026 22:04:59 +0100 Avoid one per tree-entry memcpy() in got-read-pack enumerate_tree(). This is called per tree entry to it's being called a lot. Anything we can skip doing here should be a worthwile change. I hadn't sent this easlier since I had it sitting on top of the idset changes which have just been committed. The idea is that we can pass pte->id directly to look it up in an idset. The current API forces us to copy the hash into a got_object_id first. ok? M lib/got_lib_object_idset.h | 2+ 0- M lib/object_idset.c | 47+ 0- M libexec/got-read-pack/got-read-pack.c | 4+ 6- 3 files changed, 53 insertions(+), 6 deletions(-) path + /home/stsp/src/got commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06 blob - 750be4962021434fb27d2dcc95867ff2e333a776 file + lib/got_lib_object_idset.h --- lib/got_lib_object_idset.h +++ lib/got_lib_object_idset.h @@ -27,6 +27,8 @@ const struct got_error *got_object_idset_remove(void * struct got_object_idset *, struct got_object_id *); int got_object_idset_contains(struct got_object_idset *, struct got_object_id *); +int got_object_idset_contains_hash(struct got_object_idset *, + enum got_hash_algorithm, uint8_t *); const struct got_error *got_object_idset_for_each(struct got_object_idset *, const struct got_error *(*cb)(struct got_object_id *, void *, void *), void *); commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06 blob - 0f543bbcd0fd44a53473f4c13c0155abf0d5a61f file + lib/object_idset.c --- lib/object_idset.c +++ lib/object_idset.c @@ -123,6 +123,12 @@ idset_hash(SIPHASH_KEY *key, struct got_object_id *id) return SipHash24(key, id->hash, got_hash_digest_length(id->algo)); } +static uint64_t +idset_hash_hash(SIPHASH_KEY *key, enum got_hash_algorithm algo, uint8_t *hash) +{ + return SipHash24(key, hash, got_hash_digest_length(algo)); +} + static const struct got_error * idset_add(struct got_object_idset_bucket *bucket, struct got_object_id *id, void *data) @@ -408,6 +414,47 @@ got_object_idset_contains(struct got_object_idset *set return id_found ? 1 : 0; } +static void +find_element_hash(struct got_object_id **id_found, void **data_found, + struct got_object_idset *set, enum got_hash_algorithm algo, + uint8_t *hash) +{ + uint64_t idx = idset_hash_hash(&set->key, algo, hash) % set->nbuckets; + struct got_object_idset_bucket *bucket = &set->buckets[idx]; + struct got_object_qid *qid; + + *id_found = NULL; + *data_found = NULL; + + if (bucket->id.algo == GOT_OBJECT_IDSET_SLOT_EMPTY) + return; + + if (got_hash_cmp(algo, bucket->id.hash, hash) == 0) { + *id_found = &bucket->id; + *data_found = bucket->data; + return; + } + + STAILQ_FOREACH(qid, &bucket->ids, entry) { + if (got_hash_cmp(algo, qid->id.hash, hash) == 0) { + *id_found = &qid->id; + *data_found = qid->data; + return; + } + } +} +int +got_object_idset_contains_hash(struct got_object_idset *set, + enum got_hash_algorithm algo, uint8_t *hash) +{ + struct got_object_id *id_found; + void *data_found; + + find_element_hash(&id_found, &data_found, set, algo, hash); + return id_found ? 1 : 0; +} + + const struct got_error * got_object_idset_for_each(struct got_object_idset *set, const struct got_error *(*cb)(struct got_object_id *, void *, void *), commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06 blob - 590c80061815dff109d5311b406c5127fb953586 file + libexec/got-read-pack/got-read-pack.c --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -1286,23 +1286,21 @@ enumerate_tree(int *have_all_entries, struct imsgbuf * for (i = 0; i < nentries; i++) { struct got_object_qid *eqid = NULL; struct got_parsed_tree_entry *pte = &entries[i]; - struct got_object_id id; char *p; if (!S_ISDIR(pte->mode)) continue; - id.algo = pte->algo; - memcpy(id.hash, pte->id, - MINIMUM(pte->digest_len, sizeof(id.hash))); - if (got_object_idset_contains(idset, &id)) + if (got_object_idset_contains_hash(idset, pte->algo, + pte->id)) continue; err = got_object_qid_alloc_partial(&eqid); if (err) goto done; - memcpy(&eqid->id, &id, sizeof(eqid->id)); + eqid->id.algo = pte->algo; + memcpy(&eqid->id.hash, pte->id, pte->digest_len); if (asprintf(&p, "%s%s%s", path, got_path_is_root_dir(path) ? "" : "/",