"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
avoid one memcpy in got-read-pack enumerate_tree()
To:
gameoftrees@openbsd.org
Date:
Tue, 24 Feb 2026 22:04:59 +0100

Download raw body.

Thread
Avoid one per tree-entry memcpy() in got-read-pack enumerate_tree().

This is called per tree entry to it's being called a lot.
Anything we can skip doing here should be a worthwile change.

I hadn't sent this easlier since I had it sitting on top of the idset
changes which have just been committed.

The idea is that we can pass pte->id directly to look it up in an idset.
The current API forces us to copy the hash into a got_object_id first.

ok?

M  lib/got_lib_object_idset.h             |   2+  0-
M  lib/object_idset.c                     |  47+  0-
M  libexec/got-read-pack/got-read-pack.c  |   4+  6-

3 files changed, 53 insertions(+), 6 deletions(-)

path + /home/stsp/src/got
commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06
blob - 750be4962021434fb27d2dcc95867ff2e333a776
file + lib/got_lib_object_idset.h
--- lib/got_lib_object_idset.h
+++ lib/got_lib_object_idset.h
@@ -27,6 +27,8 @@ const struct got_error *got_object_idset_remove(void *
     struct got_object_idset *, struct got_object_id *);
 int got_object_idset_contains(struct got_object_idset *,
     struct got_object_id *);
+int got_object_idset_contains_hash(struct got_object_idset *,
+    enum got_hash_algorithm, uint8_t *);
 const struct got_error *got_object_idset_for_each(struct got_object_idset *,
     const struct got_error *(*cb)(struct got_object_id *, void *, void *),
     void *);
commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06
blob - 0f543bbcd0fd44a53473f4c13c0155abf0d5a61f
file + lib/object_idset.c
--- lib/object_idset.c
+++ lib/object_idset.c
@@ -123,6 +123,12 @@ idset_hash(SIPHASH_KEY *key, struct got_object_id *id)
 	return SipHash24(key, id->hash, got_hash_digest_length(id->algo));
 }
 
+static uint64_t
+idset_hash_hash(SIPHASH_KEY *key, enum got_hash_algorithm algo, uint8_t *hash)
+{
+	return SipHash24(key, hash, got_hash_digest_length(algo));
+}
+
 static const struct got_error *
 idset_add(struct got_object_idset_bucket *bucket, struct got_object_id *id,
     void *data)
@@ -408,6 +414,47 @@ got_object_idset_contains(struct got_object_idset *set
 	return id_found ? 1 : 0;
 }
 
+static void
+find_element_hash(struct got_object_id **id_found, void **data_found,
+    struct got_object_idset *set, enum got_hash_algorithm algo,
+    uint8_t *hash)
+{
+	uint64_t idx = idset_hash_hash(&set->key, algo, hash) % set->nbuckets;
+	struct got_object_idset_bucket *bucket = &set->buckets[idx];
+	struct got_object_qid *qid;
+
+	*id_found = NULL;
+	*data_found = NULL;
+
+	if (bucket->id.algo == GOT_OBJECT_IDSET_SLOT_EMPTY)
+		return;
+
+	if (got_hash_cmp(algo, bucket->id.hash, hash) == 0) {
+		*id_found = &bucket->id;
+		*data_found = bucket->data;
+		return;
+	}
+
+	STAILQ_FOREACH(qid, &bucket->ids, entry) {
+		if (got_hash_cmp(algo, qid->id.hash, hash) == 0) {
+			*id_found = &qid->id;
+			*data_found = qid->data;
+			return;
+		}
+	}
+}
+int
+got_object_idset_contains_hash(struct got_object_idset *set,
+    enum got_hash_algorithm algo, uint8_t *hash)
+{
+	struct got_object_id *id_found;
+	void *data_found;
+
+	find_element_hash(&id_found, &data_found, set, algo, hash);
+	return id_found ? 1 : 0;
+}
+
+
 const struct got_error *
 got_object_idset_for_each(struct got_object_idset *set,
     const struct got_error *(*cb)(struct got_object_id *, void *, void *),
commit - 2835607ea1e4f67b8a6d2e0167cf8719a08fbd06
blob - 590c80061815dff109d5311b406c5127fb953586
file + libexec/got-read-pack/got-read-pack.c
--- libexec/got-read-pack/got-read-pack.c
+++ libexec/got-read-pack/got-read-pack.c
@@ -1286,23 +1286,21 @@ enumerate_tree(int *have_all_entries, struct imsgbuf *
 		for (i = 0; i < nentries; i++) {
 			struct got_object_qid *eqid = NULL;
 			struct got_parsed_tree_entry *pte = &entries[i];
-			struct got_object_id id;
 			char *p;
 
 			if (!S_ISDIR(pte->mode))
 				continue;
 
-			id.algo = pte->algo;
-			memcpy(id.hash, pte->id,
-			    MINIMUM(pte->digest_len, sizeof(id.hash)));
-			if (got_object_idset_contains(idset, &id))
+			if (got_object_idset_contains_hash(idset, pte->algo,
+			    pte->id))
 				continue;
 
 			err = got_object_qid_alloc_partial(&eqid);
 			if (err)
 				goto done;
 	
-			memcpy(&eqid->id, &id, sizeof(eqid->id));
+			eqid->id.algo = pte->algo;
+			memcpy(&eqid->id.hash, pte->id, pte->digest_len);
 
 			if (asprintf(&p, "%s%s%s", path,
 			    got_path_is_root_dir(path) ? "" : "/",