"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
delta cache improvement for got-index-pack
To:
gameoftrees@openbsd.org
Date:
Tue, 24 Feb 2026 15:04:28 +0100

Download raw body.

Thread
  • Stefan Sperling:

    delta cache improvement for got-index-pack

Cloning ports.git with 'got clone' is quite slow, more than 10 minutes.
I have been looking for ways to improve this. I have found a way to speed
up indexing of the pack file, specifically the "resolving deltas" step.
 
The diff below increases delta cache hit rate during got-index-pack from
60% to 90% when indexing a full pack of the ports.git repository.
In my test case, where I use a ports.git repository with history between
1995 and 2016, the time taken to index the full pack file is reduced from
90 seconds to 30 seconds.
 
As a bonus, memory usage decreases on average since we keep fewer deltas
in cache than before. The diff also sets an upper limit on cache entries to keep memory
usage within reasonable limits. The maximum is now about 1.2GB in the
contrived case where all deltas and fulltexts are of the maximum size
allowed. In practice we hover at around 500 MB with ports.git.
The previous theoretical maximum was about 2GB.

ok?

diff /home/stsp/src/got
path + /home/stsp/src/got
commit - d07cf78033793607c02ab0e8bcb66de1f4a02a6f
blob - f4573ec7892464be6d8b1d3a6baf3decade28e4c
file + lib/delta_cache.c
--- lib/delta_cache.c
+++ lib/delta_cache.c
@@ -41,10 +41,11 @@
 #endif
 
 #define GOT_DELTA_CACHE_MIN_BUCKETS		64
-#define GOT_DELTA_CACHE_MAX_BUCKETS		2048
-#define GOT_DELTA_CACHE_MAX_CHAIN		2
-#define GOT_DELTA_CACHE_MAX_DELTA_SIZE		1024
-#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE	524288
+#define GOT_DELTA_CACHE_MAX_BUCKETS		1024
+#define GOT_DELTA_CACHE_MAX_ELEM		768
+#define GOT_DELTA_CACHE_MAX_CHAIN		4
+#define GOT_DELTA_CACHE_MAX_DELTA_SIZE		524288
+#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE	1048576
 
 
 struct got_cached_delta {
@@ -111,11 +112,13 @@ got_delta_cache_free(struct got_delta_cache *cache)
 	unsigned int i;
 
 #ifdef GOT_DELTA_CACHE_DEBUG
-	fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
-	    "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), "
-	    "%d too large fulltext (max %d)\n",
+	fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d "
+	    "hits (%d%%), %d fulltext hits (%d%%), %d missed, %d evicted, "
+	    "%d too large (max %d), %d too large fulltext (max %d)\n",
 	    getprogname(), cache->totelem, cache->cache_search,
-	    cache->cache_hit, cache->cache_hit_fulltext,
+	    cache->cache_hit, (cache->cache_hit * 100) / cache->cache_search,
+	    cache->cache_hit_fulltext,
+	    (cache->cache_hit_fulltext * 100) / cache->cache_search,
 	    cache->cache_miss, cache->cache_evict, cache->cache_toolarge,
 	    cache->cache_maxtoolarge,
 	    cache->cache_toolarge_fulltext,
@@ -230,6 +233,22 @@ got_delta_cache_add(struct got_delta_cache *cache,
 			return err;
 	}
 
+	if (cache->totelem >= GOT_DELTA_CACHE_MAX_ELEM) {
+		for (idx = 0; idx < cache->nbuckets; idx++) {
+			head = &cache->buckets[idx];
+			if (head->nchain < GOT_DELTA_CACHE_MAX_CHAIN / 2)
+				continue;
+			delta = &head->entries[head->nchain - 1];
+			free(delta->data);
+			free(delta->fulltext);
+			memset(delta, 0, sizeof(*delta));
+			head->nchain--;
+			cache->totelem--;
+			cache->cache_evict++;
+			break;
+		}
+	}
+
 	idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
 	head = &cache->buckets[idx];
 	if (head->nchain >= nitems(head->entries)) {