From: Stefan Sperling Subject: delta cache improvement for got-index-pack To: gameoftrees@openbsd.org Date: Tue, 24 Feb 2026 15:04:28 +0100 Cloning ports.git with 'got clone' is quite slow, more than 10 minutes. I have been looking for ways to improve this. I have found a way to speed up indexing of the pack file, specifically the "resolving deltas" step. The diff below increases delta cache hit rate during got-index-pack from 60% to 90% when indexing a full pack of the ports.git repository. In my test case, where I use a ports.git repository with history between 1995 and 2016, the time taken to index the full pack file is reduced from 90 seconds to 30 seconds. As a bonus, memory usage decreases on average since we keep fewer deltas in cache than before. Set an upper limit on cache entries to keep memory usage within reasonable limits. The maximum is now about 1.2GB in the contrived case where all deltas and fulltexts are of the maximum size allowed. In practice we hover at around 500 MB with ports.git. The previous theoretical maximum was about 2GB. ok? 
diff /home/stsp/src/got path + /home/stsp/src/got commit - d07cf78033793607c02ab0e8bcb66de1f4a02a6f blob - f4573ec7892464be6d8b1d3a6baf3decade28e4c file + lib/delta_cache.c --- lib/delta_cache.c +++ lib/delta_cache.c @@ -41,10 +41,11 @@ #endif #define GOT_DELTA_CACHE_MIN_BUCKETS 64 -#define GOT_DELTA_CACHE_MAX_BUCKETS 2048 -#define GOT_DELTA_CACHE_MAX_CHAIN 2 -#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024 -#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 524288 +#define GOT_DELTA_CACHE_MAX_BUCKETS 1024 +#define GOT_DELTA_CACHE_MAX_ELEM 768 +#define GOT_DELTA_CACHE_MAX_CHAIN 4 +#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 524288 +#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 1048576 struct got_cached_delta { @@ -111,11 +112,13 @@ got_delta_cache_free(struct got_delta_cache *cache) unsigned int i; #ifdef GOT_DELTA_CACHE_DEBUG - fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, " - "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), " - "%d too large fulltext (max %d)\n", + fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d " + "hits (%d%%), %d fulltext hits (%d%%), %d missed, %d evicted, " + "%d too large (max %d), %d too large fulltext (max %d)\n", getprogname(), cache->totelem, cache->cache_search, - cache->cache_hit, cache->cache_hit_fulltext, + cache->cache_hit, (cache->cache_hit * 100) / cache->cache_search, + cache->cache_hit_fulltext, + (cache->cache_hit_fulltext * 100) / cache->cache_search, cache->cache_miss, cache->cache_evict, cache->cache_toolarge, cache->cache_maxtoolarge, cache->cache_toolarge_fulltext, @@ -230,6 +233,22 @@ got_delta_cache_add(struct got_delta_cache *cache, return err; } + if (cache->totelem >= GOT_DELTA_CACHE_MAX_ELEM) { + for (idx = 0; idx < cache->nbuckets; idx++) { + head = &cache->buckets[idx]; + if (head->nchain < GOT_DELTA_CACHE_MAX_CHAIN / 2) + continue; + delta = &head->entries[head->nchain - 1]; + free(delta->data); + free(delta->fulltext); + memset(delta, 0, sizeof(*delta)); + head->nchain--; + 
cache->totelem--; + cache->cache_evict++; + break; + } + } + idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets; head = &cache->buckets[idx]; if (head->nchain >= nitems(head->entries)) {