Download raw body.
delta cache improvement for got-index-pack
Cloning ports.git with 'got clone' is quite slow; it takes more than 10 minutes.
I have been looking for ways to improve this. I have found a way to speed
up indexing of the pack file, specifically the "resolving deltas" step.
The diff below increases delta cache hit rate during got-index-pack from
60% to 90% when indexing a full pack of the ports.git repository.
In my test case, where I use a ports.git repository with history between
1995 and 2016, the time taken to index the full pack file is reduced from
90 seconds to 30 seconds.
As a bonus, memory usage decreases on average since we keep fewer deltas
in cache than before. The diff also sets an upper limit on cache entries to keep memory
usage within reasonable limits. The maximum is now about 1.2GB in the
contrived case where all deltas and fulltexts are of the maximum size
allowed. In practice we hover at around 500 MB with ports.git.
The previous theoretical maximum was about 2GB.
ok?
diff /home/stsp/src/got
path + /home/stsp/src/got
commit - d07cf78033793607c02ab0e8bcb66de1f4a02a6f
blob - f4573ec7892464be6d8b1d3a6baf3decade28e4c
file + lib/delta_cache.c
--- lib/delta_cache.c
+++ lib/delta_cache.c
@@ -41,10 +41,11 @@
#endif
#define GOT_DELTA_CACHE_MIN_BUCKETS 64
-#define GOT_DELTA_CACHE_MAX_BUCKETS 2048
-#define GOT_DELTA_CACHE_MAX_CHAIN 2
-#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024
-#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 524288
+#define GOT_DELTA_CACHE_MAX_BUCKETS 1024
+#define GOT_DELTA_CACHE_MAX_ELEM 768
+#define GOT_DELTA_CACHE_MAX_CHAIN 4
+#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 524288
+#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 1048576
struct got_cached_delta {
@@ -111,11 +112,13 @@ got_delta_cache_free(struct got_delta_cache *cache)
unsigned int i;
#ifdef GOT_DELTA_CACHE_DEBUG
- fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
- "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), "
- "%d too large fulltext (max %d)\n",
+ fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d "
+ "hits (%d%%), %d fulltext hits (%d%%), %d missed, %d evicted, "
+ "%d too large (max %d), %d too large fulltext (max %d)\n",
getprogname(), cache->totelem, cache->cache_search,
- cache->cache_hit, cache->cache_hit_fulltext,
+ cache->cache_hit, (cache->cache_hit * 100) / cache->cache_search,
+ cache->cache_hit_fulltext,
+ (cache->cache_hit_fulltext * 100) / cache->cache_search,
cache->cache_miss, cache->cache_evict, cache->cache_toolarge,
cache->cache_maxtoolarge,
cache->cache_toolarge_fulltext,
@@ -230,6 +233,22 @@ got_delta_cache_add(struct got_delta_cache *cache,
return err;
}
+ if (cache->totelem >= GOT_DELTA_CACHE_MAX_ELEM) {
+ for (idx = 0; idx < cache->nbuckets; idx++) {
+ head = &cache->buckets[idx];
+ if (head->nchain < GOT_DELTA_CACHE_MAX_CHAIN / 2)
+ continue;
+ delta = &head->entries[head->nchain - 1];
+ free(delta->data);
+ free(delta->fulltext);
+ memset(delta, 0, sizeof(*delta));
+ head->nchain--;
+ cache->totelem--;
+ cache->cache_evict++;
+ break;
+ }
+ }
+
idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
head = &cache->buckets[idx];
if (head->nchain >= nitems(head->entries)) {
delta cache improvement for got-index-pack