From: Stefan Sperling Subject: Re: Got is kinda slow To: Christian Weisgerber , gameoftrees@openbsd.org Date: Sun, 23 Apr 2023 23:40:35 +0200 On Wed, Apr 19, 2023 at 02:14:36PM +0200, Stefan Sperling wrote: > The patch below implements fulltext caching in the delta cache. > This is a proof of concept, not ready for commit mostly because we would > need to carefully evaluate the various constants that control memory usage. > For now, I've sized the cache such that all fulltexts in your test case > will fit. > > This shaves off about 10% of time spent inflating data. > But got-read-pack uses about 300MB of memory now, instead of about 150MB. Here is a new version of this diff with more conservative memory usage. Together with another diff I will send out to optimize the search for changed paths this helps make 'got log path' about 30%-50% faster with one single large pack file in freebsd-ports.git. ----------------------------------------------- cache fulltext data in delta cache to improve speed with long delta chains diff 91db220264b6d9be6d44223dd76ae8eb9bea3641 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08 commit - 91db220264b6d9be6d44223dd76ae8eb9bea3641 commit + 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08 blob - a88ea5659f6ff7be69d846ef2825a6e32b5963a7 blob + aaa03170319bd65b941e76c4152b6d5720bdcd5a --- lib/delta_cache.c +++ lib/delta_cache.c @@ -40,15 +40,19 @@ #define nitems(_a) (sizeof(_a) / sizeof((_a)[0])) #endif -#define GOT_DELTA_CACHE_MIN_BUCKETS 64 -#define GOT_DELTA_CACHE_MAX_BUCKETS 2048 -#define GOT_DELTA_CACHE_MAX_CHAIN 2 -#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024 +#define GOT_DELTA_CACHE_MIN_BUCKETS 64 +#define GOT_DELTA_CACHE_MAX_BUCKETS 2048 +#define GOT_DELTA_CACHE_MAX_CHAIN 2 +#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024 +#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 524288 + struct got_cached_delta { off_t offset; uint8_t *data; size_t len; + uint8_t *fulltext; + size_t fulltext_len; }; struct got_delta_cache_head { @@ -62,10 +66,13 @@ struct got_delta_cache { unsigned int totelem; int cache_search; int cache_hit; + int cache_hit_fulltext; int cache_miss; int cache_evict; int cache_toolarge; + int cache_toolarge_fulltext; int cache_maxtoolarge; + int cache_maxtoolarge_fulltext; unsigned int flags; #define GOT_DELTA_CACHE_F_NOMEM 0x01 SIPHASH_KEY key; @@ -105,10 +112,14 @@ got_delta_cache_free(struct got_delta_cache *cache) #ifdef GOT_DELTA_CACHE_DEBUG fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, " - "%d missed, %d evicted, %d too large (max %d)\n", getprogname(), - cache->totelem, cache->cache_search, cache->cache_hit, + "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), " + "%d too large fulltext (max %d)\n", + getprogname(), cache->totelem, cache->cache_search, + cache->cache_hit, cache->cache_hit_fulltext, cache->cache_miss, cache->cache_evict, cache->cache_toolarge, - cache->cache_maxtoolarge); + cache->cache_maxtoolarge, + cache->cache_toolarge_fulltext, + cache->cache_maxtoolarge_fulltext); #endif for (i = 0; i < cache->nbuckets; i++) { struct got_delta_cache_head *head; @@ -222,6 +233,7 @@ got_delta_cache_add(struct got_delta_cache *cache, if (head->nchain >= nitems(head->entries)) { delta = &head->entries[head->nchain - 1]; free(delta->data); + free(delta->fulltext); memset(delta, 0, sizeof(*delta)); head->nchain--; cache->totelem--; @@ -232,6 +244,8 @@ got_delta_cache_add(struct got_delta_cache *cache, delta->offset = delta_data_offset; delta->data = delta_data; delta->len = delta_len; + delta->fulltext = NULL; + delta->fulltext_len = 0; head->nchain++; cache->totelem++; @@ -239,8 +253,56 @@ void #endif } +const struct got_error * +got_delta_cache_add_fulltext(struct got_delta_cache *cache, + off_t delta_data_offset, uint8_t *fulltext, size_t fulltext_len) +{ +#ifdef GOT_NO_DELTA_CACHE + return got_error(GOT_ERR_NO_SPACE); +#else + struct got_cached_delta *delta; + struct got_delta_cache_head *head; + uint64_t idx; + int i; + + if (fulltext_len > GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE) { + cache->cache_toolarge_fulltext++; + if (fulltext_len > cache->cache_maxtoolarge) + cache->cache_maxtoolarge_fulltext = fulltext_len; + return got_error(GOT_ERR_NO_SPACE); + } + + idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets; + head = &cache->buckets[idx]; + + for (i = 0; i < head->nchain; i++) { + delta = &head->entries[i]; + if (delta->offset != delta_data_offset) + continue; + if (i > 0) { + struct got_cached_delta tmp; + memcpy(&tmp, &head->entries[0], sizeof(tmp)); + memcpy(&head->entries[0], &head->entries[i], + sizeof(head->entries[0])); + memcpy(&head->entries[i], &tmp, + sizeof(head->entries[i])); + delta = &head->entries[0]; + } + delta->fulltext = malloc(fulltext_len); + if (delta->fulltext == NULL) + return got_error_from_errno("malloc"); + memcpy(delta->fulltext, fulltext, fulltext_len); + delta->fulltext_len = fulltext_len; + break; + } + + return NULL; +#endif +} + void got_delta_cache_get(uint8_t **delta_data, size_t *delta_len, + uint8_t **fulltext, size_t *fulltext_len, struct got_delta_cache *cache, off_t delta_data_offset) { uint64_t idx; @@ -254,6 +316,10 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt cache->cache_search++; *delta_data = NULL; *delta_len = 0; + if (fulltext) + *fulltext = NULL; + if (fulltext_len) + *fulltext_len = 0; for (i = 0; i < head->nchain; i++) { delta = &head->entries[i]; if (delta->offset != delta_data_offset) @@ -270,6 +336,13 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt } *delta_data = delta->data; *delta_len = delta->len; + if (fulltext && fulltext_len && + delta->fulltext && delta->fulltext_len) { + *fulltext = delta->fulltext; + *fulltext_len = delta->fulltext_len; + cache->cache_hit_fulltext++; + } + return; } blob - 7da86957201fd53ae69d5deb71b64d3bf6216599 blob + 03111a87e65fe8d7de030236276b4a0961ab8d05 --- lib/got_lib_delta_cache.h +++ lib/got_lib_delta_cache.h @@ -21,4 +21,7 @@ void got_delta_cache_get(uint8_t **, size_t *, struct const struct got_error *got_delta_cache_add(struct got_delta_cache *, off_t, uint8_t *, size_t); -void got_delta_cache_get(uint8_t **, size_t *, struct got_delta_cache *, off_t); +const struct got_error *got_delta_cache_add_fulltext(struct got_delta_cache *, + off_t , uint8_t *, size_t); +void got_delta_cache_get(uint8_t **, size_t *, uint8_t **, size_t *, + struct got_delta_cache *, off_t); blob - a9b71c809585eb6a270c9f49e602be196d76f3ad blob + 4bef4fa0fe94d62da5c1003246d35e196a4e04dc --- lib/pack.c +++ lib/pack.c @@ -1316,7 +1316,8 @@ got_pack_get_delta_chain_max_size(uint64_t *max_size, if (pack->delta_cache) { got_delta_cache_get(&delta_buf, &delta_len, - pack->delta_cache, delta->data_offset); + NULL, NULL, pack->delta_cache, + delta->data_offset); } if (delta_buf == NULL) { cached = 0; @@ -1370,7 +1371,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, const struct got_error *err = NULL; struct got_delta *delta; uint8_t *base_buf = NULL, *accum_buf = NULL; - size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len; + size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0; /* We process small enough files entirely in memory for speed. */ const size_t max_bufsize = GOT_DELTA_RESULT_SIZE_CACHED_MAX; uint64_t max_size = 0; @@ -1381,6 +1382,23 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, if (STAILQ_EMPTY(&deltas->entries)) return got_error(GOT_ERR_BAD_DELTA_CHAIN); + if (pack->delta_cache) { + uint8_t *delta_buf = NULL, *fulltext = NULL; + size_t delta_len, fulltext_len; + + delta = STAILQ_LAST(&deltas->entries, got_delta, entry); + got_delta_cache_get(&delta_buf, &delta_len, + &fulltext, &fulltext_len, + pack->delta_cache, delta->data_offset); + if (fulltext) { + size_t w = fwrite(fulltext, 1, fulltext_len, outfile); + if (w != fulltext_len) + return got_ferror(outfile, GOT_ERR_IO); + *result_size = fulltext_len; + return NULL; + } + } + if (fseeko(base_file, 0L, SEEK_SET) == -1) return got_error_from_errno("fseeko"); if (fseeko(accum_file, 0L, SEEK_SET) == -1) @@ -1388,7 +1406,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, /* Deltas are ordered in ascending order. */ STAILQ_FOREACH(delta, &deltas->entries, entry) { - uint8_t *delta_buf = NULL; + uint8_t *delta_buf = NULL, *fulltext = NULL; + size_t delta_len, fulltext_len; uint64_t base_size, result_size = 0; int cached = 1; if (n == 0) { @@ -1475,6 +1494,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, if (pack->delta_cache) { got_delta_cache_get(&delta_buf, &delta_len, + &fulltext, &fulltext_len, pack->delta_cache, delta->data_offset); } if (delta_buf == NULL) { @@ -1506,6 +1526,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, max_size = base_size; if (result_size > max_size) max_size = result_size; + if (fulltext_len > max_size) + max_size = fulltext_len; if (base_buf && max_size > max_bufsize) { /* Switch from buffers to temporary files. */ @@ -1548,21 +1570,41 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, } if (base_buf) { - err = got_delta_apply_in_mem(base_buf, base_bufsz, - delta_buf, delta_len, accum_buf, - &accum_size, max_size); + if (fulltext) { + memcpy(accum_buf, fulltext, fulltext_len); + accum_size = fulltext_len; + err = NULL; + } else { + err = got_delta_apply_in_mem(base_buf, base_bufsz, + delta_buf, delta_len, accum_buf, + &accum_size, max_size); + } n++; + if (!cached) + free(delta_buf); + if (err) + goto done; + if (fulltext == NULL) { + err = got_delta_cache_add_fulltext( + pack->delta_cache, delta->data_offset, + accum_buf, accum_size); + if (err) { + if (err->code != GOT_ERR_NO_SPACE) + goto done; + err = NULL; + } + } } else { err = got_delta_apply(base_file, delta_buf, delta_len, /* Final delta application writes to output file. */ ++n < deltas->nentries ? accum_file : outfile, &accum_size); + if (!cached) + free(delta_buf); + if (err) + goto done; } - if (!cached) - free(delta_buf); - if (err) - goto done; if (n < deltas->nentries) { /* Accumulated delta becomes the new base. */ @@ -1604,7 +1646,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz const struct got_error *err = NULL; struct got_delta *delta; uint8_t *base_buf = NULL, *accum_buf = NULL; - size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len; + size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0; uint64_t max_size = 0; int n = 0; @@ -1614,9 +1656,28 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz if (STAILQ_EMPTY(&deltas->entries)) return got_error(GOT_ERR_BAD_DELTA_CHAIN); + if (pack->delta_cache) { + uint8_t *delta_buf = NULL, *fulltext = NULL; + size_t delta_len, fulltext_len; + + delta = STAILQ_LAST(&deltas->entries, got_delta, entry); + got_delta_cache_get(&delta_buf, &delta_len, + &fulltext, &fulltext_len, + pack->delta_cache, delta->data_offset); + if (fulltext) { + *outbuf = malloc(fulltext_len); + if (*outbuf == NULL) + return got_error_from_errno("malloc"); + memcpy(*outbuf, fulltext, fulltext_len); + *outlen = fulltext_len; + return NULL; + } + } + /* Deltas are ordered in ascending order. */ STAILQ_FOREACH(delta, &deltas->entries, entry) { - uint8_t *delta_buf = NULL; + uint8_t *delta_buf = NULL, *fulltext = NULL; + size_t delta_len, fulltext_len = 0; uint64_t base_size, result_size = 0; int cached = 1; if (n == 0) { @@ -1637,10 +1698,26 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz goto done; } + if (pack->delta_cache) { + got_delta_cache_get(&delta_buf, &delta_len, + &fulltext, &fulltext_len, + pack->delta_cache, delta_data_offset); + } + if (delta->size > max_size) max_size = delta->size; + if (delta->size > fulltext_len) + max_size = fulltext_len; - if (pack->map) { + if (fulltext) { + base_buf = malloc(fulltext_len); + if (base_buf == NULL) { + err = got_error_from_errno("malloc"); + goto done; + } + memcpy(base_buf, fulltext, fulltext_len); + base_bufsz = fulltext_len; + } else if (pack->map) { size_t mapoff; if (delta_data_offset > SIZE_MAX) { @@ -1667,11 +1744,30 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz if (err) goto done; n++; + + if (pack->delta_cache && fulltext == NULL) { + err = got_delta_cache_add(pack->delta_cache, + delta_data_offset, NULL, 0); + if (err) { + if (err->code != GOT_ERR_NO_SPACE) + goto done; + err = NULL; + } else { + err = got_delta_cache_add_fulltext( + pack->delta_cache, + delta_data_offset, + fulltext, fulltext_len); + if (err && err->code != GOT_ERR_NO_SPACE) + goto done; + err = NULL; + } + } continue; } if (pack->delta_cache) { got_delta_cache_get(&delta_buf, &delta_len, + &fulltext, &fulltext_len, pack->delta_cache, delta->data_offset); } if (delta_buf == NULL) { @@ -1703,6 +1799,8 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz max_size = base_size; if (result_size > max_size) max_size = result_size; + if (fulltext_len > max_size) + max_size = fulltext_len; if (max_size > base_bufsz) { uint8_t *p = realloc(base_buf, max_size); @@ -1728,15 +1826,31 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz accum_bufsz = max_size; } - err = got_delta_apply_in_mem(base_buf, base_bufsz, - delta_buf, delta_len, accum_buf, - &accum_size, max_size); + if (fulltext) { + memcpy(accum_buf, fulltext, fulltext_len); + accum_size = fulltext_len; + err = NULL; + } else { + err = got_delta_apply_in_mem(base_buf, base_bufsz, + delta_buf, delta_len, accum_buf, + &accum_size, max_size); + } if (!cached) free(delta_buf); n++; if (err) goto done; + if (fulltext == NULL) { + err = got_delta_cache_add_fulltext(pack->delta_cache, + delta->data_offset, accum_buf, accum_size); + if (err) { + if (err->code != GOT_ERR_NO_SPACE) + goto done; + err = NULL; + } + } + if (n < deltas->nentries) { /* Accumulated delta becomes the new base. */ uint8_t *tmp = accum_buf;