From: Mark Jamsek Subject: Re: Got is kinda slow To: Christian Weisgerber , gameoftrees@openbsd.org Date: Mon, 24 Apr 2023 22:34:49 +1000 On 23-04-23 11:40PM, Stefan Sperling wrote: > On Wed, Apr 19, 2023 at 02:14:36PM +0200, Stefan Sperling wrote: > > The patch below implements fulltext caching in the delta cache. > > This is a proof of concept, not ready for commit mostly because we would > > need to carefully evaluate the various constants that control memory usage. > > For now, I've sized the cache such that all fulltexts in your test case > > will fit. > > > > This shaves off about 10% of time spent inflating data. > > But got-read-pack uses about 300MB of memory now, instead of about 150MB. > > Here is a new version of this diff with more conservative memory usage. > > Together with another diff I will send out to optimize the search for > changed paths this helps make 'got log path' about 30%-50% faster with > one single large pack file in freebsd-ports.git. Very nice! These make a marked improvement. A couple tiny style nits below if you'd like to polish now or later, either way is ok for me > ----------------------------------------------- > cache fulltext data in delta cache to improve speed with long delta chains > > diff 91db220264b6d9be6d44223dd76ae8eb9bea3641 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08 > commit - 91db220264b6d9be6d44223dd76ae8eb9bea3641 > commit + 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08 > blob - a88ea5659f6ff7be69d846ef2825a6e32b5963a7 > blob + aaa03170319bd65b941e76c4152b6d5720bdcd5a > --- lib/delta_cache.c > +++ lib/delta_cache.c > @@ -40,15 +40,19 @@ > #define nitems(_a) (sizeof(_a) / sizeof((_a)[0])) > #endif > > -#define GOT_DELTA_CACHE_MIN_BUCKETS 64 > -#define GOT_DELTA_CACHE_MAX_BUCKETS 2048 > -#define GOT_DELTA_CACHE_MAX_CHAIN 2 > -#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024 > +#define GOT_DELTA_CACHE_MIN_BUCKETS 64 > +#define GOT_DELTA_CACHE_MAX_BUCKETS 2048 > +#define GOT_DELTA_CACHE_MAX_CHAIN 2 > +#define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024 > +#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE 524288 > > + > struct got_cached_delta { > off_t offset; > uint8_t *data; > size_t len; > + uint8_t *fulltext; > + size_t fulltext_len; > }; > > struct got_delta_cache_head { > @@ -62,10 +66,13 @@ struct got_delta_cache { > unsigned int totelem; > int cache_search; > int cache_hit; > + int cache_hit_fulltext; > int cache_miss; > int cache_evict; > int cache_toolarge; > + int cache_toolarge_fulltext; > int cache_maxtoolarge; > + int cache_maxtoolarge_fulltext; > unsigned int flags; > #define GOT_DELTA_CACHE_F_NOMEM 0x01 > SIPHASH_KEY key; > @@ -105,10 +112,14 @@ got_delta_cache_free(struct got_delta_cache *cache) > > #ifdef GOT_DELTA_CACHE_DEBUG > fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, " > - "%d missed, %d evicted, %d too large (max %d)\n", getprogname(), > - cache->totelem, cache->cache_search, cache->cache_hit, > + "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), " > + "%d too large fulltext (max %d)\n", > + getprogname(), cache->totelem, cache->cache_search, > + cache->cache_hit, cache->cache_hit_fulltext, > cache->cache_miss, cache->cache_evict, cache->cache_toolarge, > - cache->cache_maxtoolarge); > + cache->cache_maxtoolarge, > + cache->cache_toolarge_fulltext, > + cache->cache_maxtoolarge_fulltext); > #endif > for (i = 0; i < cache->nbuckets; i++) { > struct got_delta_cache_head *head; > @@ -222,6 +233,7 @@ got_delta_cache_add(struct got_delta_cache *cache, > if (head->nchain >= nitems(head->entries)) { > delta = &head->entries[head->nchain - 1]; > free(delta->data); > + free(delta->fulltext); > memset(delta, 0, sizeof(*delta)); > head->nchain--; > cache->totelem--; > @@ -232,6 +244,8 @@ got_delta_cache_add(struct got_delta_cache *cache, > delta->offset = delta_data_offset; > delta->data = delta_data; > delta->len = delta_len; > + delta->fulltext = NULL; > + delta->fulltext_len = 0; > head->nchain++; > cache->totelem++; > > @@ -239,8 +253,56 @@ void > #endif > } > > +const struct got_error * > +got_delta_cache_add_fulltext(struct got_delta_cache *cache, > + off_t delta_data_offset, uint8_t *fulltext, size_t fulltext_len) > +{ > +#ifdef GOT_NO_DELTA_CACHE > + return got_error(GOT_ERR_NO_SPACE); > +#else > + struct got_cached_delta *delta; > + struct got_delta_cache_head *head; > + uint64_t idx; > + int i; > + > + if (fulltext_len > GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE) { > + cache->cache_toolarge_fulltext++; > + if (fulltext_len > cache->cache_maxtoolarge) > + cache->cache_maxtoolarge_fulltext = fulltext_len; > + return got_error(GOT_ERR_NO_SPACE); > + } > + > + idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets; > + head = &cache->buckets[idx]; > + > + for (i = 0; i < head->nchain; i++) { > + delta = &head->entries[i]; > + if (delta->offset != delta_data_offset) > + continue; > + if (i > 0) { > + struct got_cached_delta tmp; please insert blank line following declaration > + memcpy(&tmp, &head->entries[0], sizeof(tmp)); > + memcpy(&head->entries[0], &head->entries[i], > + sizeof(head->entries[0])); > + memcpy(&head->entries[i], &tmp, > + sizeof(head->entries[i])); > + delta = &head->entries[0]; > + } > + delta->fulltext = malloc(fulltext_len); > + if (delta->fulltext == NULL) > + return got_error_from_errno("malloc"); > + memcpy(delta->fulltext, fulltext, fulltext_len); > + delta->fulltext_len = fulltext_len; > + break; > + } > + > + return NULL; > +#endif > +} > + > void > got_delta_cache_get(uint8_t **delta_data, size_t *delta_len, > + uint8_t **fulltext, size_t *fulltext_len, > struct got_delta_cache *cache, off_t delta_data_offset) > { > uint64_t idx; > @@ -254,6 +316,10 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt > cache->cache_search++; > *delta_data = NULL; > *delta_len = 0; > + if (fulltext) > + *fulltext = NULL; > + if (fulltext_len) > + *fulltext_len = 0; > for (i = 0; i < head->nchain; i++) { > delta = &head->entries[i]; > if (delta->offset != delta_data_offset) > @@ -270,6 +336,13 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt > } > *delta_data = delta->data; > *delta_len = delta->len; > + if (fulltext && fulltext_len && > + delta->fulltext && delta->fulltext_len) { > + *fulltext = delta->fulltext; > + *fulltext_len = delta->fulltext_len; > + cache->cache_hit_fulltext++; > + } > + > return; > } > > blob - 7da86957201fd53ae69d5deb71b64d3bf6216599 > blob + 03111a87e65fe8d7de030236276b4a0961ab8d05 > --- lib/got_lib_delta_cache.h > +++ lib/got_lib_delta_cache.h > @@ -21,4 +21,7 @@ void got_delta_cache_get(uint8_t **, size_t *, struct > > const struct got_error *got_delta_cache_add(struct got_delta_cache *, off_t, > uint8_t *, size_t); > -void got_delta_cache_get(uint8_t **, size_t *, struct got_delta_cache *, off_t); > +const struct got_error *got_delta_cache_add_fulltext(struct got_delta_cache *, > + off_t , uint8_t *, size_t); > +void got_delta_cache_get(uint8_t **, size_t *, uint8_t **, size_t *, > + struct got_delta_cache *, off_t); > blob - a9b71c809585eb6a270c9f49e602be196d76f3ad > blob + 4bef4fa0fe94d62da5c1003246d35e196a4e04dc > --- lib/pack.c > +++ lib/pack.c > @@ -1316,7 +1316,8 @@ got_pack_get_delta_chain_max_size(uint64_t *max_size, > > if (pack->delta_cache) { > got_delta_cache_get(&delta_buf, &delta_len, > - pack->delta_cache, delta->data_offset); > + NULL, NULL, pack->delta_cache, > + delta->data_offset); > } > if (delta_buf == NULL) { > cached = 0; > @@ -1370,7 +1371,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > const struct got_error *err = NULL; > struct got_delta *delta; > uint8_t *base_buf = NULL, *accum_buf = NULL; > - size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len; > + size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0; > /* We process small enough files entirely in memory for speed. */ > const size_t max_bufsize = GOT_DELTA_RESULT_SIZE_CACHED_MAX; > uint64_t max_size = 0; > @@ -1381,6 +1382,23 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > if (STAILQ_EMPTY(&deltas->entries)) > return got_error(GOT_ERR_BAD_DELTA_CHAIN); > > + if (pack->delta_cache) { > + uint8_t *delta_buf = NULL, *fulltext = NULL; > + size_t delta_len, fulltext_len; > + > + delta = STAILQ_LAST(&deltas->entries, got_delta, entry); > + got_delta_cache_get(&delta_buf, &delta_len, > + &fulltext, &fulltext_len, > + pack->delta_cache, delta->data_offset); > + if (fulltext) { > + size_t w = fwrite(fulltext, 1, fulltext_len, outfile); please don't use function calls in initialisers, and as above leave a blank line after: if (fulltext) { size_t w; w = fwrite(fulltext, 1, fulltext_len, outfile); if (w != fulltext_len) return got_ferror(outfile, GOT_ERR_IO); ... > + if (w != fulltext_len) > + return got_ferror(outfile, GOT_ERR_IO); > + *result_size = fulltext_len; > + return NULL; > + } > + } > + > if (fseeko(base_file, 0L, SEEK_SET) == -1) > return got_error_from_errno("fseeko"); > if (fseeko(accum_file, 0L, SEEK_SET) == -1) > @@ -1388,7 +1406,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > > /* Deltas are ordered in ascending order. */ > STAILQ_FOREACH(delta, &deltas->entries, entry) { > - uint8_t *delta_buf = NULL; > + uint8_t *delta_buf = NULL, *fulltext = NULL; > + size_t delta_len, fulltext_len; > uint64_t base_size, result_size = 0; > int cached = 1; > if (n == 0) { > @@ -1475,6 +1494,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > > if (pack->delta_cache) { > got_delta_cache_get(&delta_buf, &delta_len, > + &fulltext, &fulltext_len, > pack->delta_cache, delta->data_offset); > } > if (delta_buf == NULL) { > @@ -1506,6 +1526,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > max_size = base_size; > if (result_size > max_size) > max_size = result_size; > + if (fulltext_len > max_size) > + max_size = fulltext_len; > > if (base_buf && max_size > max_bufsize) { > /* Switch from buffers to temporary files. */ > @@ -1548,21 +1570,41 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, > } > > if (base_buf) { > - err = got_delta_apply_in_mem(base_buf, base_bufsz, > - delta_buf, delta_len, accum_buf, > - &accum_size, max_size); > + if (fulltext) { > + memcpy(accum_buf, fulltext, fulltext_len); > + accum_size = fulltext_len; > + err = NULL; > + } else { > + err = got_delta_apply_in_mem(base_buf, base_bufsz, please wrap before 80 columns: err = got_delta_apply_in_mem(base_buf, base_bufsz, delta_buf, delta_len, accum_buf, &accum_size, max_size); > + delta_buf, delta_len, accum_buf, > + &accum_size, max_size); > + } > n++; > + if (!cached) > + free(delta_buf); > + if (err) > + goto done; > + if (fulltext == NULL) { > + err = got_delta_cache_add_fulltext( > + pack->delta_cache, delta->data_offset, > + accum_buf, accum_size); > + if (err) { > + if (err->code != GOT_ERR_NO_SPACE) > + goto done; > + err = NULL; > + } > + } > } else { > err = got_delta_apply(base_file, delta_buf, > delta_len, > /* Final delta application writes to output file. */ > ++n < deltas->nentries ? accum_file : outfile, > &accum_size); > + if (!cached) > + free(delta_buf); > + if (err) > + goto done; > } > - if (!cached) > - free(delta_buf); > - if (err) > - goto done; > > if (n < deltas->nentries) { > /* Accumulated delta becomes the new base. */ > @@ -1604,7 +1646,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > const struct got_error *err = NULL; > struct got_delta *delta; > uint8_t *base_buf = NULL, *accum_buf = NULL; > - size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len; > + size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0; > uint64_t max_size = 0; > int n = 0; > > @@ -1614,9 +1656,28 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > if (STAILQ_EMPTY(&deltas->entries)) > return got_error(GOT_ERR_BAD_DELTA_CHAIN); > > + if (pack->delta_cache) { > + uint8_t *delta_buf = NULL, *fulltext = NULL; > + size_t delta_len, fulltext_len; > + > + delta = STAILQ_LAST(&deltas->entries, got_delta, entry); > + got_delta_cache_get(&delta_buf, &delta_len, > + &fulltext, &fulltext_len, > + pack->delta_cache, delta->data_offset); > + if (fulltext) { > + *outbuf = malloc(fulltext_len); > + if (*outbuf == NULL) > + return got_error_from_errno("malloc"); > + memcpy(*outbuf, fulltext, fulltext_len); > + *outlen = fulltext_len; > + return NULL; > + } > + } > + > /* Deltas are ordered in ascending order. */ > STAILQ_FOREACH(delta, &deltas->entries, entry) { > - uint8_t *delta_buf = NULL; > + uint8_t *delta_buf = NULL, *fulltext = NULL; > + size_t delta_len, fulltext_len = 0; > uint64_t base_size, result_size = 0; > int cached = 1; > if (n == 0) { > @@ -1637,10 +1698,26 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > goto done; > } > > + if (pack->delta_cache) { > + got_delta_cache_get(&delta_buf, &delta_len, > + &fulltext, &fulltext_len, > + pack->delta_cache, delta_data_offset); > + } > + > if (delta->size > max_size) > max_size = delta->size; > + if (delta->size > fulltext_len) > + max_size = fulltext_len; > > - if (pack->map) { > + if (fulltext) { > + base_buf = malloc(fulltext_len); > + if (base_buf == NULL) { > + err = got_error_from_errno("malloc"); > + goto done; > + } > + memcpy(base_buf, fulltext, fulltext_len); > + base_bufsz = fulltext_len; > + } else if (pack->map) { > size_t mapoff; > > if (delta_data_offset > SIZE_MAX) { > @@ -1667,11 +1744,30 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > if (err) > goto done; > n++; > + > + if (pack->delta_cache && fulltext == NULL) { > + err = got_delta_cache_add(pack->delta_cache, > + delta_data_offset, NULL, 0); > + if (err) { > + if (err->code != GOT_ERR_NO_SPACE) > + goto done; > + err = NULL; > + } else { > + err = got_delta_cache_add_fulltext( > + pack->delta_cache, > + delta_data_offset, > + fulltext, fulltext_len); > + if (err && err->code != GOT_ERR_NO_SPACE) > + goto done; likewise, please wrap before 80: if (err && err->code != GOT_ERR_NO_SPACE) goto done; > + err = NULL; > + } > + } > continue; > } > > if (pack->delta_cache) { > got_delta_cache_get(&delta_buf, &delta_len, > + &fulltext, &fulltext_len, > pack->delta_cache, delta->data_offset); > } > if (delta_buf == NULL) { > @@ -1703,6 +1799,8 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > max_size = base_size; > if (result_size > max_size) > max_size = result_size; > + if (fulltext_len > max_size) > + max_size = fulltext_len; > > if (max_size > base_bufsz) { > uint8_t *p = realloc(base_buf, max_size); > @@ -1728,15 +1826,31 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz > accum_bufsz = max_size; > } > > - err = got_delta_apply_in_mem(base_buf, base_bufsz, > - delta_buf, delta_len, accum_buf, > - &accum_size, max_size); > + if (fulltext) { > + memcpy(accum_buf, fulltext, fulltext_len); > + accum_size = fulltext_len; > + err = NULL; > + } else { > + err = got_delta_apply_in_mem(base_buf, base_bufsz, > + delta_buf, delta_len, accum_buf, > + &accum_size, max_size); > + } > if (!cached) > free(delta_buf); > n++; > if (err) > goto done; > > + if (fulltext == NULL) { > + err = got_delta_cache_add_fulltext(pack->delta_cache, > + delta->data_offset, accum_buf, accum_size); > + if (err) { > + if (err->code != GOT_ERR_NO_SPACE) > + goto done; > + err = NULL; > + } > + } > + > if (n < deltas->nentries) { > /* Accumulated delta becomes the new base. */ > uint8_t *tmp = accum_buf; > -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68