"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: Got is kinda slow
To:
Christian Weisgerber <naddy@mips.inka.de>, gameoftrees@openbsd.org
Date:
Mon, 24 Apr 2023 22:34:49 +1000

Download raw body.

Thread
  • Christian Weisgerber:

    Got is kinda slow

  • Stefan Sperling:

    Got is kinda slow

  • On 23-04-23 11:40PM, Stefan Sperling wrote:
    > On Wed, Apr 19, 2023 at 02:14:36PM +0200, Stefan Sperling wrote:
    > > The patch below implements fulltext caching in the delta cache.
    > > This is a proof of concept, not ready for commit mostly because we would
    > > need to carefully evaluate the various constants that control memory usage.
    > > For now, I've sized the cache such that all fulltexts in your test case
    > > will fit.
    > > 
    > > This shaves off about 10% of time spent inflating data.
    > > But got-read-pack uses about 300MB of memory now, instead of about 150MB.
    > 
    > Here is a new version of this diff with more conservative memory usage.
    > 
    > Together with another diff I will send out to optimize the search for
    > changed paths this helps make 'got log path' about 30%-50% faster with
    > one single large pack file in freebsd-ports.git.
    
    Very nice! These make a marked improvement.
    
    A couple tiny style nits below if you'd like to polish now or later,
    either way is ok for me
    
    > -----------------------------------------------
    >  cache fulltext data in delta cache to improve speed with long delta chains
    >  
    > diff 91db220264b6d9be6d44223dd76ae8eb9bea3641 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08
    > commit - 91db220264b6d9be6d44223dd76ae8eb9bea3641
    > commit + 24e3f99b1f2ea548f27d6d72325797c8b1fe3b08
    > blob - a88ea5659f6ff7be69d846ef2825a6e32b5963a7
    > blob + aaa03170319bd65b941e76c4152b6d5720bdcd5a
    > --- lib/delta_cache.c
    > +++ lib/delta_cache.c
    > @@ -40,15 +40,19 @@
    >  #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
    >  #endif
    >  
    > -#define GOT_DELTA_CACHE_MIN_BUCKETS	64
    > -#define GOT_DELTA_CACHE_MAX_BUCKETS	2048
    > -#define GOT_DELTA_CACHE_MAX_CHAIN	2
    > -#define GOT_DELTA_CACHE_MAX_DELTA_SIZE	1024
    > +#define GOT_DELTA_CACHE_MIN_BUCKETS		64
    > +#define GOT_DELTA_CACHE_MAX_BUCKETS		2048
    > +#define GOT_DELTA_CACHE_MAX_CHAIN		2
    > +#define GOT_DELTA_CACHE_MAX_DELTA_SIZE		1024
    > +#define GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE	524288
    >  
    > +
    >  struct got_cached_delta {
    >  	off_t offset;
    >  	uint8_t *data;
    >  	size_t len;
    > +	uint8_t *fulltext;
    > +	size_t fulltext_len;
    >  };
    >  
    >  struct got_delta_cache_head {
    > @@ -62,10 +66,13 @@ struct got_delta_cache {
    >  	unsigned int totelem;
    >  	int cache_search;
    >  	int cache_hit;
    > +	int cache_hit_fulltext;
    >  	int cache_miss;
    >  	int cache_evict;
    >  	int cache_toolarge;
    > +	int cache_toolarge_fulltext;
    >  	int cache_maxtoolarge;
    > +	int cache_maxtoolarge_fulltext;
    >  	unsigned int flags;
    >  #define GOT_DELTA_CACHE_F_NOMEM	0x01
    >  	SIPHASH_KEY key;
    > @@ -105,10 +112,14 @@ got_delta_cache_free(struct got_delta_cache *cache)
    >  
    >  #ifdef GOT_DELTA_CACHE_DEBUG
    >  	fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
    > -	    "%d missed, %d evicted, %d too large (max %d)\n", getprogname(),
    > -	    cache->totelem, cache->cache_search, cache->cache_hit,
    > +	    "%d fulltext hits, %d missed, %d evicted, %d too large (max %d), "
    > +	    "%d too large fulltext (max %d)\n",
    > +	    getprogname(), cache->totelem, cache->cache_search,
    > +	    cache->cache_hit, cache->cache_hit_fulltext,
    >  	    cache->cache_miss, cache->cache_evict, cache->cache_toolarge,
    > -	    cache->cache_maxtoolarge);
    > +	    cache->cache_maxtoolarge,
    > +	    cache->cache_toolarge_fulltext,
    > +	    cache->cache_maxtoolarge_fulltext);
    >  #endif
    >  	for (i = 0; i < cache->nbuckets; i++) {
    >  		struct got_delta_cache_head *head;
    > @@ -222,6 +233,7 @@ got_delta_cache_add(struct got_delta_cache *cache,
    >  	if (head->nchain >= nitems(head->entries)) {
    >  		delta = &head->entries[head->nchain - 1];
    >  		free(delta->data);
    > +		free(delta->fulltext);
    >  		memset(delta, 0, sizeof(*delta));
    >  		head->nchain--;
    >  		cache->totelem--;
    > @@ -232,6 +244,8 @@ got_delta_cache_add(struct got_delta_cache *cache,
    >  	delta->offset = delta_data_offset;
    >  	delta->data = delta_data;
    >  	delta->len = delta_len;
    > +	delta->fulltext = NULL;
    > +	delta->fulltext_len = 0;
    >  	head->nchain++;
    >  	cache->totelem++;
    >  
    > @@ -239,8 +253,56 @@ void
    >  #endif
    >  }
    >  
    > +const struct got_error *
    > +got_delta_cache_add_fulltext(struct got_delta_cache *cache,
    > +    off_t delta_data_offset, uint8_t *fulltext, size_t fulltext_len)
    > +{
    > +#ifdef GOT_NO_DELTA_CACHE
    > +	return got_error(GOT_ERR_NO_SPACE);
    > +#else
    > +	struct got_cached_delta *delta;
    > +	struct got_delta_cache_head *head;
    > +	uint64_t idx;
    > +	int i;
    > +
    > +	if (fulltext_len > GOT_DELTA_CACHE_MAX_FULLTEXT_SIZE) {
    > +		cache->cache_toolarge_fulltext++;
    > +		if (fulltext_len > cache->cache_maxtoolarge)
    > +			cache->cache_maxtoolarge_fulltext = fulltext_len;
    > +		return got_error(GOT_ERR_NO_SPACE);
    > +	}
    > +
    > +	idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
    > +	head = &cache->buckets[idx];
    > +
    > +	for (i = 0; i < head->nchain; i++) {
    > +		delta = &head->entries[i];
    > +		if (delta->offset != delta_data_offset)
    > +			continue;
    > +		if (i > 0) {
    > +			struct got_cached_delta tmp;
    
    please insert blank line following declaration
    
    > +			memcpy(&tmp, &head->entries[0], sizeof(tmp));
    > +			memcpy(&head->entries[0], &head->entries[i],
    > +			    sizeof(head->entries[0]));
    > +			memcpy(&head->entries[i], &tmp,
    > +			    sizeof(head->entries[i]));
    > +			delta = &head->entries[0];
    > +		}
    > +		delta->fulltext = malloc(fulltext_len);
    > +		if (delta->fulltext == NULL)
    > +			return got_error_from_errno("malloc");
    > +		memcpy(delta->fulltext, fulltext, fulltext_len);
    > +		delta->fulltext_len = fulltext_len;
    > +		break;
    > +	}
    > +
    > +	return NULL;
    > +#endif
    > +}
    > +
    >  void
    >  got_delta_cache_get(uint8_t **delta_data, size_t *delta_len,
    > +    uint8_t **fulltext, size_t *fulltext_len,
    >      struct got_delta_cache *cache, off_t delta_data_offset)
    >  {
    >  	uint64_t idx;
    > @@ -254,6 +316,10 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt
    >  	cache->cache_search++;
    >  	*delta_data = NULL;
    >  	*delta_len = 0;
    > +	if (fulltext)
    > +		*fulltext = NULL;
    > +	if (fulltext_len)
    > +		*fulltext_len = 0;
    >  	for (i = 0; i < head->nchain; i++) {
    >  		delta = &head->entries[i];
    >  		if (delta->offset != delta_data_offset)
    > @@ -270,6 +336,13 @@ got_delta_cache_get(uint8_t **delta_data, size_t *delt
    >  		}
    >  		*delta_data = delta->data;
    >  		*delta_len = delta->len;
    > +		if (fulltext && fulltext_len &&
    > +		    delta->fulltext && delta->fulltext_len) {
    > +			*fulltext = delta->fulltext;
    > +			*fulltext_len = delta->fulltext_len;
    > +			cache->cache_hit_fulltext++;
    > +		}
    > +
    >  		return;
    >  	}
    >  
    > blob - 7da86957201fd53ae69d5deb71b64d3bf6216599
    > blob + 03111a87e65fe8d7de030236276b4a0961ab8d05
    > --- lib/got_lib_delta_cache.h
    > +++ lib/got_lib_delta_cache.h
    > @@ -21,4 +21,7 @@ void got_delta_cache_get(uint8_t **, size_t *, struct 
    >  
    >  const struct got_error *got_delta_cache_add(struct got_delta_cache *, off_t,
    >      uint8_t *, size_t);
    > -void got_delta_cache_get(uint8_t **, size_t *, struct got_delta_cache *, off_t);
    > +const struct got_error *got_delta_cache_add_fulltext(struct got_delta_cache *,
    > +    off_t , uint8_t *, size_t);
    > +void got_delta_cache_get(uint8_t **, size_t *, uint8_t **, size_t *,
    > +    struct got_delta_cache *, off_t);
    > blob - a9b71c809585eb6a270c9f49e602be196d76f3ad
    > blob + 4bef4fa0fe94d62da5c1003246d35e196a4e04dc
    > --- lib/pack.c
    > +++ lib/pack.c
    > @@ -1316,7 +1316,8 @@ got_pack_get_delta_chain_max_size(uint64_t *max_size,
    >  
    >  			if (pack->delta_cache) {
    >  				got_delta_cache_get(&delta_buf, &delta_len,
    > -				    pack->delta_cache, delta->data_offset);
    > +				    NULL, NULL, pack->delta_cache,
    > +				    delta->data_offset);
    >  			}
    >  			if (delta_buf == NULL) {
    >  				cached = 0;
    > @@ -1370,7 +1371,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  	const struct got_error *err = NULL;
    >  	struct got_delta *delta;
    >  	uint8_t *base_buf = NULL, *accum_buf = NULL;
    > -	size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len;
    > +	size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0;
    >  	/* We process small enough files entirely in memory for speed. */
    >  	const size_t max_bufsize = GOT_DELTA_RESULT_SIZE_CACHED_MAX;
    >  	uint64_t max_size = 0;
    > @@ -1381,6 +1382,23 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  	if (STAILQ_EMPTY(&deltas->entries))
    >  		return got_error(GOT_ERR_BAD_DELTA_CHAIN);
    >  
    > +	if (pack->delta_cache) {
    > +		uint8_t *delta_buf = NULL, *fulltext = NULL;
    > +		size_t delta_len, fulltext_len;
    > +
    > +		delta = STAILQ_LAST(&deltas->entries, got_delta, entry);
    > +		got_delta_cache_get(&delta_buf, &delta_len,
    > +		    &fulltext, &fulltext_len,
    > +		    pack->delta_cache, delta->data_offset);
    > +		if (fulltext) {
    > +			size_t w = fwrite(fulltext, 1, fulltext_len, outfile);
    
    please don't use function calls in initialisers, and as above leave
    a blank line after:
    
    		if (fulltext) {
    			size_t w;
    
    			w = fwrite(fulltext, 1, fulltext_len, outfile);
    			if (w != fulltext_len)
    				return got_ferror(outfile, GOT_ERR_IO);
    			...
    
    > +			if (w != fulltext_len)
    > +				return got_ferror(outfile, GOT_ERR_IO);
    > +			*result_size = fulltext_len;
    > +			return NULL;
    > +		}
    > +	}
    > +
    >  	if (fseeko(base_file, 0L, SEEK_SET) == -1)
    >  		return got_error_from_errno("fseeko");
    >  	if (fseeko(accum_file, 0L, SEEK_SET) == -1)
    > @@ -1388,7 +1406,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  
    >  	/* Deltas are ordered in ascending order. */
    >  	STAILQ_FOREACH(delta, &deltas->entries, entry) {
    > -		uint8_t *delta_buf = NULL;
    > +		uint8_t *delta_buf = NULL, *fulltext = NULL;
    > +		size_t delta_len, fulltext_len;
    >  		uint64_t base_size, result_size = 0;
    >  		int cached = 1;
    >  		if (n == 0) {
    > @@ -1475,6 +1494,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  
    >  		if (pack->delta_cache) {
    >  			got_delta_cache_get(&delta_buf, &delta_len,
    > +			    &fulltext, &fulltext_len,
    >  			    pack->delta_cache, delta->data_offset);
    >  		}
    >  		if (delta_buf == NULL) {
    > @@ -1506,6 +1526,8 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  			max_size = base_size;
    >  		if (result_size > max_size)
    >  			max_size = result_size;
    > +		if (fulltext_len > max_size)
    > +			max_size = fulltext_len;
    >  
    >  		if (base_buf && max_size > max_bufsize) {
    >  			/* Switch from buffers to temporary files. */
    > @@ -1548,21 +1570,41 @@ got_pack_dump_delta_chain_to_file(size_t *result_size,
    >  		}
    >  
    >  		if (base_buf) {
    > -			err = got_delta_apply_in_mem(base_buf, base_bufsz,
    > -			    delta_buf, delta_len, accum_buf,
    > -			    &accum_size, max_size);
    > +			if (fulltext) {
    > +				memcpy(accum_buf, fulltext, fulltext_len);
    > +				accum_size = fulltext_len;
    > +				err = NULL;
    > +			} else {
    > +				err = got_delta_apply_in_mem(base_buf, base_bufsz,
    
    please wrap before 80 columns:
    
    				err = got_delta_apply_in_mem(base_buf,
    				    base_bufsz, delta_buf, delta_len,
    				    accum_buf, &accum_size, max_size);
    
    > +				    delta_buf, delta_len, accum_buf,
    > +				    &accum_size, max_size);
    > +			}
    >  			n++;
    > +			if (!cached)
    > +				free(delta_buf);
    > +			if (err)
    > +				goto done;
    > +			if (fulltext == NULL) {
    > +				err = got_delta_cache_add_fulltext(
    > +				    pack->delta_cache, delta->data_offset,
    > +				    accum_buf, accum_size);
    > +				if (err) {
    > +					if (err->code != GOT_ERR_NO_SPACE)
    > +						goto done;
    > +					err = NULL;
    > +				}
    > +			}
    >  		} else {
    >  			err = got_delta_apply(base_file, delta_buf,
    >  			    delta_len,
    >  			    /* Final delta application writes to output file. */
    >  			    ++n < deltas->nentries ? accum_file : outfile,
    >  			    &accum_size);
    > +			if (!cached)
    > +				free(delta_buf);
    > +			if (err)
    > +				goto done;
    >  		}
    > -		if (!cached)
    > -			free(delta_buf);
    > -		if (err)
    > -			goto done;
    >  
    >  		if (n < deltas->nentries) {
    >  			/* Accumulated delta becomes the new base. */
    > @@ -1604,7 +1646,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  	const struct got_error *err = NULL;
    >  	struct got_delta *delta;
    >  	uint8_t *base_buf = NULL, *accum_buf = NULL;
    > -	size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0, delta_len;
    > +	size_t base_bufsz = 0, accum_bufsz = 0, accum_size = 0;
    >  	uint64_t max_size = 0;
    >  	int n = 0;
    >  
    > @@ -1614,9 +1656,28 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  	if (STAILQ_EMPTY(&deltas->entries))
    >  		return got_error(GOT_ERR_BAD_DELTA_CHAIN);
    >  
    > +	if (pack->delta_cache) {
    > +		uint8_t *delta_buf = NULL, *fulltext = NULL;
    > +		size_t delta_len, fulltext_len;
    > +
    > +		delta = STAILQ_LAST(&deltas->entries, got_delta, entry);
    > +		got_delta_cache_get(&delta_buf, &delta_len,
    > +		    &fulltext, &fulltext_len,
    > +		    pack->delta_cache, delta->data_offset);
    > +		if (fulltext) {
    > +			*outbuf = malloc(fulltext_len);
    > +			if (*outbuf == NULL)
    > +				return got_error_from_errno("malloc");
    > +			memcpy(*outbuf, fulltext, fulltext_len);
    > +			*outlen = fulltext_len;
    > +			return NULL;
    > +		}
    > +	}
    > +
    >  	/* Deltas are ordered in ascending order. */
    >  	STAILQ_FOREACH(delta, &deltas->entries, entry) {
    > -		uint8_t *delta_buf = NULL;
    > +		uint8_t *delta_buf = NULL, *fulltext = NULL;
    > +		size_t delta_len, fulltext_len = 0;
    >  		uint64_t base_size, result_size = 0;
    >  		int cached = 1;
    >  		if (n == 0) {
    > @@ -1637,10 +1698,26 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  				goto done;
    >  			}
    >  
    > +			if (pack->delta_cache) {
    > +				got_delta_cache_get(&delta_buf, &delta_len,
    > +				    &fulltext, &fulltext_len,
    > +				    pack->delta_cache, delta_data_offset);
    > +			}
    > +
    >  			if (delta->size > max_size)
    >  				max_size = delta->size;
    > +			if (delta->size > fulltext_len)
    > +				max_size = fulltext_len;
    >  
    > -			if (pack->map) {
    > +			if (fulltext) {
    > +				base_buf = malloc(fulltext_len);
    > +				if (base_buf == NULL) {
    > +					err = got_error_from_errno("malloc");
    > +					goto done;
    > +				}
    > +				memcpy(base_buf, fulltext, fulltext_len);
    > +				base_bufsz = fulltext_len;
    > +			} else if (pack->map) {
    >  				size_t mapoff;
    >  
    >  				if (delta_data_offset > SIZE_MAX) {
    > @@ -1667,11 +1744,30 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  			if (err)
    >  				goto done;
    >  			n++;
    > +
    > +			if (pack->delta_cache && fulltext == NULL) {
    > +				err = got_delta_cache_add(pack->delta_cache,
    > +				    delta_data_offset, NULL, 0);
    > +				if (err) {
    > +					if (err->code != GOT_ERR_NO_SPACE)
    > +						goto done;
    > +					err = NULL;
    > +				} else {
    > +					err = got_delta_cache_add_fulltext(
    > +					    pack->delta_cache,
    > +					    delta_data_offset,
    > +					    fulltext, fulltext_len);
    > +					if (err && err->code != GOT_ERR_NO_SPACE)
    > +						goto done;
    
    likewise, please wrap before 80:
    
    					if (err &&
    					    err->code != GOT_ERR_NO_SPACE)
    						goto done;
    
    > +					err = NULL;
    > +				}
    > +			}
    >  			continue;
    >  		}
    >  
    >  		if (pack->delta_cache) {
    >  			got_delta_cache_get(&delta_buf, &delta_len,
    > +			    &fulltext, &fulltext_len,
    >  			    pack->delta_cache, delta->data_offset);
    >  		}
    >  		if (delta_buf == NULL) {
    > @@ -1703,6 +1799,8 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  			max_size = base_size;
    >  		if (result_size > max_size)
    >  			max_size = result_size;
    > +		if (fulltext_len > max_size)
    > +			max_size = fulltext_len;
    >  
    >  		if (max_size > base_bufsz) {
    >  			uint8_t *p = realloc(base_buf, max_size);
    > @@ -1728,15 +1826,31 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz
    >  			accum_bufsz = max_size;
    >  		}
    >  
    > -		err = got_delta_apply_in_mem(base_buf, base_bufsz,
    > -		    delta_buf, delta_len, accum_buf,
    > -		    &accum_size, max_size);
    > +		if (fulltext) {
    > +			memcpy(accum_buf, fulltext, fulltext_len);
    > +			accum_size = fulltext_len;
    > +			err = NULL;
    > +		} else {
    > +			err = got_delta_apply_in_mem(base_buf, base_bufsz,
    > +			    delta_buf, delta_len, accum_buf,
    > +			    &accum_size, max_size);
    > +		}
    >  		if (!cached)
    >  			free(delta_buf);
    >  		n++;
    >  		if (err)
    >  			goto done;
    >  
    > +		if (fulltext == NULL) {
    > +			err = got_delta_cache_add_fulltext(pack->delta_cache,
    > +			    delta->data_offset, accum_buf, accum_size);
    > +			if (err) {
    > +				if (err->code != GOT_ERR_NO_SPACE)
    > +					goto done;
    > +				err = NULL;
    > +			}
    > +		}
    > +
    >  		if (n < deltas->nentries) {
    >  			/* Accumulated delta becomes the new base. */
    >  			uint8_t *tmp = accum_buf;
    > 
    
    -- 
    Mark Jamsek <fnc.bsdbox.org|got.bsdbox.org>
    GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68
    
  • Christian Weisgerber:

    Got is kinda slow

  • Stefan Sperling:

    Got is kinda slow