Download raw body.
add LRU cache for delta data to got-read-pack
On Sun, Nov 10, 2019 at 03:37:54PM +0200, Stefan Sperling wrote:
> got-read-pack spends a lot of time decompressing the same delta data
> over and over. This diff adds an LRU cache to make it hit the disk
> and zlib less often. The effect on performance is quite pronounced.
>
> Before:
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m55.65s real 0m34.61s user 0m24.69s system
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m53.54s real 0m33.43s user 0m23.62s system
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m52.93s real 0m32.59s user 0m23.87s system
>
> After:
>
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m31.92s real 0m29.60s user 0m06.87s system
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m31.53s real 0m29.13s user 0m06.86s system
> $ time got blame sys/kern/kern_tc.c > /dev/null
> 0m31.49s real 0m29.57s user 0m06.56s system
>
> 30 seconds still isn't great but is better than 50.
>
> ok?
>
> diff refs/heads/master refs/heads/deltacache
> blob - 4310fc7e0269fccc28e40d464196abeb11e6d25f
> blob + 709948a09b358cf01d39d50454f4f9c08fad9c28
> --- got/Makefile
> +++ got/Makefile
> @@ -8,7 +8,7 @@ SRCS= got.c blame.c commit_graph.c delta.c diff.c \
> object_idset.c object_parse.c opentemp.c path.c pack.c \
> privsep.c reference.c repository.c sha1.c worktree.c \
> inflate.c buf.c rcsutil.c diff3.c lockfile.c \
> - deflate.c object_create.c
> + deflate.c object_create.c delta_cache.c
> MAN = ${PROG}.1 got-worktree.5 git-repository.5
>
> CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
> blob - 42f628d13edc1ddbaa6844b6156d41a9438d9e4b
> blob + 9abb0d4dc56c7c09ebf32461a9eb717f79cbca05
> --- lib/delta.c
> +++ lib/delta.c
> @@ -40,7 +40,7 @@
>
> struct got_delta *
> got_delta_open(off_t offset, size_t tslen, int type, size_t size,
> - off_t data_offset, uint8_t *delta_buf, size_t delta_len)
> + off_t data_offset)
> {
> struct got_delta *delta;
>
> @@ -53,8 +53,6 @@ got_delta_open(off_t offset, size_t tslen, int type, s
> delta->tslen = tslen;
> delta->size = size;
> delta->data_offset = data_offset;
> - delta->delta_buf = delta_buf;
> - delta->delta_len = delta_len;
> return delta;
> }
>
> blob - /dev/null
> blob + b9d4f89656dd56b278729ad6c522d9c453d476da (mode 644)
> --- /dev/null
> +++ lib/delta_cache.c
> @@ -0,0 +1,144 @@
> +/*
> + * Copyright (c) 2019 Stefan Sperling <stsp@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/queue.h>
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sha1.h>
> +#include <stdio.h>
> +#include <zlib.h>
> +#include <limits.h>
> +#include <time.h>
> +
> +#include "got_object.h"
> +#include "got_error.h"
> +
> +#include "got_lib_delta.h"
> +#include "got_lib_inflate.h"
> +#include "got_lib_object.h"
> +#include "got_lib_delta_cache.h"
> +
> +#ifndef nitems
> +#define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
> +#endif
> +
> +struct got_delta_cache_element {
> + TAILQ_ENTRY(got_delta_cache_element) entry;
> + off_t delta_data_offset;
> + uint8_t *delta_data;
> + size_t delta_len;
> +};
> +
> +TAILQ_HEAD(got_delta_cache_head, got_delta_cache_element);
> +
> +struct got_delta_cache {
> + struct got_delta_cache_head entries;
> + int nelem;
> + int maxelem;
> + size_t maxelemsize;
> +};
> +
> +struct got_delta_cache *
> +got_delta_cache_alloc(int maxelem, size_t maxelemsize)
> +{
> + struct got_delta_cache *cache;
> +
> + cache = calloc(1, sizeof(*cache));
> + if (cache == NULL)
> + return NULL;
> +
> + TAILQ_INIT(&cache->entries);
> + cache->maxelem = maxelem;
> + cache->maxelemsize = maxelemsize;
> + return cache;
> +}
> +
> +void
> +got_delta_cache_free(struct got_delta_cache *cache)
> +{
> + struct got_delta_cache_element *entry;
> +
> + while (!TAILQ_EMPTY(&cache->entries)) {
> + entry = TAILQ_FIRST(&cache->entries);
> + TAILQ_REMOVE(&cache->entries, entry, entry);
> + free(entry->delta_data);
> + free(entry);
> + }
> + free(cache);
> +}
> +
> +static void
> +remove_least_used_element(struct got_delta_cache *cache)
> +{
> + struct got_delta_cache_element *entry;
> +
> + if (cache->nelem == 0)
> + return;
> +
> + entry = TAILQ_LAST(&cache->entries, got_delta_cache_head);
> + TAILQ_REMOVE(&cache->entries, entry, entry);
> + free(entry->delta_data);
> + free(entry);
> + cache->nelem--;
> +}
> +
> +
> +const struct got_error *
> +got_delta_cache_add(struct got_delta_cache *cache,
> + off_t delta_data_offset, uint8_t *delta_data, size_t delta_len)
> +{
> + struct got_delta_cache_element *entry;
> +
> + if (delta_len > cache->maxelemsize)
> + return got_error(GOT_ERR_NO_SPACE);
> +
> + if (cache->nelem >= cache->maxelem)
> + remove_least_used_element(cache);
> +
> + entry = calloc(1, sizeof(*entry));
> + if (entry == NULL)
> + return got_error_from_errno("calloc");
> +
> + entry->delta_data_offset = delta_data_offset;
> + entry->delta_data = delta_data;
> + entry->delta_len = delta_len;
> +
> + TAILQ_INSERT_HEAD(&cache->entries, entry, entry);
> + cache->nelem++;
> + return NULL;
> +}
> +
> +void
> +got_delta_cache_get(uint8_t **delta_data, size_t *delta_len,
> + struct got_delta_cache *cache, off_t delta_data_offset)
> +{
> + struct got_delta_cache_element *entry;
> +
> + *delta_data = NULL;
> + *delta_len = 0;
> + TAILQ_FOREACH(entry, &cache->entries, entry) {
> + if (entry->delta_data_offset != delta_data_offset)
> + continue;
> + if (entry != TAILQ_FIRST(&cache->entries)) {
> + TAILQ_REMOVE(&cache->entries, entry, entry);
> + TAILQ_INSERT_HEAD(&cache->entries, entry, entry);
> + }
> + *delta_data = entry->delta_data;
> + *delta_len = entry->delta_len;
> + return;
> + }
> +}
> blob - 8b79413858bc4609d31a7e5b7910f8419f709365
> blob + b04e243f7e28e091a99469856203d981d23691d5
> --- lib/got_lib_delta.h
> +++ lib/got_lib_delta.h
> @@ -21,8 +21,6 @@ struct got_delta {
> int type;
> size_t size;
> off_t data_offset;
> - uint8_t *delta_buf;
> - size_t delta_len;
> };
>
> struct got_delta_chain {
> @@ -32,8 +30,7 @@ struct got_delta_chain {
>
> #define GOT_DELTA_CHAIN_RECURSION_MAX 500
>
> -struct got_delta *got_delta_open(off_t, size_t, int, size_t, off_t,
> - uint8_t *, size_t);
> +struct got_delta *got_delta_open(off_t, size_t, int, size_t, off_t);
> const struct got_error *got_delta_chain_get_base_type(int *,
> struct got_delta_chain *);
> const struct got_error *got_delta_get_sizes(uint64_t *, uint64_t *,
> blob - /dev/null
> blob + 39cb23dc018390d1de373fde1dc990faa76c21e2 (mode 644)
> --- /dev/null
> +++ lib/got_lib_delta_cache.h
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright (c) 2019 Stefan Sperling <stsp@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +struct got_delta_cache;
> +
> +struct got_delta_cache *got_delta_cache_alloc(int, size_t);
> +void got_delta_cache_free(struct got_delta_cache *);
> +
> +const struct got_error *got_delta_cache_add(struct got_delta_cache *, off_t,
> + uint8_t *, size_t);
> +void got_delta_cache_get(uint8_t **, size_t *, struct got_delta_cache *, off_t);
> blob - 654dfa86d4de464fcdc3b68b5b8469f50f476a4c
> blob + f1f994a6a41a9c16744f389369f3692b13da72e4
> --- lib/got_lib_pack.h
> +++ lib/got_lib_pack.h
> @@ -21,6 +21,7 @@ struct got_pack {
> uint8_t *map;
> size_t filesize;
> struct got_privsep_child *privsep_child;
> + struct got_delta_cache *delta_cache;
> };
>
> const struct got_error *got_pack_stop_privsep_child(struct got_pack *);
> @@ -168,7 +169,7 @@ const struct got_error *got_packidx_match_id_str_prefi
> const struct got_error *got_packfile_open_object(struct got_object **,
> struct got_pack *, struct got_packidx *, int, struct got_object_id *);
> const struct got_error *got_pack_get_max_delta_object_size(uint64_t *,
> - struct got_object *);
> + struct got_object *, struct got_pack *);
> const struct got_error *got_packfile_extract_object(struct got_pack *,
> struct got_object *, FILE *, FILE *, FILE *);
> const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
> blob - 6d876216ae17f82d4f9dee9e675e1577abd93a08
> blob + c67ff751c1fbd694ef012e6ec8f54283f24acdf3
> --- lib/object_cache.c
> +++ lib/object_cache.c
> @@ -84,9 +84,9 @@ get_size_obj(struct got_object *obj)
> return size;
>
> SIMPLEQ_FOREACH(delta, &obj->deltas.entries, entry) {
> - if (SIZE_MAX - (sizeof(*delta) + delta->delta_len) < size)
> + if (SIZE_MAX - sizeof(*delta) < size)
> return SIZE_MAX;
> - size += sizeof(*delta) + delta->delta_len;
> + size += sizeof(*delta);
> }
>
> return size;
> blob - a037ceb7fcaca7f9358cecf93fb644ff7f35ccc9
> blob + 9a37c6f1d7c6414382484dbde632c0fb6bb7591e
> --- lib/object_parse.c
> +++ lib/object_parse.c
> @@ -113,7 +113,6 @@ got_object_close(struct got_object *obj)
> while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
> delta = SIMPLEQ_FIRST(&obj->deltas.entries);
> SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
> - free(delta->delta_buf);
> free(delta);
> }
> }
> blob - fb5f114d8a5ce54615d80a1f1a73b8ab0dc65a9a
> blob + eef27b82b6eeb8a714c5fedd79b33b04f6e8e5f5
> --- lib/pack.c
> +++ lib/pack.c
> @@ -39,6 +39,7 @@
>
> #include "got_lib_sha1.h"
> #include "got_lib_delta.h"
> +#include "got_lib_delta_cache.h"
> #include "got_lib_inflate.h"
> #include "got_lib_object.h"
> #include "got_lib_object_parse.h"
> @@ -546,6 +547,10 @@ got_pack_close(struct got_pack *pack)
> free(pack->path_packfile);
> pack->path_packfile = NULL;
> pack->filesize = 0;
> + if (pack->delta_cache) {
> + got_delta_cache_free(pack->delta_cache);
> + pack->delta_cache = NULL;
> + }
>
> return err;
> }
> @@ -707,15 +712,32 @@ resolve_delta_chain(struct got_delta_chain *, struct g
> struct got_pack *, off_t, size_t, int, size_t, unsigned int);
>
> static const struct got_error *
> +read_delta_data(uint8_t **delta_buf, size_t *delta_len,
> + size_t delta_data_offset, struct got_pack *pack)
> +{
> + const struct got_error *err = NULL;
> +
> + if (pack->map) {
> + if (delta_data_offset >= pack->filesize)
> + return got_error(GOT_ERR_PACK_OFFSET);
> + err = got_inflate_to_mem_mmap(delta_buf, delta_len, pack->map,
> + delta_data_offset, pack->filesize - delta_data_offset);
> + } else {
> + if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1)
> + return got_error_from_errno("lseek");
> + err = got_inflate_to_mem_fd(delta_buf, delta_len, pack->fd);
> + }
> + return err;
> +}
> +
> +static const struct got_error *
> add_delta(struct got_delta_chain *deltas, off_t delta_offset, size_t tslen,
> - int delta_type, size_t delta_size, size_t delta_data_offset,
> - uint8_t *delta_buf, size_t delta_len)
> + int delta_type, size_t delta_size, size_t delta_data_offset)
> {
> struct got_delta *delta;
>
> delta = got_delta_open(delta_offset, tslen, delta_type, delta_size,
> - delta_data_offset, delta_buf,
> - delta_len);
> + delta_data_offset);
> if (delta == NULL)
> return got_error_from_errno("got_delta_open");
> /* delta is freed in got_object_close() */
> @@ -736,8 +758,7 @@ resolve_offset_delta(struct got_delta_chain *deltas,
> uint64_t base_size;
> size_t base_tslen;
> off_t delta_data_offset;
> - uint8_t *delta_buf;
> - size_t delta_len, consumed;
> + size_t consumed;
>
> err = parse_offset_delta(&base_offset, &consumed, pack,
> delta_offset, tslen);
> @@ -754,21 +775,8 @@ resolve_offset_delta(struct got_delta_chain *deltas,
> return got_error_from_errno("lseek");
> }
>
> - if (pack->map) {
> - size_t mapoff = (size_t)delta_data_offset;
> - err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map,
> - mapoff, pack->filesize - mapoff);
> - if (err)
> - return err;
> - } else {
> -
> - err = got_inflate_to_mem_fd(&delta_buf, &delta_len, pack->fd);
> - if (err)
> - return err;
> - }
> -
> err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
> - delta_data_offset, delta_buf, delta_len);
> + delta_data_offset);
> if (err)
> return err;
>
> @@ -801,28 +809,27 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru
> uint8_t *delta_buf;
> size_t delta_len;
>
> - if (delta_offset >= pack->filesize)
> + if (delta_offset + tslen >= pack->filesize)
> return got_error(GOT_ERR_PACK_OFFSET);
> - delta_data_offset = delta_offset + tslen;
> - if (delta_data_offset >= pack->filesize)
> - return got_error(GOT_ERR_PACK_OFFSET);
>
> if (pack->map == NULL) {
> - delta_data_offset = lseek(pack->fd, 0, SEEK_CUR);
> - if (delta_data_offset == -1)
> - return got_error_from_errno("lseek");
> }
>
> if (pack->map) {
> - size_t mapoff = (size_t)delta_data_offset;
> + size_t mapoff = delta_offset + tslen;
> memcpy(&id, pack->map + mapoff, sizeof(id));
> mapoff += sizeof(id);
> + delta_data_offset = (off_t)mapoff;
> err = got_inflate_to_mem_mmap(&delta_buf, &delta_len, pack->map,
> mapoff, pack->filesize - mapoff);
> if (err)
> return err;
> } else {
> - ssize_t n = read(pack->fd, &id, sizeof(id));
> + ssize_t n;
> + delta_data_offset = lseek(pack->fd, 0, SEEK_CUR);
> + if (delta_data_offset == -1)
> + return got_error_from_errno("lseek");
> + n = read(pack->fd, &id, sizeof(id));
> if (n < 0)
> return got_error_from_errno("read");
> if (n != sizeof(id))
> @@ -833,7 +840,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru
> }
>
> err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size,
> - delta_data_offset, delta_buf, delta_len);
> + delta_data_offset);
> if (err)
> return err;
>
> @@ -875,7 +882,7 @@ resolve_delta_chain(struct got_delta_chain *deltas, st
> case GOT_OBJ_TYPE_TAG:
> /* Plain types are the final delta base. Recursion ends. */
> err = add_delta(deltas, delta_offset, tslen, delta_type,
> - delta_size, 0, NULL, 0);
> + delta_size, 0);
> break;
> case GOT_OBJ_TYPE_OFFSET_DELTA:
> err = resolve_offset_delta(deltas, packidx, pack,
> @@ -980,7 +987,8 @@ got_packfile_open_object(struct got_object **obj, stru
> }
>
> static const struct got_error *
> -get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas)
> +get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas,
> + struct got_pack *pack)
> {
> struct got_delta *delta;
> uint64_t base_size = 0, result_size = 0;
> @@ -993,8 +1001,31 @@ get_delta_chain_max_size(uint64_t *max_size, struct go
> delta->type != GOT_OBJ_TYPE_BLOB &&
> delta->type != GOT_OBJ_TYPE_TAG) {
> const struct got_error *err;
> + uint8_t *delta_buf;
> + size_t delta_len;
> + int cached = 1;
> +
> + got_delta_cache_get(&delta_buf, &delta_len,
> + pack->delta_cache, delta->data_offset);
> + if (delta_buf == NULL) {
> + cached = 0;
> + err = read_delta_data(&delta_buf, &delta_len,
> + delta->data_offset, pack);
> + if (err)
> + return err;
> + err = got_delta_cache_add(pack->delta_cache,
> + delta->data_offset, delta_buf, delta_len);
> + if (err == NULL)
> + cached = 1;
> + else if (err->code != GOT_ERR_NO_SPACE) {
> + free(delta_buf);
> + return err;
> + }
> + }
> err = got_delta_get_sizes(&base_size, &result_size,
> - delta->delta_buf, delta->delta_len);
> + delta_buf, delta_len);
> + if (!cached)
> + free(delta_buf);
> if (err)
> return err;
> } else
> @@ -1009,12 +1040,13 @@ get_delta_chain_max_size(uint64_t *max_size, struct go
> }
>
> const struct got_error *
> -got_pack_get_max_delta_object_size(uint64_t *size, struct got_object *obj)
> +got_pack_get_max_delta_object_size(uint64_t *size, struct got_object *obj,
> + struct got_pack *pack)
> {
> if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0)
> return got_error(GOT_ERR_OBJ_TYPE);
>
> - return get_delta_chain_max_size(size, &obj->deltas);
> + return get_delta_chain_max_size(size, &obj->deltas, pack);
> }
>
> static const struct got_error *
> @@ -1023,8 +1055,8 @@ dump_delta_chain_to_file(size_t *result_size, struct g
> {
> const struct got_error *err = NULL;
> struct got_delta *delta;
> - uint8_t *base_buf = NULL, *accum_buf = NULL;
> - size_t base_bufsz = 0, accum_size = 0;
> + uint8_t *base_buf = NULL, *accum_buf = NULL, *delta_buf;
> + size_t base_bufsz = 0, accum_size = 0, delta_len;
> uint64_t max_size;
> int n = 0;
>
> @@ -1034,7 +1066,7 @@ dump_delta_chain_to_file(size_t *result_size, struct g
> return got_error(GOT_ERR_BAD_DELTA_CHAIN);
>
> /* We process small enough files entirely in memory for speed. */
> - err = get_delta_chain_max_size(&max_size, deltas);
> + err = get_delta_chain_max_size(&max_size, deltas, pack);
> if (err)
> return err;
> if (max_size < GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
> @@ -1047,6 +1079,7 @@ dump_delta_chain_to_file(size_t *result_size, struct g
>
> /* Deltas are ordered in ascending order. */
> SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
> + int cached = 1;
> if (n == 0) {
> size_t mapoff;
> off_t delta_data_offset;
> @@ -1099,18 +1132,37 @@ dump_delta_chain_to_file(size_t *result_size, struct g
> continue;
> }
>
> + got_delta_cache_get(&delta_buf, &delta_len,
> + pack->delta_cache, delta->data_offset);
> + if (delta_buf == NULL) {
> + cached = 0;
> + err = read_delta_data(&delta_buf, &delta_len,
> + delta->data_offset, pack);
> + if (err)
> + goto done;
> + err = got_delta_cache_add(pack->delta_cache,
> + delta->data_offset, delta_buf, delta_len);
> + if (err == NULL)
> + cached = 1;
> + else if (err->code != GOT_ERR_NO_SPACE) {
> + free(delta_buf);
> + goto done;
> + }
> + }
> if (base_buf) {
> err = got_delta_apply_in_mem(base_buf, base_bufsz,
> - delta->delta_buf, delta->delta_len, accum_buf,
> + delta_buf, delta_len, accum_buf,
> &accum_size, max_size);
> n++;
> } else {
> - err = got_delta_apply(base_file, delta->delta_buf,
> - delta->delta_len,
> + err = got_delta_apply(base_file, delta_buf,
> + delta_len,
> /* Final delta application writes to output file. */
> ++n < deltas->nentries ? accum_file : outfile,
> &accum_size);
> }
> + if (!cached)
> + free(delta_buf);
> if (err)
> goto done;
>
> @@ -1167,8 +1219,8 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl
> {
> const struct got_error *err = NULL;
> struct got_delta *delta;
> - uint8_t *base_buf = NULL, *accum_buf = NULL;
> - size_t base_bufsz = 0, accum_size = 0;
> + uint8_t *base_buf = NULL, *accum_buf = NULL, *delta_buf;
> + size_t base_bufsz = 0, accum_size = 0, delta_len;
> uint64_t max_size;
> int n = 0;
>
> @@ -1178,7 +1230,7 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl
> if (SIMPLEQ_EMPTY(&deltas->entries))
> return got_error(GOT_ERR_BAD_DELTA_CHAIN);
>
> - err = get_delta_chain_max_size(&max_size, deltas);
> + err = get_delta_chain_max_size(&max_size, deltas, pack);
> if (err)
> return err;
> accum_buf = malloc(max_size);
> @@ -1187,6 +1239,7 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl
>
> /* Deltas are ordered in ascending order. */
> SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
> + int cached = 1;
> if (n == 0) {
> size_t delta_data_offset;
>
> @@ -1224,9 +1277,28 @@ dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outl
> continue;
> }
>
> + got_delta_cache_get(&delta_buf, &delta_len,
> + pack->delta_cache, delta->data_offset);
> + if (delta_buf == NULL) {
> + cached = 0;
> + err = read_delta_data(&delta_buf, &delta_len,
> + delta->data_offset, pack);
> + if (err)
> + goto done;
> + err = got_delta_cache_add(pack->delta_cache,
> + delta->data_offset, delta_buf, delta_len);
> + if (err == NULL)
> + cached = 1;
> + else if (err->code != GOT_ERR_NO_SPACE) {
> + free(delta_buf);
> + goto done;
> + }
> + }
> err = got_delta_apply_in_mem(base_buf, base_bufsz,
> - delta->delta_buf, delta->delta_len, accum_buf,
> + delta_buf, delta_len, accum_buf,
> &accum_size, max_size);
> + if (!cached)
> + free(delta_buf);
> n++;
> if (err)
> goto done;
> blob - e50b725ba52b655d85bb730cc8fae281a9751215
> blob + 5fe2b7efcfad5831f504354f84e46082989a0d5f
> --- libexec/got-read-pack/Makefile
> +++ libexec/got-read-pack/Makefile
> @@ -5,7 +5,7 @@
> PROG= got-read-pack
> SRCS= got-read-pack.c delta.c error.c inflate.c object_cache.c \
> object_idset.c object_parse.c opentemp.c pack.c path.c \
> - privsep.c sha1.c
> + privsep.c sha1.c delta_cache.c
>
> CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib
> LDADD = -lutil -lz
> blob - faa1ca0fe5d71391f0f5fe9cb3b683725741ca18
> blob + d8c2e0f8683a4492064ae0e7be3d35a1c6b485f0
> --- libexec/got-read-pack/got-read-pack.c
> +++ libexec/got-read-pack/got-read-pack.c
> @@ -35,6 +35,7 @@
> #include "got_object.h"
>
> #include "got_lib_delta.h"
> +#include "got_lib_delta_cache.h"
> #include "got_lib_object.h"
> #include "got_lib_object_cache.h"
> #include "got_lib_object_parse.h"
> @@ -289,7 +290,7 @@ blob_request(struct imsg *imsg, struct imsgbuf *ibuf,
> goto done;
>
> if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
> - err = got_pack_get_max_delta_object_size(&blob_size, obj);
> + err = got_pack_get_max_delta_object_size(&blob_size, obj, pack);
> if (err)
> goto done;
> } else
> @@ -491,6 +492,13 @@ receive_pack(struct got_pack **packp, struct imsgbuf *
> pack->path_packfile = strdup(ipack.path_packfile);
> if (pack->path_packfile == NULL) {
> err = got_error_from_errno("strdup");
> + goto done;
> + }
> +
> + pack->delta_cache = got_delta_cache_alloc(100,
> + GOT_DELTA_RESULT_SIZE_CACHED_MAX);
> + if (pack->delta_cache == NULL) {
> + err = got_error_from_errno("got_delta_cache_alloc");
> goto done;
> }
>
> blob - 531c090468f03b4cc3cec7ddbb0b6cd2c7df89a4
> blob + aafde1a4c7fe645f39e2b0acb46faa64767369c7
> --- tog/Makefile
> +++ tog/Makefile
> @@ -8,7 +8,7 @@ SRCS= tog.c blame.c commit_graph.c delta.c diff.c \
> object_idset.c object_parse.c opentemp.c path.c pack.c \
> privsep.c reference.c repository.c sha1.c worktree.c \
> utf8.c inflate.c buf.c rcsutil.c diff3.c \
> - lockfile.c deflate.c object_create.c
> + lockfile.c deflate.c object_create.c delta_cache.c
> MAN = ${PROG}.1
>
> CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
>
Nice work :),
Briefly tested on some various-sized repos and all numbers are faster.
Before:
$ time got blame sys/kern/kern_tc.c > /dev/null
1m13.89s real 0m41.08s user 0m38.26s system
$ time got blame sys/kern/kern_tc.c > /dev/null
1m14.10s real 0m41.97s user 0m37.96s system
$ time got blame sys/kern/kern_tc.c > /dev/null
1m14.11s real 0m41.83s user 0m37.87s system
After:
$ time got blame sys/kern/kern_tc.c > /dev/null
0m45.19s real 0m34.40s user 0m16.39s system
$ time got blame sys/kern/kern_tc.c > /dev/null
0m45.01s real 0m33.88s user 0m16.34s system
$ time got blame sys/kern/kern_tc.c > /dev/null
0m44.86s real 0m35.41s user 0m15.89s system
--
Kind regards,
Hiltjo
add LRU cache for delta data to got-read-pack