From: Stefan Sperling Subject: Re: reuse deltas while packing To: ori@eigenstate.org Cc: gameoftrees@openbsd.org Date: Thu, 10 Feb 2022 09:09:55 +0100 On Thu, Feb 10, 2022 at 12:57:14AM -0500, ori@eigenstate.org wrote: > Playing with it, if I reduce the minimum chunk > size to 32, the pack gets significantly smaller: > > 1.6188G eecede5b54cafd33515f01101c139b098370ec3a.pack > > And switching from sha1 to murmurhash2 for the delta > hash table is a significant speedup. It drops the time > to repack the plan9front repo from 67 seconds to 48 > seconds. Thank you, Ori! Is the patch below equivalent to what you did? I am sorry I have not yet found time to provide you with numbers that show our current performance with respect to size and time. I could still do that if it would be useful to you. diff d75b4088b08f12aea8079aad55996a65b7b312c8 /home/stsp/src/got blob - 0285cfc40e99bc160a29bfb3df79477d73b75cf4 file + lib/deltify.c --- lib/deltify.c +++ lib/deltify.c @@ -26,11 +26,11 @@ #include #include #include -#include <sha1.h> #include "got_error.h" #include "got_lib_deltify.h" +#include "murmurhash2.h" #ifndef MIN #define MIN(_a,_b) ((_a) < (_b) ?
(_a) : (_b)) @@ -87,17 +87,10 @@ static uint32_t geartab[256] = { 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba, }; -static uint64_t +static uint32_t hashblk(const unsigned char *p, off_t n) { - unsigned char buf[SHA1_DIGEST_LENGTH]; - uint64_t h; - SHA1_CTX ctx; - SHA1Init(&ctx); - SHA1Update(&ctx, p, n); - SHA1Final(buf, &ctx); - memcpy(&h, buf, sizeof(h)); - return be64toh(h); + return murmurhash2(p, n, 0x1d7c5ac3); } static const struct got_error * @@ -247,7 +240,7 @@ lookupblk(struct got_delta_block **block, struct got_d unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0) { int i; - uint64_t h; + uint32_t h; uint8_t buf[GOT_DELTIFY_MAXCHUNK]; size_t r; @@ -278,7 +271,7 @@ lookupblk_mem(struct got_delta_block **block, struct g unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0) { int i; - uint64_t h; + uint32_t h; uint8_t *b; *block = NULL; @@ -360,7 +353,7 @@ got_deltify_init(struct got_delta_table **dt, FILE *f, off_t filesize) { const struct got_error *err = NULL; - uint64_t h; + uint32_t h; const off_t offset0 = fileoffset; *dt = calloc(1, sizeof(**dt)); blob - 956f6c3dab971228b22f00297f9ee0ef0ef8f894 file + lib/got_lib_deltify.h --- lib/got_lib_deltify.h +++ lib/got_lib_deltify.h @@ -34,7 +34,7 @@ struct got_delta_instruction { }; enum { - GOT_DELTIFY_MINCHUNK = 128, + GOT_DELTIFY_MINCHUNK = 32, GOT_DELTIFY_MAXCHUNK = 8192, GOT_DELTIFY_SPLITMASK = (1 << 8) - 1, blob - b903bbecf5905f866e82fb63306b28d3e7b6366e file + regress/deltify/Makefile --- regress/deltify/Makefile +++ regress/deltify/Makefile @@ -1,7 +1,7 @@ .PATH:${.CURDIR}/../../lib PROG = deltify_test -SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c +SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c murmurhash2.c CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib LDADD = -lz