Download raw body.
reuse deltas while packing
On Thu, Feb 10, 2022 at 12:57:14AM -0500, ori@eigenstate.org wrote:
> Playing with it, if I reduce the minimum chunk
> size to 32, the pack gets significantly smaller:
>
> 1.6188G eecede5b54cafd33515f01101c139b098370ec3a.pack
>
> And switching from sha1 to murmurhash2 for the delta
> hash table is a significant speedup. It drops the time
> to repack the plan9front repo from 67 seconds to 48
> seconds.
Thank you, Ori! Is the patch below equivalent to what you did?
I am sorry that I have not yet found time to provide you with numbers that
show our current performance with respect to size and time.
I can still do that if it would be useful to you.
diff d75b4088b08f12aea8079aad55996a65b7b312c8 /home/stsp/src/got
blob - 0285cfc40e99bc160a29bfb3df79477d73b75cf4
file + lib/deltify.c
--- lib/deltify.c
+++ lib/deltify.c
@@ -26,11 +26,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sha1.h>
#include "got_error.h"
#include "got_lib_deltify.h"
+#include "murmurhash2.h"
#ifndef MIN
#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
@@ -87,17 +87,10 @@ static uint32_t geartab[256] = {
0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
};
-static uint64_t
+static uint32_t
hashblk(const unsigned char *p, off_t n)
{
- unsigned char buf[SHA1_DIGEST_LENGTH];
- uint64_t h;
- SHA1_CTX ctx;
- SHA1Init(&ctx);
- SHA1Update(&ctx, p, n);
- SHA1Final(buf, &ctx);
- memcpy(&h, buf, sizeof(h));
- return be64toh(h);
+ return murmurhash2(p, n, 0x1d7c5ac3);
}
static const struct got_error *
@@ -247,7 +240,7 @@ lookupblk(struct got_delta_block **block, struct got_d
unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0)
{
int i;
- uint64_t h;
+ uint32_t h;
uint8_t buf[GOT_DELTIFY_MAXCHUNK];
size_t r;
@@ -278,7 +271,7 @@ lookupblk_mem(struct got_delta_block **block, struct g
unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0)
{
int i;
- uint64_t h;
+ uint32_t h;
uint8_t *b;
*block = NULL;
@@ -360,7 +353,7 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
off_t filesize)
{
const struct got_error *err = NULL;
- uint64_t h;
+ uint32_t h;
const off_t offset0 = fileoffset;
*dt = calloc(1, sizeof(**dt));
blob - 956f6c3dab971228b22f00297f9ee0ef0ef8f894
file + lib/got_lib_deltify.h
--- lib/got_lib_deltify.h
+++ lib/got_lib_deltify.h
@@ -34,7 +34,7 @@ struct got_delta_instruction {
};
enum {
- GOT_DELTIFY_MINCHUNK = 128,
+ GOT_DELTIFY_MINCHUNK = 32,
GOT_DELTIFY_MAXCHUNK = 8192,
GOT_DELTIFY_SPLITMASK = (1 << 8) - 1,
blob - b903bbecf5905f866e82fb63306b28d3e7b6366e
file + regress/deltify/Makefile
--- regress/deltify/Makefile
+++ regress/deltify/Makefile
@@ -1,7 +1,7 @@
.PATH:${.CURDIR}/../../lib
PROG = deltify_test
-SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c
+SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c murmurhash2.c
CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib
LDADD = -lz
reuse deltas while packing