"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: reuse deltas while packing
To:
ori@eigenstate.org
Cc:
gameoftrees@openbsd.org
Date:
Thu, 10 Feb 2022 09:09:55 +0100

Download raw body.

Thread
  • Christian Weisgerber:

    reuse deltas while packing

  • On Thu, Feb 10, 2022 at 12:57:14AM -0500, ori@eigenstate.org wrote:
    > Playing with it, if I reduce the minimum chunk
    > size to 32, the pack gets significantly smaller:
    > 
    > 	1.6188G	eecede5b54cafd33515f01101c139b098370ec3a.pack
    > 
    > And switching from sha1 to murmurhash2 for the delta
    > hash table is a significant speedup.  It drops the time
    > to repack the plan9front repo from 67 seconds to 48
    > seconds.
    
    Thank you, Ori!  Is the patch below equivalent to what you did?
    
    I am sorry I have not yet found the time to provide you with numbers
    that show our current performance with respect to size and time.
    I can still do that if it would be useful to you.
    
    
    diff d75b4088b08f12aea8079aad55996a65b7b312c8 /home/stsp/src/got
    blob - 0285cfc40e99bc160a29bfb3df79477d73b75cf4
    file + lib/deltify.c
    --- lib/deltify.c
    +++ lib/deltify.c
    @@ -26,11 +26,11 @@
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
    -#include <sha1.h>
     
     #include "got_error.h"
     
     #include "got_lib_deltify.h"
    +#include "murmurhash2.h"
     
     #ifndef MIN
     #define	MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
    @@ -87,17 +87,10 @@ static uint32_t geartab[256] = {
         0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
     };
     
    -static uint64_t
    +static uint32_t
     hashblk(const unsigned char *p, off_t n)
     {
    -	unsigned char buf[SHA1_DIGEST_LENGTH];
    -	uint64_t h;
    -	SHA1_CTX ctx;
    -	SHA1Init(&ctx);
    -	SHA1Update(&ctx, p, n);
    -	SHA1Final(buf, &ctx);
    -	memcpy(&h, buf, sizeof(h));
    -	return be64toh(h);
    +	return murmurhash2(p, n, 0x1d7c5ac3);
     }
     
     static const struct got_error *
    @@ -247,7 +240,7 @@ lookupblk(struct got_delta_block **block, struct got_d
         unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0)
     {
     	int i;
    -	uint64_t h;
    +	uint32_t h;
     	uint8_t buf[GOT_DELTIFY_MAXCHUNK];
     	size_t r;
     
    @@ -278,7 +271,7 @@ lookupblk_mem(struct got_delta_block **block, struct g
         unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0)
     {
     	int i;
    -	uint64_t h;
    +	uint32_t h;
     	uint8_t *b;
     
     	*block = NULL;
    @@ -360,7 +353,7 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
         off_t filesize)
     {
     	const struct got_error *err = NULL;
    -	uint64_t h;
    +	uint32_t h;
     	const off_t offset0 = fileoffset;
     
     	*dt = calloc(1, sizeof(**dt));
    blob - 956f6c3dab971228b22f00297f9ee0ef0ef8f894
    file + lib/got_lib_deltify.h
    --- lib/got_lib_deltify.h
    +++ lib/got_lib_deltify.h
    @@ -34,7 +34,7 @@ struct got_delta_instruction {
     };
     
     enum {
    -	GOT_DELTIFY_MINCHUNK	= 128,
    +	GOT_DELTIFY_MINCHUNK	= 32,
     	GOT_DELTIFY_MAXCHUNK	= 8192,
     	GOT_DELTIFY_SPLITMASK	= (1 << 8) - 1,
     	
    blob - b903bbecf5905f866e82fb63306b28d3e7b6366e
    file + regress/deltify/Makefile
    --- regress/deltify/Makefile
    +++ regress/deltify/Makefile
    @@ -1,7 +1,7 @@
     .PATH:${.CURDIR}/../../lib
     
     PROG = deltify_test
    -SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c
    +SRCS = deltify.c error.c opentemp.c sha1.c deltify_test.c murmurhash2.c
     
     CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib
     LDADD = -lz
    
    
    
  • Christian Weisgerber:

    reuse deltas while packing