Download raw body.
fix path meta-data used for packing
Stefan Sperling <stsp@stsp.name> wrote:
> On Fri, May 20, 2022 at 10:17:49AM +0200, Omar Polo wrote:
> > > + m->path_hash = murmurhash2(path, strlen(path), 0xd70af26a);
> >
> > reusing the same seed on every run is to be deterministic and produce the
> > same pack at every run? What are the disadvantages with, say, using a
> > random seed freshly generated at every run?
> >
> > (a quick grep shows that we're always using hardcoded seeds for
> > murmurhash2 also in other cases too so this is not an issue, just a
> > question)
>
> Yes, we should probably be using a unique arc4random() seed per run.
> The SipHash24 API encourages this as well.
>
> We can fix this later.
with something like this? :)
written on the go and only lightly tested, but regress passes. It
creates a fresh seed in got_pack_create and in got_deltify. I can move
the arc4random call one level higher probably, but these functions seems
to be called only once.
I've also changed bloom.c to create a fresh seed upon bloom_init: the
change is very small but this "vendored" library so I'm not sure to
change it.
I don't see further calls to murmurhash2 with an hardcoded hash.
This will require some work in -portable thought. I've seen what
openssh (IIRC) does to have a bundled arc4random and it's not
straightforward (even if we may be able to just steal their arc4random
compat.) I'm happy to help with that however!
diff refs/heads/main refs/heads/randseeds
blob - 05e57b1f7053a73112d4713be722a197307866e2
blob + a66c49edad2dad5800dbe4eb0deb37ed923834ac
--- lib/bloom.c
+++ lib/bloom.c
@@ -77,7 +77,7 @@ static int bloom_check_add(struct bloom * bloom,
}
int hits = 0;
- register unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
+ register unsigned int a = murmurhash2(buffer, len, bloom->seed);
register unsigned int b = murmurhash2(buffer, len, a);
register unsigned int x;
register unsigned int i;
@@ -111,6 +111,8 @@ int bloom_init(struct bloom * bloom, int entries, doub
{
bloom->ready = 0;
+ bloom->seed = arc4random();
+
if (entries < 1000 || error == 0) {
return 1;
}
blob - 28356e3721b9f7902763322750d5a1b87640cd4d
blob + 51c848e8470bd02ed14a2bc377a173e39836239c
--- lib/bloom.h
+++ lib/bloom.h
@@ -52,6 +52,7 @@ struct bloom
int bits;
int bytes;
int hashes;
+ uint32_t seed;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
blob - b3bbf0b189575c109c8b935c3519ade298fed3aa
blob + 526099720694041b29211dc3b8f4852c0e20d398
--- lib/deltify.c
+++ lib/deltify.c
@@ -88,9 +88,9 @@ static const uint32_t geartab[256] = {
};
static uint32_t
-hashblk(const unsigned char *p, off_t n)
+hashblk(const unsigned char *p, off_t n, uint32_t seed)
{
- return murmurhash2(p, n, 0x1d7c5ac3);
+ return murmurhash2(p, n, seed);
}
static const struct got_error *
@@ -237,7 +237,8 @@ addblk_mem(struct got_delta_table *dt, uint8_t *data,
static const struct got_error *
lookupblk(struct got_delta_block **block, struct got_delta_table *dt,
- unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0)
+ unsigned char *p, off_t len, uint32_t seed, FILE *basefile,
+ off_t basefile_offset0)
{
int i;
uint32_t h;
@@ -246,7 +247,7 @@ lookupblk(struct got_delta_block **block, struct got_d
*block = NULL;
- h = hashblk(p, len);
+ h = hashblk(p, len, seed);
for (i = h % dt->nalloc; dt->blocks[i].len != 0;
i = (i + 1) % dt->nalloc) {
if (dt->blocks[i].hash != h ||
@@ -268,7 +269,8 @@ lookupblk(struct got_delta_block **block, struct got_d
static const struct got_error *
lookupblk_mem(struct got_delta_block **block, struct got_delta_table *dt,
- unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0)
+ unsigned char *p, off_t len, uint32_t seed, uint8_t *basedata,
+ off_t basefile_offset0)
{
int i;
uint32_t h;
@@ -276,7 +278,7 @@ lookupblk_mem(struct got_delta_block **block, struct g
*block = NULL;
- h = hashblk(p, len);
+ h = hashblk(p, len, seed);
for (i = h % dt->nalloc; dt->blocks[i].len != 0;
i = (i + 1) % dt->nalloc) {
if (dt->blocks[i].hash != h ||
@@ -355,7 +357,10 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
const struct got_error *err = NULL;
uint32_t h;
const off_t offset0 = fileoffset;
+ uint32_t seed;
+ seed = arc4random();
+
*dt = calloc(1, sizeof(**dt));
if (*dt == NULL)
return got_error_from_errno("calloc");
@@ -379,7 +384,7 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
goto done;
if (blocklen == 0)
break;
- h = hashblk(buf, blocklen);
+ h = hashblk(buf, blocklen, seed);
err = addblk(*dt, f, offset0, blocklen,
fileoffset - offset0, h);
if (err)
@@ -405,7 +410,10 @@ got_deltify_init_mem(struct got_delta_table **dt, uint
const struct got_error *err = NULL;
uint32_t h;
const off_t offset0 = fileoffset;
+ uint32_t seed;
+ seed = arc4random();
+
*dt = calloc(1, sizeof(**dt));
if (*dt == NULL)
return got_error_from_errno("calloc");
@@ -425,7 +433,7 @@ got_deltify_init_mem(struct got_delta_table **dt, uint
goto done;
if (blocklen == 0)
break;
- h = hashblk(data + fileoffset, blocklen);
+ h = hashblk(data + fileoffset, blocklen, seed);
err = addblk_mem(*dt, data, offset0, blocklen,
fileoffset - offset0, h);
if (err)
@@ -629,10 +637,13 @@ got_deltify(struct got_delta_instruction **deltas, int
const off_t offset0 = fileoffset;
size_t nalloc = 0;
const size_t alloc_chunk_size = 64;
+ uint32_t seed;
*deltas = NULL;
*ndeltas = 0;
+ seed = arc4random();
+
/*
* offset0 indicates where data to be deltified begins.
* For example, we want to avoid deltifying a Git object header at
@@ -663,7 +674,7 @@ got_deltify(struct got_delta_instruction **deltas, int
}
break;
}
- err = lookupblk(&block, dt, buf, blocklen, basefile,
+ err = lookupblk(&block, dt, buf, blocklen, seed, basefile,
basefile_offset0);
if (err)
break;
@@ -716,10 +727,13 @@ got_deltify_file_mem(struct got_delta_instruction **de
const off_t offset0 = fileoffset;
size_t nalloc = 0;
const size_t alloc_chunk_size = 64;
+ uint32_t seed;
*deltas = NULL;
*ndeltas = 0;
+ seed = arc4random();
+
/*
* offset0 indicates where data to be deltified begins.
* For example, we want to avoid deltifying a Git object header at
@@ -750,7 +764,7 @@ got_deltify_file_mem(struct got_delta_instruction **de
}
break;
}
- err = lookupblk_mem(&block, dt, buf, blocklen, basedata,
+ err = lookupblk_mem(&block, dt, buf, blocklen, seed, basedata,
basefile_offset0);
if (err)
break;
@@ -803,10 +817,13 @@ got_deltify_mem_file(struct got_delta_instruction **de
const off_t offset0 = fileoffset;
size_t nalloc = 0;
const size_t alloc_chunk_size = 64;
+ uint32_t seed;
*deltas = NULL;
*ndeltas = 0;
+ seed = arc4random();
+
*deltas = reallocarray(NULL, alloc_chunk_size,
sizeof(struct got_delta_instruction));
if (*deltas == NULL)
@@ -828,7 +845,7 @@ got_deltify_mem_file(struct got_delta_instruction **de
}
break;
}
- err = lookupblk(&block, dt, data + fileoffset, blocklen,
+ err = lookupblk(&block, dt, data + fileoffset, blocklen, seed,
basefile, basefile_offset0);
if (err)
break;
@@ -878,10 +895,13 @@ got_deltify_mem_mem(struct got_delta_instruction **del
const off_t offset0 = fileoffset;
size_t nalloc = 0;
const size_t alloc_chunk_size = 64;
+ uint32_t seed;
*deltas = NULL;
*ndeltas = 0;
+ seed = arc4random();
+
*deltas = reallocarray(NULL, alloc_chunk_size,
sizeof(struct got_delta_instruction));
if (*deltas == NULL)
@@ -904,7 +924,7 @@ got_deltify_mem_mem(struct got_delta_instruction **del
break;
}
err = lookupblk_mem(&block, dt, data + fileoffset, blocklen,
- basedata, basefile_offset0);
+ seed, basedata, basefile_offset0);
if (err)
break;
if (block != NULL) {
blob - d748288cff18ac9179407bbf2ecb771ad11452c2
blob + f92a4d04144e6c8cbea41506b7f00f552a1942ad
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -51,6 +51,7 @@
#include "got_lib_object_cache.h"
#include "got_lib_deflate.h"
#include "got_lib_pack.h"
+#include "got_lib_pack_create.h"
#include "got_lib_privsep.h"
#include "got_lib_repository.h"
#include "got_lib_ratelimit.h"
@@ -104,7 +105,7 @@ struct got_pack_metavec {
static const struct got_error *
alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
- const char *path, int obj_type, time_t mtime)
+ const char *path, int obj_type, time_t mtime, uint32_t seed)
{
struct got_pack_meta *m;
@@ -116,7 +117,7 @@ alloc_meta(struct got_pack_meta **new, struct got_obje
memcpy(&m->id, id, sizeof(m->id));
- m->path_hash = murmurhash2(path, strlen(path), 0xd70af26a);
+ m->path_hash = murmurhash2(path, strlen(path), seed);
m->obj_type = obj_type;
m->mtime = mtime;
*new = m;
@@ -856,8 +857,8 @@ search_packidx(int *found, struct got_object_id *id,
static const struct got_error *
add_object(int want_meta, struct got_object_idset *idset,
struct got_object_id *id, const char *path, int obj_type,
- time_t mtime, int loose_obj_only, struct got_repository *repo,
- int *ncolored, int *nfound, int *ntrees,
+ time_t mtime, uint32_t seed, int loose_obj_only,
+ struct got_repository *repo, int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl)
{
@@ -874,7 +875,7 @@ add_object(int want_meta, struct got_object_idset *ids
}
if (want_meta) {
- err = alloc_meta(&m, id, path, obj_type, mtime);
+ err = alloc_meta(&m, id, path, obj_type, mtime, seed);
if (err)
return err;
@@ -900,7 +901,7 @@ static const struct got_error *
load_tree_entries(struct got_object_id_queue *ids, int want_meta,
struct got_object_idset *idset, struct got_object_idset *idset_exclude,
struct got_object_id *tree_id,
- const char *dpath, time_t mtime, struct got_repository *repo,
+ const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo,
int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
@@ -953,8 +954,8 @@ load_tree_entries(struct got_object_id_queue *ids, int
} else if (S_ISREG(mode) || S_ISLNK(mode)) {
err = add_object(want_meta,
want_meta ? idset : idset_exclude, id, p,
- GOT_OBJ_TYPE_BLOB, mtime, loose_obj_only, repo,
- ncolored, nfound, ntrees,
+ GOT_OBJ_TYPE_BLOB, mtime, seed, loose_obj_only,
+ repo, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl);
if (err)
break;
@@ -975,7 +976,7 @@ static const struct got_error *
load_tree(int want_meta, struct got_object_idset *idset,
struct got_object_idset *idset_exclude,
struct got_object_id *tree_id, const char *dpath, time_t mtime,
- struct got_repository *repo, int loose_obj_only,
+ uint32_t seed, struct got_repository *repo, int loose_obj_only,
int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
@@ -1022,7 +1023,7 @@ load_tree(int want_meta, struct got_object_idset *idse
err = add_object(want_meta, want_meta ? idset : idset_exclude,
&qid->id, path, GOT_OBJ_TYPE_TREE,
- mtime, loose_obj_only, repo,
+ mtime, seed, loose_obj_only, repo,
ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
if (err) {
free(qid->data);
@@ -1032,7 +1033,7 @@ load_tree(int want_meta, struct got_object_idset *idse
err = load_tree_entries(&tree_ids, want_meta, idset,
idset_exclude, &qid->id,
- path, mtime, repo, loose_obj_only, ncolored, nfound,
+ path, mtime, seed, repo, loose_obj_only, ncolored, nfound,
ntrees, progress_cb, progress_arg, rl,
cancel_cb, cancel_arg);
free(qid->data);
@@ -1050,8 +1051,8 @@ load_tree(int want_meta, struct got_object_idset *idse
static const struct got_error *
load_commit(int want_meta, struct got_object_idset *idset,
struct got_object_idset *idset_exclude,
- struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
- int *ncolored, int *nfound, int *ntrees,
+ struct got_object_id *id, struct got_repository *repo, uint32_t seed,
+ int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
{
@@ -1077,7 +1078,7 @@ load_commit(int want_meta, struct got_object_idset *id
err = add_object(want_meta, want_meta ? idset : idset_exclude,
id, "", GOT_OBJ_TYPE_COMMIT,
- got_object_commit_get_committer_time(commit),
+ got_object_commit_get_committer_time(commit), seed,
loose_obj_only, repo,
ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
if (err)
@@ -1085,7 +1086,7 @@ load_commit(int want_meta, struct got_object_idset *id
err = load_tree(want_meta, idset, idset_exclude,
got_object_commit_get_tree_id(commit),
- "", got_object_commit_get_committer_time(commit),
+ "", got_object_commit_get_committer_time(commit), seed,
repo, loose_obj_only, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
done:
@@ -1096,8 +1097,8 @@ done:
static const struct got_error *
load_tag(int want_meta, struct got_object_idset *idset,
struct got_object_idset *idset_exclude,
- struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
- int *ncolored, int *nfound, int *ntrees,
+ struct got_object_id *id, struct got_repository *repo, uint32_t seed,
+ int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
{
@@ -1123,7 +1124,7 @@ load_tag(int want_meta, struct got_object_idset *idset
err = add_object(want_meta, want_meta ? idset : idset_exclude,
id, "", GOT_OBJ_TYPE_TAG,
- got_object_tag_get_tagger_time(tag), loose_obj_only, repo,
+ got_object_tag_get_tagger_time(tag), seed, loose_obj_only, repo,
ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
if (err)
goto done;
@@ -1131,16 +1132,16 @@ load_tag(int want_meta, struct got_object_idset *idset
switch (got_object_tag_get_object_type(tag)) {
case GOT_OBJ_TYPE_COMMIT:
err = load_commit(want_meta, idset, idset_exclude,
- got_object_tag_get_object_id(tag), repo, loose_obj_only,
- ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
- cancel_cb, cancel_arg);
+ got_object_tag_get_object_id(tag), repo, seed,
+ loose_obj_only, ncolored, nfound, ntrees,
+ progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
break;
case GOT_OBJ_TYPE_TREE:
err = load_tree(want_meta, idset, idset_exclude,
got_object_tag_get_object_id(tag), "",
- got_object_tag_get_tagger_time(tag), repo, loose_obj_only,
- ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
- cancel_cb, cancel_arg);
+ got_object_tag_get_tagger_time(tag), seed, repo,
+ loose_obj_only, ncolored, nfound, ntrees,
+ progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
break;
default:
break;
@@ -1451,8 +1452,9 @@ static const struct got_error *
load_object_ids(int *ncolored, int *nfound, int *ntrees,
struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
struct got_object_id **ours, int nours, struct got_repository *repo,
- int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
- struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
+ uint32_t seed, int loose_obj_only, got_pack_progress_cb progress_cb,
+ void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb,
+ void *cancel_arg)
{
const struct got_error *err = NULL;
struct got_object_id **ids = NULL;
@@ -1481,14 +1483,14 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
return err;
if (obj_type == GOT_OBJ_TYPE_COMMIT) {
err = load_commit(0, idset, idset_exclude, id, repo,
- loose_obj_only, ncolored, nfound, ntrees,
+ seed, loose_obj_only, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl,
cancel_cb, cancel_arg);
if (err)
goto done;
} else if (obj_type == GOT_OBJ_TYPE_TAG) {
err = load_tag(0, idset, idset_exclude, id, repo,
- loose_obj_only, ncolored, nfound, ntrees,
+ seed, loose_obj_only, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl,
cancel_cb, cancel_arg);
if (err)
@@ -1497,8 +1499,8 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
}
for (i = 0; i < nobj; i++) {
- err = load_commit(1, idset, idset_exclude,
- ids[i], repo, loose_obj_only, ncolored, nfound, ntrees,
+ err = load_commit(1, idset, idset_exclude, ids[i], repo,
+ seed, loose_obj_only, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
if (err)
goto done;
@@ -1519,7 +1521,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
if (obj_type != GOT_OBJ_TYPE_TAG)
continue;
err = load_tag(1, idset, idset_exclude, id, repo,
- loose_obj_only, ncolored, nfound, ntrees,
+ seed, loose_obj_only, ncolored, nfound, ntrees,
progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
if (err)
goto done;
@@ -1943,7 +1945,10 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
struct got_pack_metavec deltify, reuse;
int ncolored = 0, nfound = 0, ntrees = 0;
size_t ndeltify;
+ uint32_t seed;
+ seed = arc4random();
+
memset(&deltify, 0, sizeof(deltify));
memset(&reuse, 0, sizeof(reuse));
@@ -1954,7 +1959,7 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
return got_error_from_errno("got_object_idset_alloc");
err = load_object_ids(&ncolored, &nfound, &ntrees, idset, theirs,
- ntheirs, ours, nours, repo, loose_obj_only,
+ ntheirs, ours, nours, repo, seed, loose_obj_only,
progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
if (err)
goto done;
fix path meta-data used for packing