From: Stefan Sperling Subject: encode small deltas in memory while packing To: gameoftrees@openbsd.org Date: Sat, 8 Jan 2022 02:09:47 +0100 This patch restores the ability to encode deltas in memory rather than writing them out to a temporary file. This feature is used for small deltas only and speeds up the phase where we write out the generated pack file. This patch needs to be applied on top of my other patch which lets 'gotadmin pack' map files into memory. diff refs/heads/raw-mmap refs/heads/delta-encode blob - 7342a8ac1b751a6ef554f7a3b5d05eed6d664933 blob + f9ef76e1b2a866d879061a355ecdda783cc211a4 --- lib/pack_create.c +++ lib/pack_create.c @@ -69,8 +69,9 @@ struct got_pack_meta { /* The best delta we picked */ struct got_pack_meta *head; struct got_pack_meta *prev; + unsigned char *delta_buf; /* if not encoded in delta cache file */ off_t delta_offset; /* offset in delta cache file */ - off_t delta_len; /* length in delta cache file */ + off_t delta_len; /* encoded delta length */ int nchain; /* Only used for delta window */ @@ -121,6 +122,8 @@ clear_meta(struct got_pack_meta *meta) return; free(meta->path); meta->path = NULL; + free(meta->delta_buf); + meta->delta_buf = NULL; } static void @@ -152,10 +155,11 @@ delta_order_cmp(const void *pa, const void *pb) return got_object_id_cmp(&a->id, &b->id); } -static int +static off_t delta_size(struct got_delta_instruction *deltas, int ndeltas) { - int i, size = 32; + int i; + off_t size = 32; for (i = 0; i < ndeltas; i++) { if (deltas[i].copy) size += GOT_DELTA_SIZE_SHIFT; @@ -166,6 +170,135 @@ delta_size(struct got_delta_instruction *deltas, int n } static const struct got_error * +append(unsigned char **p, size_t *len, off_t *sz, void *seg, int nseg) +{ + char *n; + + if (*len + nseg >= *sz) { + while (*len + nseg >= *sz) + *sz += *sz / 2; + n = realloc(*p, *sz); + if (n == NULL) + return got_error_from_errno("realloc"); + *p = n; + } + memcpy(*p + *len, seg, nseg); + *len += nseg; + return NULL; +} + +static const struct got_error * +encode_delta_in_mem(struct got_pack_meta *m, struct got_raw_object *o, + struct got_delta_instruction *deltas, int ndeltas, + off_t delta_size, off_t base_size) +{ + const struct got_error *err; + unsigned char buf[16], *bp; + int i, j; + size_t len = 0; + off_t n; + struct got_delta_instruction *d; + + m->delta_buf = malloc(delta_size); + if (m->delta_buf == NULL) + return got_error_from_errno("calloc"); + + /* base object size */ + buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK; + n = base_size >> GOT_DELTA_SIZE_SHIFT; + for (i = 1; n > 0; i++) { + buf[i - 1] |= GOT_DELTA_SIZE_MORE; + buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; + n >>= GOT_DELTA_SIZE_SHIFT; + } + err = append(&m->delta_buf, &len, &delta_size, buf, i); + if (err) + return err; + + /* target object size */ + buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK; + n = o->size >> GOT_DELTA_SIZE_SHIFT; + for (i = 1; n > 0; i++) { + buf[i - 1] |= GOT_DELTA_SIZE_MORE; + buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; + n >>= GOT_DELTA_SIZE_SHIFT; + } + err = append(&m->delta_buf, &len, &delta_size, buf, i); + if (err) + return err; + + for (j = 0; j < ndeltas; j++) { + d = &deltas[j]; + if (d->copy) { + n = d->offset; + bp = &buf[1]; + buf[0] = GOT_DELTA_BASE_COPY; + for (i = 0; i < 4; i++) { + /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */ + buf[0] |= 1 << i; + *bp++ = n & 0xff; + n >>= 8; + if (n == 0) + break; + } + + n = d->len; + if (n != GOT_DELTA_COPY_DEFAULT_LEN) { + /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */ + for (i = 0; i < 3 && n > 0; i++) { + buf[0] |= 1 << (i + 4); + *bp++ = n & 0xff; + n >>= 8; + } + } + err = append(&m->delta_buf, &len, &delta_size, + buf, bp - buf); + if (err) + return err; + } else if (o->f == NULL) { + n = 0; + while (n != d->len) { + buf[0] = (d->len - n < 127) ? d->len - n : 127; + err = append(&m->delta_buf, &len, &delta_size, + buf, 1); + if (err) + return err; + err = append(&m->delta_buf, &len, &delta_size, + o->data + o->hdrlen + d->offset + n, + buf[0]); + if (err) + return err; + n += buf[0]; + } + } else { + char content[128]; + size_t r; + if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) + return got_error_from_errno("fseeko"); + n = 0; + while (n != d->len) { + buf[0] = (d->len - n < 127) ? d->len - n : 127; + err = append(&m->delta_buf, &len, &delta_size, + buf, 1); + if (err) + return err; + r = fread(content, 1, buf[0], o->f); + if (r != buf[0]) + return got_ferror(o->f, GOT_ERR_IO); + err = append(&m->delta_buf, &len, &delta_size, + content, buf[0]); + if (err) + return err; + n += buf[0]; + } + } + } + + m->delta_len = len; + return NULL; +} + +static const struct got_error * encode_delta(struct got_pack_meta *m, struct got_raw_object *o, struct got_delta_instruction *deltas, int ndeltas, off_t base_size, FILE *f) @@ -262,6 +395,7 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o } } + m->delta_len = ftello(f) - m->delta_offset; return NULL; } @@ -294,7 +428,8 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in struct got_pack_meta *m = NULL, *base = NULL; struct got_raw_object *raw = NULL, *base_raw = NULL; struct got_delta_instruction *deltas = NULL, *best_deltas = NULL; - int i, j, size, best_size, ndeltas, best_ndeltas; + int i, j, ndeltas, best_ndeltas; + off_t size, best_size; const int max_base_candidates = 3; int outfd = -1; @@ -412,15 +547,19 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in } if (best_ndeltas > 0) { - m->delta_offset = ftello(delta_cache); - err = encode_delta(m, raw, best_deltas, - best_ndeltas, m->prev->size, delta_cache); + if (best_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + err = encode_delta_in_mem(m, raw, best_deltas, + best_ndeltas, best_size, m->prev->size); + } else { + m->delta_offset = ftello(delta_cache); + err = encode_delta(m, raw, best_deltas, + best_ndeltas, m->prev->size, delta_cache); + } free(best_deltas); best_deltas = NULL; best_ndeltas = 0; if (err) goto done; - m->delta_len = ftello(delta_cache) - m->delta_offset; } got_object_raw_close(raw); @@ -1193,6 +1332,43 @@ packoff(char *hdr, off_t off) } static const struct got_error * +deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *packfile, + struct got_pack_meta *m, int use_offset_deltas) +{ + const struct got_error *err; + char buf[32]; + int nh; + + if (use_offset_deltas && m->prev->off != 0) { + err = packhdr(&nh, buf, sizeof(buf), + GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len); + if (err) + return err; + nh += packoff(buf + nh, m->off - m->prev->off); + err = hwrite(packfile, buf, nh, ctx); + if (err) + return err; + *packfile_size += nh; + } else { + err = packhdr(&nh, buf, sizeof(buf), + GOT_OBJ_TYPE_REF_DELTA, m->delta_len); + if (err) + return err; + err = hwrite(packfile, buf, nh, ctx); + if (err) + return err; + *packfile_size += nh; + err = hwrite(packfile, m->prev->id.sha1, + sizeof(m->prev->id.sha1), ctx); + if (err) + return err; + *packfile_size += sizeof(m->prev->id.sha1); + } + + return NULL; +} + +static const struct got_error * genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache, struct got_pack_meta **meta, int nmeta, int nours, int use_offset_deltas, struct got_repository *repo, @@ -1267,6 +1443,18 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt packfile_size += outlen; got_object_raw_close(raw); raw = NULL; + } else if (m->delta_buf) { + err = deltahdr(&packfile_size, &ctx, packfile, + m, use_offset_deltas); + if (err) + goto done; + err = got_deflate_to_file_mmap(&outlen, + m->delta_buf, 0, m->delta_len, packfile, &csum); + if (err) + goto done; + packfile_size += outlen; + free(m->delta_buf); + m->delta_buf = NULL; } else { off_t remain; if (delta_file == NULL) { @@ -1309,30 +1497,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt } remain -= n; } - if (use_offset_deltas && m->prev->off != 0) { - err = packhdr(&nh, buf, sizeof(buf), - GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len); - if (err) - goto done; - nh += packoff(buf + nh, - m->off - m->prev->off); - err = hwrite(packfile, buf, nh, &ctx); - if (err) - goto done; - packfile_size += nh; - } else { - err = packhdr(&nh, buf, sizeof(buf), - GOT_OBJ_TYPE_REF_DELTA, m->delta_len); - err = hwrite(packfile, buf, nh, &ctx); - if (err) - goto done; - packfile_size += nh; - err = hwrite(packfile, m->prev->id.sha1, - sizeof(m->prev->id.sha1), &ctx); - packfile_size += sizeof(m->prev->id.sha1); - if (err) - goto done; - } + err = deltahdr(&packfile_size, &ctx, packfile, + m, use_offset_deltas); + if (err) + goto done; if (fseeko(delta_file, 0L, SEEK_SET) == -1) { err = got_error_from_errno("fseeko"); goto done;