From: Stefan Sperling Subject: process deltas in compressed form To: gameoftrees@openbsd.org Date: Sat, 30 Apr 2022 18:31:23 +0200 During packing we currently decompress reused deltas while reading them from the pack file, and store the decompressed data either in memory or in the delta cache file. The same applies to newly computed deltas; we write them to the delta cache file in uncompressed form, and compress the delta data while copying it into the generated pack file. This approach works, but it is wasteful. A 4GB /tmp partition will run out of space during 'gotadmin pack -a' in a copy of the OpenBSD src repo because the delta cache file grows too large. With the patch below, we store deltas in compressed form. Reused deltas will be copied as-is from their pack file, so we won't waste time compressing them again. We still decompress such deltas once in order to verify that decompression succeeds. This is a sanity check also performed by Git. It is intended to protect against silent bitrot. So I decided to do the same. We would need to decompress a delta anyway at least in part because we need to read two size values stored at the beginning of a delta stream. With this patch my system no longer runs out of space in /tmp when repacking the OpenBSD src repo. I am adding more wrapper functions around libz. I am not happy about that as we already have a lot of glue code on top of libz. But the existing abstractions do not support incremental writes to a file with compression; they only support one-shot compression of an existing file or memory buffer. It would be good to tidy this up a bit and reduce the number of abstractions in use here. But that can be done later. ok?
diff cf8f868e7c97644d885e9cc2a06debbe9eac72b0 da308c0ea675766ef4047c919ba7eb6d104d842e blob - 3c97a77f77b3da0548ab67dbcb2685723456ee39 blob + 8be992eef33b9ee118e5dc75d039f224e5acb657 --- lib/deflate.c +++ lib/deflate.c @@ -136,9 +136,9 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, return NULL; } -const struct got_error * -got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, - size_t len, size_t *outlenp, size_t *consumed) +static const struct got_error * +deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, + size_t len, size_t *outlenp, size_t *consumed, int flush_on_eof) { z_stream *z = &zb->z; size_t last_total_out = z->total_out; @@ -159,7 +159,8 @@ got_deflate_read_mmap(struct got_deflate_buf *zb, uint z->avail_in = len - *consumed; if (z->avail_in == 0) { /* EOF */ - ret = deflate(z, Z_FINISH); + if (flush_on_eof) + ret = deflate(z, Z_FINISH); break; } } @@ -179,6 +180,53 @@ got_deflate_read_mmap(struct got_deflate_buf *zb, uint return NULL; } +const struct got_error * +got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, + size_t len, size_t *outlenp, size_t *consumed) +{ + return deflate_read_mmap(zb, map, offset, len, outlenp, consumed, 1); +} + +const struct got_error * +got_deflate_flush(struct got_deflate_buf *zb, FILE *outfile, + struct got_deflate_checksum *csum, off_t *outlenp) +{ + int ret; + size_t n; + z_stream *z = &zb->z; + + if (z->avail_in != 0) { + return got_error_msg(GOT_ERR_COMPRESSION, + "cannot flush zb with pending input data"); + } + + do { + size_t avail, last_total_out = zb->z.total_out; + + z->next_out = zb->outbuf; + z->avail_out = zb->outlen; + + ret = deflate(z, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) + return got_error(GOT_ERR_COMPRESSION); + + avail = z->total_out - last_total_out; + if (avail > 0) { + n = fwrite(zb->outbuf, avail, 1, outfile); + if (n != 1) + return got_ferror(outfile, GOT_ERR_IO); + if (csum) + csum_output(csum, 
zb->outbuf, avail); + if (outlenp) + *outlenp += avail; + } + } while (ret != Z_STREAM_END); + + zb->flags &= ~GOT_DEFLATE_F_HAVE_MORE; + return NULL; + +} + void got_deflate_end(struct got_deflate_buf *zb) { @@ -263,3 +311,97 @@ done: got_deflate_end(&zb); return err; } + +const struct got_error * +got_deflate_append_to_file_mmap(struct got_deflate_buf *zb, off_t *outlen, + uint8_t *map, size_t offset, size_t len, FILE *outfile, + struct got_deflate_checksum *csum) +{ + const struct got_error *err; + size_t avail, consumed; + + do { + err = deflate_read_mmap(zb, map, offset, len, &avail, + &consumed, 0); + if (err) + break; + offset += consumed; + len -= consumed; + if (avail > 0) { + size_t n; + n = fwrite(zb->outbuf, avail, 1, outfile); + if (n != 1) { + err = got_ferror(outfile, GOT_ERR_IO); + break; + } + if (csum) + csum_output(csum, zb->outbuf, avail); + if (outlen) + *outlen += avail; + } + } while ((zb->flags & GOT_DEFLATE_F_HAVE_MORE) && len > 0); + + return err; +} + +const struct got_error * +got_deflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, + size_t *consumed_total, struct got_deflate_checksum *csum, uint8_t *map, + size_t offset, size_t len) +{ + const struct got_error *err; + size_t avail, consumed; + struct got_deflate_buf zb; + void *newbuf; + int nbuf = 1; + + if (outbuf) { + *outbuf = malloc(GOT_DEFLATE_BUFSIZE); + if (*outbuf == NULL) + return got_error_from_errno("malloc"); + err = got_deflate_init(&zb, *outbuf, GOT_DEFLATE_BUFSIZE); + if (err) { + free(*outbuf); + *outbuf = NULL; + return err; + } + } else { + err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); + if (err) + return err; + } + + *outlen = 0; + if (consumed_total) + *consumed_total = 0; + do { + err = got_deflate_read_mmap(&zb, map, offset, len, &avail, + &consumed); + if (err) + goto done; + offset += consumed; + if (consumed_total) + *consumed_total += consumed; + len -= consumed; + if (avail > 0 && csum) + csum_output(csum, zb.outbuf, avail); + *outlen += avail; + 
if ((zb.flags & GOT_DEFLATE_F_HAVE_MORE) && outbuf != NULL) { + newbuf = reallocarray(*outbuf, ++nbuf, + GOT_DEFLATE_BUFSIZE); + if (newbuf == NULL) { + err = got_error_from_errno("reallocarray"); + free(*outbuf); + *outbuf = NULL; + *outlen = 0; + goto done; + } + *outbuf = newbuf; + zb.outbuf = newbuf + *outlen; + zb.outlen = (nbuf * GOT_DEFLATE_BUFSIZE) - *outlen; + } + } while (zb.flags & GOT_DEFLATE_F_HAVE_MORE); +done: + got_deflate_end(&zb); + return err; +} blob - 1c429af85a27229451e05798e9511d0a8f474968 blob + 09a8755cf062db2ffd1f7e2c26deef470dbba250 --- lib/got_lib_deflate.h +++ lib/got_lib_deflate.h @@ -39,8 +39,17 @@ const struct got_error *got_deflate_init(struct got_de size_t); const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *, off_t, size_t *, off_t *); +const struct got_error *got_deflate_read_mmap(struct got_deflate_buf *, + uint8_t *, size_t, size_t, size_t *, size_t *); void got_deflate_end(struct got_deflate_buf *); const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *, struct got_deflate_checksum *); const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *, size_t, size_t, FILE *, struct got_deflate_checksum *); +const struct got_error *got_deflate_flush(struct got_deflate_buf *, FILE *, + struct got_deflate_checksum *, off_t *); +const struct got_error *got_deflate_append_to_file_mmap( + struct got_deflate_buf *, off_t *, uint8_t *, size_t, size_t, FILE *, + struct got_deflate_checksum *); +const struct got_error *got_deflate_to_mem_mmap(uint8_t **, size_t *, size_t *, + struct got_deflate_checksum *, uint8_t *, size_t, size_t); blob - 6af8d574c7b345c52d3e0c19759bf3ae6bd62b20 blob + 4bbe44dda07c97ba6f0ef878da0caadbe6de9741 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -104,7 +104,7 @@ const struct got_error *got_object_open_from_packfile( struct got_object_id *, struct got_pack *, struct got_packidx *, int, struct got_repository *); const struct got_error 
*got_object_read_raw_delta(uint64_t *, uint64_t *, - off_t *, off_t *, off_t *, struct got_object_id **, int, + off_t *, off_t *, off_t *, off_t *, struct got_object_id **, int, struct got_packidx *, int, struct got_object_id *, struct got_repository *); const struct got_error *got_object_read_header_privsep(struct got_object **, struct got_object_id *, struct got_repository *, int); blob - e8fb373e287ee80486d50ed07964d9d39924308d blob + 6a3d3981c9afd96d48ef7746b2d0b1d78793a7ca --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -212,7 +212,7 @@ const struct got_error *got_packfile_extract_object(st const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *, struct got_object *, struct got_pack *); const struct got_error *got_packfile_extract_raw_delta(uint8_t **, size_t *, - off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *, + size_t *, off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *, struct got_pack *, struct got_packidx *, int); struct got_pack *got_repo_get_cached_pack(struct got_repository *, const char *); blob - 110fe049d86c1a33fb3b33e4fe74ffa8a3dbbfa8 blob + e57f4dd3f8f4d207324b69c89c54442ae78cd5bb --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -284,6 +284,7 @@ struct got_imsg_raw_delta { uint64_t base_size; uint64_t result_size; off_t delta_size; + off_t delta_compressed_size; off_t delta_offset; off_t delta_out_offset; @@ -662,8 +663,9 @@ const struct got_error *got_privsep_send_raw_delta_req struct got_object_id *); const struct got_error *got_privsep_send_raw_delta_outfd(struct imsgbuf *, int); const struct got_error *got_privsep_send_raw_delta(struct imsgbuf *, uint64_t, - uint64_t, off_t, off_t, off_t, struct got_object_id *); + uint64_t, off_t, off_t, off_t, off_t, struct got_object_id *); const struct got_error *got_privsep_recv_raw_delta(uint64_t *, uint64_t *, - off_t *, off_t *, off_t *, struct got_object_id **, struct imsgbuf *); + off_t *, off_t *, off_t *, off_t *, struct got_object_id 
**, + struct imsgbuf *); void got_privsep_exec_child(int[2], const char *, const char *); blob - b87e6eecb828ef98889452c3dd9b205e5eaf3c33 blob + 4e5facc7f5e3c665aa540bb9caf3299f68626c2d --- lib/object.c +++ lib/object.c @@ -388,8 +388,8 @@ got_object_open_from_packfile(struct got_object **obj, const struct got_error * got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size, - off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset, - struct got_object_id **base_id, int delta_cache_fd, + off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset, + off_t *delta_out_offset, struct got_object_id **base_id, int delta_cache_fd, struct got_packidx *packidx, int obj_idx, struct got_object_id *id, struct got_repository *repo) { @@ -400,6 +400,7 @@ got_object_read_raw_delta(uint64_t *base_size, uint64_ *base_size = 0; *result_size = 0; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *delta_out_offset = 0; @@ -439,7 +440,8 @@ got_object_read_raw_delta(uint64_t *base_size, uint64_ return err; return got_privsep_recv_raw_delta(base_size, result_size, delta_size, - delta_offset, delta_out_offset, base_id, pack->privsep_child->ibuf); + delta_compressed_size, delta_offset, delta_out_offset, base_id, + pack->privsep_child->ibuf); } static const struct got_error * blob - d875046e25b7f0b4172baa4dbd064445a73f18c4 blob + e901a95d83890c031e00dc5f8a9ee560e51ace41 --- lib/pack.c +++ lib/pack.c @@ -902,23 +902,33 @@ got_pack_parse_offset_delta(off_t *base_offset, size_t static const struct got_error * read_delta_data(uint8_t **delta_buf, size_t *delta_len, - size_t delta_data_offset, struct got_pack *pack) + size_t *delta_compressed_len, size_t delta_data_offset, + struct got_pack *pack) { const struct got_error *err = NULL; + size_t consumed = 0; if (pack->map) { if (delta_data_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); err = got_inflate_to_mem_mmap(delta_buf, delta_len, - NULL, NULL, pack->map, delta_data_offset, + 
&consumed, NULL, pack->map, delta_data_offset, pack->filesize - delta_data_offset); + if (err) + return err; } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); - err = got_inflate_to_mem_fd(delta_buf, delta_len, NULL, - NULL, 0, pack->fd); + err = got_inflate_to_mem_fd(delta_buf, delta_len, + &consumed, NULL, 0, pack->fd); + if (err) + return err; } - return err; + + if (delta_compressed_len) + *delta_compressed_len = consumed; + + return NULL; } static const struct got_error * @@ -1200,7 +1210,7 @@ got_pack_get_delta_chain_max_size(uint64_t *max_size, if (delta_buf == NULL) { cached = 0; err = read_delta_data(&delta_buf, &delta_len, - delta->data_offset, pack); + NULL, delta->data_offset, pack); if (err) return err; err = got_delta_cache_add(pack->delta_cache, @@ -1336,7 +1346,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, pack->delta_cache, delta->data_offset); if (delta_buf == NULL) { cached = 0; - err = read_delta_data(&delta_buf, &delta_len, + err = read_delta_data(&delta_buf, &delta_len, NULL, delta->data_offset, pack); if (err) goto done; @@ -1482,7 +1492,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz pack->delta_cache, delta->data_offset); if (delta_buf == NULL) { cached = 0; - err = read_delta_data(&delta_buf, &delta_len, + err = read_delta_data(&delta_buf, &delta_len, NULL, delta->data_offset, pack); if (err) goto done; @@ -1601,20 +1611,76 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size return err; } +static const struct got_error * +read_raw_delta_data(uint8_t **delta_buf, size_t *delta_len, + size_t *delta_len_compressed, uint64_t *base_size, uint64_t *result_size, + off_t delta_data_offset, struct got_pack *pack, struct got_packidx *packidx) +{ + const struct got_error *err = NULL; + + /* Validate decompression and obtain the decompressed size. 
*/ + err = read_delta_data(delta_buf, delta_len, delta_len_compressed, + delta_data_offset, pack); + if (err) + return err; + + /* Read delta base/result sizes from head of delta stream. */ + err = got_delta_get_sizes(base_size, result_size, + *delta_buf, *delta_len); + if (err) + goto done; + + /* Discard decompressed delta and read it again in compressed form. */ + free(*delta_buf); + *delta_buf = malloc(*delta_len_compressed); + if (*delta_buf == NULL) + return got_error_from_errno("malloc"); + if (pack->map) { + if (delta_data_offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); + memcpy(*delta_buf, pack->map + delta_data_offset, + *delta_len_compressed); + } else { + ssize_t n; + if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) + return got_error_from_errno("lseek"); + n = read(pack->fd, *delta_buf, *delta_len_compressed); + if (n < 0) { + err = got_error_from_errno("read"); + goto done; + } else if (n != *delta_len_compressed) { + err = got_error(GOT_ERR_IO); + goto done; + } + } +done: + if (err) { + free(*delta_buf); + *delta_buf = NULL; + *delta_len = 0; + *delta_len_compressed = 0; + *base_size = 0; + *result_size = 0; + } + return err; +} + const struct got_error * got_packfile_extract_raw_delta(uint8_t **delta_buf, size_t *delta_size, - off_t *delta_offset, off_t *base_offset, struct got_object_id *base_id, - uint64_t *base_size, uint64_t *result_size, struct got_pack *pack, - struct got_packidx *packidx, int idx) + size_t *delta_compressed_size, off_t *delta_offset, off_t *base_offset, + struct got_object_id *base_id, uint64_t *base_size, uint64_t *result_size, + struct got_pack *pack, struct got_packidx *packidx, int idx) { const struct got_error *err = NULL; off_t offset; uint8_t type; uint64_t size; size_t tslen, delta_hdrlen; + off_t delta_data_offset; *delta_buf = NULL; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *base_offset = 0; *base_size = 0; @@ -1659,8 +1725,9 @@ 
got_packfile_extract_raw_delta(uint8_t **delta_buf, si offset + delta_hdrlen < delta_hdrlen) return got_error(GOT_ERR_BAD_DELTA); - err = read_delta_data(delta_buf, delta_size, - offset + tslen + delta_hdrlen, pack); + delta_data_offset = offset + tslen + delta_hdrlen; + err = read_raw_delta_data(delta_buf, delta_size, delta_compressed_size, + base_size, result_size, delta_data_offset, pack, packidx); if (err) return err; @@ -1669,15 +1736,17 @@ got_packfile_extract_raw_delta(uint8_t **delta_buf, si goto done; } - err = got_delta_get_sizes(base_size, result_size, *delta_buf, size); - if (err) - goto done; - *delta_offset = offset; done: if (err) { free(*delta_buf); *delta_buf = NULL; + *delta_size = 0; + *delta_compressed_size = 0; + *delta_offset = 0; + *base_offset = 0; + *base_size = 0; + *result_size = 0; } return err; } blob - 14aa4357200fc7ebcefeb1306ac93ca9c8d7fb54 blob + f68616a611f695cceb96cb08ebd34242cb4a0f94 --- lib/pack_create.c +++ lib/pack_create.c @@ -52,6 +52,7 @@ #include "got_lib_privsep.h" #include "got_lib_repository.h" #include "got_lib_ratelimit.h" +#include "got_lib_inflate.h" #ifndef MIN #define MIN(_a,_b) ((_a) < (_b) ? 
(_a) : (_b)) @@ -75,9 +76,10 @@ struct got_pack_meta { /* The best delta we picked */ struct got_pack_meta *head; struct got_pack_meta *prev; - unsigned char *delta_buf; /* if not encoded in delta cache file */ - off_t delta_offset; /* offset in delta cache file */ + unsigned char *delta_buf; /* if encoded in memory (compressed) */ + off_t delta_offset; /* offset in delta cache file (compressed) */ off_t delta_len; /* encoded delta length */ + off_t delta_compressed_len; /* encoded+compressed delta length */ int nchain; int have_reused_delta; @@ -207,13 +209,15 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go const struct got_error *err; unsigned char buf[16], *bp; int i, j; - size_t len = 0; + size_t len = 0, compressed_len; + off_t bufsize = delta_size; off_t n; struct got_delta_instruction *d; + uint8_t *delta_buf; - m->delta_buf = malloc(delta_size); - if (m->delta_buf == NULL) - return got_error_from_errno("calloc"); + delta_buf = malloc(bufsize); + if (delta_buf == NULL) + return got_error_from_errno("malloc"); /* base object size */ buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK; @@ -223,9 +227,9 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - err = append(&m->delta_buf, &len, &delta_size, buf, i); + err = append(&delta_buf, &len, &bufsize, buf, i); if (err) - return err; + goto done; /* target object size */ buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK; @@ -235,9 +239,9 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - err = append(&m->delta_buf, &len, &delta_size, buf, i); + err = append(&delta_buf, &len, &bufsize, buf, i); if (err) - return err; + goto done; for (j = 0; j < ndeltas; j++) { d = &deltas[j]; @@ -263,51 +267,63 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go n >>= 8; } } - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, bp - 
buf); if (err) - return err; + goto done; } else if (o->f == NULL) { n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, 1); if (err) - return err; - err = append(&m->delta_buf, &len, &delta_size, + goto done; + err = append(&delta_buf, &len, &bufsize, o->data + o->hdrlen + d->offset + n, buf[0]); if (err) - return err; + goto done; n += buf[0]; } } else { char content[128]; size_t r; - if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) - return got_error_from_errno("fseeko"); + if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, 1); if (err) - return err; + goto done; r = fread(content, 1, buf[0], o->f); - if (r != buf[0]) - return got_ferror(o->f, GOT_ERR_IO); - err = append(&m->delta_buf, &len, &delta_size, + if (r != buf[0]) { + err = got_ferror(o->f, GOT_ERR_IO); + goto done; + } + err = append(&delta_buf, &len, &bufsize, content, buf[0]); if (err) - return err; + goto done; n += buf[0]; } } } + err = got_deflate_to_mem_mmap(&m->delta_buf, &compressed_len, + NULL, NULL, delta_buf, 0, len); + if (err) + goto done; + m->delta_len = len; - return NULL; + m->delta_compressed_len = compressed_len; +done: + free(delta_buf); + return err; } static const struct got_error * @@ -315,12 +331,18 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o struct got_delta_instruction *deltas, int ndeltas, off_t base_size, FILE *f) { + const struct got_error *err; unsigned char buf[16], *bp; int i, j; off_t n; - size_t w; + struct got_deflate_buf zb; struct got_delta_instruction *d; + off_t delta_len = 0, compressed_len = 0; + err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); + if (err) + return err; + /* base 
object size */ buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK; n = base_size >> GOT_DELTA_SIZE_SHIFT; @@ -329,10 +351,13 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - w = fwrite(buf, 1, i, f); - if (w != i) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, &compressed_len, + buf, 0, i, f, NULL); + if (err) + goto done; + delta_len += i; + /* target object size */ buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK; n = o->size >> GOT_DELTA_SIZE_SHIFT; @@ -341,10 +366,13 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - w = fwrite(buf, 1, i, f); - if (w != i) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, &compressed_len, + buf, 0, i, f, NULL); + if (err) + goto done; + delta_len += i; + for (j = 0; j < ndeltas; j++) { d = &deltas[j]; if (d->copy) { @@ -359,7 +387,6 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o if (n == 0) break; } - n = d->len; if (n != GOT_DELTA_COPY_DEFAULT_LEN) { /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */ @@ -369,46 +396,75 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o n >>= 8; } } - w = fwrite(buf, 1, bp - buf, f); - if (w != bp - buf) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, bp - buf, f, NULL); + if (err) + goto done; + delta_len += (bp - buf); } else if (o->f == NULL) { n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? 
d->len - n : 127; - w = fwrite(buf, 1, 1, f); - if (w != 1) - return got_ferror(f, GOT_ERR_IO); - w = fwrite(o->data + o->hdrlen + d->offset + n, - 1, buf[0], f); - if (w != buf[0]) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, 1, f, NULL); + if (err) + goto done; + delta_len++; + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, + o->data + o->hdrlen + d->offset + n, 0, + buf[0], f, NULL); + if (err) + goto done; + delta_len += buf[0]; n += buf[0]; } } else { char content[128]; size_t r; - if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) - return got_error_from_errno("fseeko"); + if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - w = fwrite(buf, 1, 1, f); - if (w != 1) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, 1, f, NULL); + if (err) + goto done; + delta_len++; r = fread(content, 1, buf[0], o->f); - if (r != buf[0]) - return got_ferror(o->f, GOT_ERR_IO); - w = fwrite(content, 1, buf[0], f); - if (w != buf[0]) - return got_ferror(f, GOT_ERR_IO); + if (r != buf[0]) { + err = got_ferror(o->f, GOT_ERR_IO); + goto done; + } + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, content, 0, buf[0], f, + NULL); + if (err) + goto done; + delta_len += buf[0]; n += buf[0]; } } } - m->delta_len = ftello(f) - m->delta_offset; - return NULL; + err = got_deflate_flush(&zb, f, NULL, &compressed_len); + if (err) + goto done; + + /* sanity check */ + if (compressed_len != ftello(f) - m->delta_offset) { + err = got_error(GOT_ERR_COMPRESSION); + goto done; + } + + m->delta_len = delta_len; + m->delta_compressed_len = compressed_len; +done: + got_deflate_end(&zb); + return err; } static const struct got_error * @@ -457,15 +513,16 @@ reuse_delta(int idx, struct got_pack_meta *m, struct g 
const struct got_error *err = NULL; struct got_pack_meta *base = NULL; struct got_object_id *base_obj_id = NULL; - off_t delta_len = 0, delta_offset = 0, delta_cache_offset = 0; + off_t delta_len = 0, delta_compressed_len = 0; + off_t delta_offset = 0, delta_cache_offset = 0; uint64_t base_size, result_size; if (m->have_reused_delta) return NULL; err = got_object_read_raw_delta(&base_size, &result_size, &delta_len, - &delta_offset, &delta_cache_offset, &base_obj_id, delta_cache_fd, - packidx, idx, &m->id, repo); + &delta_compressed_len, &delta_offset, &delta_cache_offset, + &base_obj_id, delta_cache_fd, packidx, idx, &m->id, repo); if (err) return err; @@ -477,6 +534,7 @@ reuse_delta(int idx, struct got_pack_meta *m, struct g goto done; m->delta_len = delta_len; + m->delta_compressed_len = delta_compressed_len; m->delta_offset = delta_cache_offset; m->prev = base; m->size = result_size; @@ -787,15 +845,6 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in best_ndeltas, best_size, m->prev->size); } else { m->delta_offset = ftello(delta_cache); - /* - * TODO: - * Storing compressed delta data in the delta - * cache file would probably be more efficient - * than writing uncompressed delta data here - * and compressing it while writing the pack - * file. This would also allow for reusing - * deltas in their compressed form. 
- */ err = encode_delta(m, raw, best_deltas, best_ndeltas, m->prev->size, delta_cache); } @@ -1485,7 +1534,7 @@ done: } const struct got_error * -hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx) +hwrite(FILE *f, void *buf, off_t len, SHA1_CTX *ctx) { size_t n; @@ -1496,6 +1545,28 @@ hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx) return NULL; } +const struct got_error * +hcopy(FILE *fsrc, FILE *fdst, off_t len, SHA1_CTX *ctx) +{ + unsigned char buf[65536]; + off_t remain = len; + size_t n; + + while (remain > 0) { + size_t copylen = MIN(sizeof(buf), remain); + n = fread(buf, 1, copylen, fsrc); + if (n != copylen) + return got_ferror(fsrc, GOT_ERR_IO); + SHA1Update(ctx, buf, copylen); + n = fwrite(buf, 1, copylen, fdst); + if (n != copylen) + return got_ferror(fdst, GOT_ERR_IO); + remain -= copylen; + } + + return NULL; +} + static void putbe32(char *b, uint32_t n) { @@ -1667,11 +1738,11 @@ write_packed_object(off_t *packfile_size, FILE *packfi err = deltahdr(packfile_size, ctx, packfile, m); if (err) goto done; - err = got_deflate_to_file_mmap(&outlen, - m->delta_buf, 0, m->delta_len, packfile, &csum); + err = hwrite(packfile, m->delta_buf, + m->delta_compressed_len, ctx); if (err) goto done; - *packfile_size += outlen; + *packfile_size += m->delta_compressed_len; free(m->delta_buf); m->delta_buf = NULL; } else { @@ -1683,11 +1754,11 @@ write_packed_object(off_t *packfile_size, FILE *packfi err = deltahdr(packfile_size, ctx, packfile, m); if (err) goto done; - err = got_deflate_to_file(&outlen, delta_cache, - m->delta_len, packfile, &csum); + err = hcopy(delta_cache, packfile, + m->delta_compressed_len, ctx); if (err) goto done; - *packfile_size += outlen; + *packfile_size += m->delta_compressed_len; } done: if (raw) @@ -1911,12 +1982,12 @@ got_pack_create(uint8_t *packsha1, FILE *packfile, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); if (err) goto done; - if (fseeko(delta_cache, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto 
done; - } } + if (fflush(delta_cache) == EOF) { + err = got_error_from_errno("fflush"); + goto done; + } err = genpack(packsha1, packfile, delta_cache, deltify.meta, deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees, nours, repo, progress_cb, progress_arg, &rl, blob - de7f8e9eb12711a08545a3396b393be498822f4c blob + 037c96037a94e944e0990944101d87c2ae0c88bc --- lib/privsep.c +++ lib/privsep.c @@ -2753,8 +2753,8 @@ got_privsep_send_raw_delta_outfd(struct imsgbuf *ibuf, const struct got_error * got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint64_t base_size, - uint64_t result_size, off_t delta_size, off_t delta_offset, - off_t delta_out_offset, struct got_object_id *base_id) + uint64_t result_size, off_t delta_size, off_t delta_compressed_size, + off_t delta_offset, off_t delta_out_offset, struct got_object_id *base_id) { struct got_imsg_raw_delta idelta; int ret; @@ -2762,6 +2762,7 @@ got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint6 idelta.base_size = base_size; idelta.result_size = result_size; idelta.delta_size = delta_size; + idelta.delta_compressed_size = delta_compressed_size; idelta.delta_offset = delta_offset; idelta.delta_out_offset = delta_out_offset; memcpy(idelta.base_id, base_id->sha1, SHA1_DIGEST_LENGTH); @@ -2776,8 +2777,8 @@ got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint6 const struct got_error * got_privsep_recv_raw_delta(uint64_t *base_size, uint64_t *result_size, - off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset, - struct got_object_id **base_id, struct imsgbuf *ibuf) + off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset, + off_t *delta_out_offset, struct got_object_id **base_id, struct imsgbuf *ibuf) { const struct got_error *err = NULL; struct imsg imsg; @@ -2787,6 +2788,7 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64 *base_size = 0; *result_size = 0; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *delta_out_offset = 0; *base_id = NULL; @@ -2807,6 
+2809,7 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64 *base_size = delta->base_size; *result_size = delta->result_size; *delta_size = delta->delta_size; + *delta_compressed_size = delta->delta_compressed_size; *delta_offset = delta->delta_offset; *delta_out_offset = delta->delta_out_offset; *base_id = calloc(1, sizeof(**base_id)); blob - b014128a336645e26ebddf6b06cecbdc652fc1b3 blob + 0b2b5ee521c3a402299aefd172c600c2e4f24c43 --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -869,7 +869,7 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i { const struct got_error *err = NULL; struct got_imsg_raw_delta_request req; - size_t datalen, delta_size; + size_t datalen, delta_size, delta_compressed_size; off_t delta_offset; uint8_t *delta_buf = NULL; struct got_object_id id, base_id; @@ -886,8 +886,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i imsg->fd = -1; err = got_packfile_extract_raw_delta(&delta_buf, &delta_size, - &delta_offset, &base_offset, &base_id, &base_size, &result_size, - pack, packidx, req.idx); + &delta_compressed_size, &delta_offset, &base_offset, &base_id, + &base_size, &result_size, pack, packidx, req.idx); if (err) goto done; @@ -902,8 +902,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i } delta_out_offset = ftello(delta_outfile); - w = fwrite(delta_buf, 1, delta_size, delta_outfile); - if (w != delta_size) { + w = fwrite(delta_buf, 1, delta_compressed_size, delta_outfile); + if (w != delta_compressed_size) { err = got_ferror(delta_outfile, GOT_ERR_IO); goto done; } @@ -913,7 +913,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i } err = got_privsep_send_raw_delta(ibuf, base_size, result_size, - delta_size, delta_offset, delta_out_offset, &base_id); + delta_size, delta_compressed_size, delta_offset, delta_out_offset, + &base_id); done: free(delta_buf); return err;