Download raw body.
compress delta data from delta_cache directly into pack file
While creating pack files in genpack() we currently copy each delta from the large "delta cache" file into a separate temporary file, and then copy and compress this temporary file into the pack file. This patch removes the need for an intermediate temporary file by allowing got_deflate_to_file() to read just a part of its source file, instead of always compressing its entire source file. We can now pass delta data from the delta_cache file directly into this function. This change has some no-op ripple effects into other code which calls into deflate.c, such as the commit logic. This code now needs to calculate the size of the object file it creates, which is cheap. While here, also fix the type of the output length parameters of got_deflate_to_file() and got_deflate_to_file_mmap(): File sizes should be traded in off_t, not size_t (size_t is used for memory buffer sizes). Tests are passing, and git fsck reports no issues on commits or pack files created with this new code. ok? diff 6b447a6decf6cde0663a251e1d400761580d7568 c3418ddcdf460457c19c4d288a9a0029450d50c6 blob - 52c3d2577c7b0308e62c7ff466152e61a38f3c50 blob + edb6ba2b65cdfd7ffdfd37b841e66e0d617db5e3 --- lib/deflate.c +++ lib/deflate.c @@ -90,7 +90,8 @@ csum_output(struct got_deflate_checksum *csum, const u } const struct got_error * -got_deflate_read(struct got_deflate_buf *zb, FILE *f, size_t *outlenp) +got_deflate_read(struct got_deflate_buf *zb, FILE *f, off_t len, + size_t *outlenp, off_t *consumed) { size_t last_total_out = zb->z.total_out; z_stream *z = &zb->z; @@ -100,9 +101,13 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, z->avail_out = zb->outlen; *outlenp = 0; + *consumed = 0; do { + size_t last_total_in = z->total_in; if (z->avail_in == 0) { - size_t n = fread(zb->inbuf, 1, zb->inlen, f); + size_t n = 0; + if (len > 0) + n = fread(zb->inbuf, 1, MIN(zb->inlen, len), f); if (n == 0) { if (ferror(f)) return got_ferror(f, GOT_ERR_IO); @@ -114,6 +119,7 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, z->avail_in = n; } ret = deflate(z, Z_NO_FLUSH); + *consumed += z->total_in - last_total_in; } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK) { @@ -178,11 +184,12 @@ got_deflate_end(struct got_deflate_buf *zb) } const struct got_error * -got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outfile, - struct got_deflate_checksum *csum) +got_deflate_to_file(off_t *outlen, FILE *infile, off_t len, + FILE *outfile, struct got_deflate_checksum *csum) { const struct got_error *err; size_t avail; + off_t consumed; struct got_deflate_buf zb; err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); @@ -192,9 +199,10 @@ got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outlen = 0; do { - err = got_deflate_read(&zb, infile, &avail); + err = got_deflate_read(&zb, infile, len, &avail, &consumed); if (err) goto done; + len -= consumed; if (avail > 0) { size_t n; n = fwrite(zb.outbuf, avail, 1, outfile); @@ -214,7 +222,7 @@ done: } const struct got_error * -got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset, +got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, size_t offset, size_t len, FILE *outfile, struct got_deflate_checksum *csum) { const struct got_error *err; blob - c0b4fe271d48124cc97b135feff4b79f951c6720 blob + 1c429af85a27229451e05798e9511d0a8f474968 --- lib/got_lib_deflate.h +++ lib/got_lib_deflate.h @@ -38,9 +38,9 @@ struct got_deflate_buf { const struct got_error *got_deflate_init(struct got_deflate_buf *, uint8_t *, size_t); const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *, - size_t *); + off_t, size_t *, off_t *); void got_deflate_end(struct got_deflate_buf *); -const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *, +const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *, struct got_deflate_checksum *); -const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *, +const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *, size_t, size_t, FILE *, struct got_deflate_checksum *); blob - 67676602467844816045891f0532476d0e14c799 blob + 1d6840ec3d3bc9318e74417e89f1c1056a322dd0 --- lib/got_lib_object_create.h +++ lib/got_lib_object_create.h @@ -15,7 +15,7 @@ */ const struct got_error *got_object_blob_file_create(struct got_object_id **, - FILE **, const char *); + FILE **, off_t *, const char *); const struct got_error *got_object_blob_create(struct got_object_id **, const char *, struct got_repository *); blob - e5e4c44c8a0327211b0094775682f2bed8ea42a8 blob + 84740e1bdc8bd31fb2394900a4795c7b36dc6234 --- lib/object_create.c +++ lib/object_create.c @@ -49,13 +49,13 @@ static const struct got_error * create_object_file(struct got_object_id *id, FILE *content, - struct got_repository *repo) + off_t content_len, struct got_repository *repo) { const struct got_error *err = NULL, *unlock_err = NULL; char *objpath = NULL, *tmppath = NULL; FILE *tmpfile = NULL; struct got_lockfile *lf = NULL; - size_t tmplen = 0; + off_t tmplen = 0; err = got_object_get_path(&objpath, id, repo); if (err) @@ -83,7 +83,7 @@ create_object_file(struct got_object_id *id, FILE *con goto done; } - err = got_deflate_to_file(&tmplen, content, tmpfile, NULL); + err = got_deflate_to_file(&tmplen, content, content_len, tmpfile, NULL); if (err) goto done; @@ -113,7 +113,7 @@ done: const struct got_error * got_object_blob_file_create(struct got_object_id **id, FILE **blobfile, - const char *ondisk_path) + off_t *blobsize, const char *ondisk_path) { const struct got_error *err = NULL; char *header = NULL; @@ -124,6 +124,7 @@ got_object_blob_file_create(struct got_object_id **id, *id = NULL; *blobfile = NULL; + *blobsize = 0; SHA1Init(&sha1_ctx); @@ -160,6 +161,7 @@ got_object_blob_file_create(struct got_object_id **id, err = got_ferror(*blobfile, GOT_ERR_IO); goto done; } + *blobsize += headerlen; for (;;) { char buf[PATH_MAX * 8]; ssize_t inlen; @@ -185,6 +187,7 @@ got_object_blob_file_create(struct got_object_id **id, err = got_ferror(*blobfile, GOT_ERR_IO); goto done; } + *blobsize += n; if (S_ISLNK(sb.st_mode)) break; } @@ -222,12 +225,14 @@ got_object_blob_create(struct got_object_id **id, cons { const struct got_error *err = NULL; FILE *blobfile = NULL; + off_t blobsize; - err = got_object_blob_file_create(id, &blobfile, ondisk_path); + err = got_object_blob_file_create(id, &blobfile, &blobsize, + ondisk_path); if (err) return err; - err = create_object_file(*id, blobfile, repo); + err = create_object_file(*id, blobfile, blobsize, repo); if (fclose(blobfile) == EOF && err == NULL) err = got_error_from_errno("fclose"); if (err) { @@ -299,6 +304,7 @@ got_object_tree_create(struct got_object_id **id, char *header = NULL; size_t headerlen, len = 0, n; FILE *treefile = NULL; + off_t treesize = 0; struct got_pathlist_entry *pe; struct got_tree_entry **sorted_entries; struct got_tree_entry *te; @@ -345,6 +351,7 @@ got_object_tree_create(struct got_object_id **id, err = got_ferror(treefile, GOT_ERR_IO); goto done; } + treesize += headerlen; for (i = 0; i < nentries; i++) { te = sorted_entries[i]; @@ -358,6 +365,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, modebuf, len); + treesize += len; len = strlen(te->name) + 1; /* must include NUL */ n = fwrite(te->name, 1, len, treefile); @@ -366,6 +374,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, te->name, len); + treesize += len; len = SHA1_DIGEST_LENGTH; n = fwrite(te->id.sha1, 1, len, treefile); @@ -374,6 +383,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, te->id.sha1, len); + treesize += len; } *id = malloc(sizeof(**id)); @@ -389,7 +399,7 @@ got_object_tree_create(struct got_object_id **id, } rewind(treefile); - err = create_object_file(*id, treefile, repo); + err = create_object_file(*id, treefile, treesize, repo); done: free(header); free(sorted_entries); @@ -416,6 +426,7 @@ got_object_commit_create(struct got_object_id **id, char *id_str = NULL; size_t headerlen, len = 0, n; FILE *commitfile = NULL; + off_t commitsize = 0; struct got_object_qid *qid; char *msg0, *msg; @@ -471,6 +482,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += headerlen; err = got_object_id_str(&id_str, tree_id); if (err) @@ -567,7 +579,7 @@ got_object_commit_create(struct got_object_id **id, } rewind(commitfile); - err = create_object_file(*id, commitfile, repo); + err = create_object_file(*id, commitfile, commitsize, repo); done: free(id_str); free(msg0); @@ -596,6 +608,7 @@ got_object_tag_create(struct got_object_id **id, char *id_str = NULL, *obj_str = NULL, *type_str = NULL; size_t headerlen, len = 0, n; FILE *tagfile = NULL; + off_t tagsize = 0; char *msg0 = NULL, *msg; const char *obj_type_str; int obj_type; @@ -681,6 +694,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += headerlen; len = strlen(obj_str); SHA1Update(&sha1_ctx, obj_str, len); n = fwrite(obj_str, 1, len, tagfile); @@ -688,6 +702,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += len; len = strlen(type_str); SHA1Update(&sha1_ctx, type_str, len); n = fwrite(type_str, 1, len, tagfile); @@ -695,6 +710,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += len; len = strlen(tag_str); SHA1Update(&sha1_ctx, tag_str, len); @@ -703,6 +719,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += len; len = strlen(tagger_str); SHA1Update(&sha1_ctx, tagger_str, len); @@ -711,6 +728,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += len; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, tagfile); @@ -718,6 +736,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; len = strlen(msg); SHA1Update(&sha1_ctx, msg, len); @@ -726,6 +745,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += len; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, tagfile); @@ -733,6 +753,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; *id = malloc(sizeof(**id)); if (*id == NULL) { @@ -747,7 +768,7 @@ got_object_tag_create(struct got_object_id **id, } rewind(tagfile); - err = create_object_file(*id, tagfile, repo); + err = create_object_file(*id, tagfile, tagsize, repo); done: free(msg0); free(header); blob - 8a3d8a8fd7f7eb29a2707c38e554ff6a37b63685 blob + f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a --- lib/pack_create.c +++ lib/pack_create.c @@ -1385,11 +1385,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt SHA1_CTX ctx; struct got_pack_meta *m; struct got_raw_object *raw = NULL; - FILE *delta_file = NULL; char buf[32]; - size_t outlen, n; + size_t n; struct got_deflate_checksum csum; - off_t packfile_size = 0; + off_t outlen, packfile_size = 0; int outfd = -1; SHA1Init(&ctx); @@ -1440,7 +1439,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt goto done; } err = got_deflate_to_file(&outlen, raw->f, - packfile, &csum); + raw->size, packfile, &csum); if (err) goto done; } @@ -1460,57 +1459,17 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt free(m->delta_buf); m->delta_buf = NULL; } else { - off_t remain; - if (delta_file == NULL) { - delta_file = got_opentemp(); - if (delta_file == NULL) { - err = got_error_from_errno( - "got_opentemp"); - goto done; - } - } - if (ftruncate(fileno(delta_file), 0L) == -1) { - err = got_error_from_errno("ftruncate"); - goto done; - } - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } if (fseeko(delta_cache, m->delta_offset, SEEK_SET) == -1) { err = got_error_from_errno("fseeko"); goto done; } - remain = m->delta_len; - while (remain > 0) { - char delta_buf[8192]; - size_t r, w, n; - n = MIN(remain, sizeof(delta_buf)); - r = fread(delta_buf, 1, n, delta_cache); - if (r != n) { - err = got_ferror(delta_cache, - GOT_ERR_IO); - goto done; - } - w = fwrite(delta_buf, 1, n, delta_file); - if (w != n) { - err = got_ferror(delta_file, - GOT_ERR_IO); - goto done; - } - remain -= n; - } err = deltahdr(&packfile_size, &ctx, packfile, m, use_offset_deltas); if (err) goto done; - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } - err = got_deflate_to_file(&outlen, delta_file, - packfile, &csum); + err = got_deflate_to_file(&outlen, delta_cache, + m->delta_len, packfile, &csum); if (err) goto done; packfile_size += outlen; @@ -1529,8 +1488,6 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt goto done; } done: - if (delta_file && fclose(delta_file) == EOF && err == NULL) - err = got_error_from_errno("fclose"); if (raw) got_object_raw_close(raw); if (outfd != -1 && close(outfd) == -1 && err == NULL) blob - dba36bee48eede3aca73746d5501b4565bbddf64 blob + 0f78ee740bd078fd66df140aa19f2527ef430e0f --- lib/worktree.c +++ lib/worktree.c @@ -2884,11 +2884,13 @@ merge_file_cb(void *arg, struct got_blob_object *blob1 case GOT_STATUS_ADD: { struct got_object_id *id; FILE *blob1_f; + off_t blob1_size; /* * Delete the added file only if its content already * exists in the repository. */ - err = got_object_blob_file_create(&id, &blob1_f, path1); + err = got_object_blob_file_create(&id, &blob1_f, + &blob1_size, path1); if (err) goto done; if (got_object_id_cmp(id, id1) == 0) {
compress delta data from delta_cache directly into pack file