Download raw body.
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file
On Tue, Jan 18, 2022 at 12:27:06PM +0100, Stefan Sperling wrote: > Tests are passing, and git fsck reports no issues on commits or pack > files created with this new code. Nevertheless, the previous diff was buggy. Committing large files resulted in bad objects due to a bug in deflate_read(). And the size computed for commit objects was entirely wrong, but unfortunately the bug in deflate_read() compensated for this, preventing the test suite from catching these problems. This adds a new test which triggers the issue I saw while running with the previous version of this patch. ok? diff afd7e8422020153ff9e686cd81775c1f1e364b94 b1f2dff2aa4394b54e7cef2f36c8df8c8fc86549 blob - 52c3d2577c7b0308e62c7ff466152e61a38f3c50 blob + 6a151f9c54a8c0f4fc4fea9a4c91ecc7dec71aed --- lib/deflate.c +++ lib/deflate.c @@ -90,7 +90,8 @@ csum_output(struct got_deflate_checksum *csum, const u } const struct got_error * -got_deflate_read(struct got_deflate_buf *zb, FILE *f, size_t *outlenp) +got_deflate_read(struct got_deflate_buf *zb, FILE *f, off_t len, + size_t *outlenp, off_t *consumed) { size_t last_total_out = zb->z.total_out; z_stream *z = &zb->z; @@ -100,9 +101,15 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, z->avail_out = zb->outlen; *outlenp = 0; + *consumed = 0; do { + size_t last_total_in = z->total_in; if (z->avail_in == 0) { - size_t n = fread(zb->inbuf, 1, zb->inlen, f); + size_t n = 0; + if (*consumed < len) { + n = fread(zb->inbuf, 1, + MIN(zb->inlen, len - *consumed), f); + } if (n == 0) { if (ferror(f)) return got_ferror(f, GOT_ERR_IO); @@ -114,6 +121,7 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, z->avail_in = n; } ret = deflate(z, Z_NO_FLUSH); + *consumed += z->total_in - last_total_in; } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK) { @@ -178,11 +186,12 @@ got_deflate_end(struct got_deflate_buf *zb) } const struct got_error * -got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outfile, - struct got_deflate_checksum *csum) +got_deflate_to_file(off_t *outlen, FILE *infile, off_t len, + FILE *outfile, struct got_deflate_checksum *csum) { const struct got_error *err; size_t avail; + off_t consumed; struct got_deflate_buf zb; err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); @@ -192,9 +201,10 @@ got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outlen = 0; do { - err = got_deflate_read(&zb, infile, &avail); + err = got_deflate_read(&zb, infile, len, &avail, &consumed); if (err) goto done; + len -= consumed; if (avail > 0) { size_t n; n = fwrite(zb.outbuf, avail, 1, outfile); @@ -214,7 +224,7 @@ done: } const struct got_error * -got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset, +got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, size_t offset, size_t len, FILE *outfile, struct got_deflate_checksum *csum) { const struct got_error *err; blob - c0b4fe271d48124cc97b135feff4b79f951c6720 blob + 1c429af85a27229451e05798e9511d0a8f474968 --- lib/got_lib_deflate.h +++ lib/got_lib_deflate.h @@ -38,9 +38,9 @@ struct got_deflate_buf { const struct got_error *got_deflate_init(struct got_deflate_buf *, uint8_t *, size_t); const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *, - size_t *); + off_t, size_t *, off_t *); void got_deflate_end(struct got_deflate_buf *); -const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *, +const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *, struct got_deflate_checksum *); -const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *, +const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *, size_t, size_t, FILE *, struct got_deflate_checksum *); blob - 67676602467844816045891f0532476d0e14c799 blob + 1d6840ec3d3bc9318e74417e89f1c1056a322dd0 --- lib/got_lib_object_create.h +++ lib/got_lib_object_create.h @@ -15,7 +15,7 @@ */ const struct got_error *got_object_blob_file_create(struct got_object_id **, - FILE **, const char *); + FILE **, off_t *, const char *); const struct got_error *got_object_blob_create(struct got_object_id **, const char *, struct got_repository *); blob - e5e4c44c8a0327211b0094775682f2bed8ea42a8 blob + 3102c04085d4e8c0157a4d3fd9c725a4b76f8c88 --- lib/object_create.c +++ lib/object_create.c @@ -49,13 +49,13 @@ static const struct got_error * create_object_file(struct got_object_id *id, FILE *content, - struct got_repository *repo) + off_t content_len, struct got_repository *repo) { const struct got_error *err = NULL, *unlock_err = NULL; char *objpath = NULL, *tmppath = NULL; FILE *tmpfile = NULL; struct got_lockfile *lf = NULL; - size_t tmplen = 0; + off_t tmplen = 0; err = got_object_get_path(&objpath, id, repo); if (err) @@ -83,7 +83,7 @@ create_object_file(struct got_object_id *id, FILE *con goto done; } - err = got_deflate_to_file(&tmplen, content, tmpfile, NULL); + err = got_deflate_to_file(&tmplen, content, content_len, tmpfile, NULL); if (err) goto done; @@ -113,7 +113,7 @@ done: const struct got_error * got_object_blob_file_create(struct got_object_id **id, FILE **blobfile, - const char *ondisk_path) + off_t *blobsize, const char *ondisk_path) { const struct got_error *err = NULL; char *header = NULL; @@ -124,6 +124,7 @@ got_object_blob_file_create(struct got_object_id **id, *id = NULL; *blobfile = NULL; + *blobsize = 0; SHA1Init(&sha1_ctx); @@ -160,6 +161,7 @@ got_object_blob_file_create(struct got_object_id **id, err = got_ferror(*blobfile, GOT_ERR_IO); goto done; } + *blobsize += headerlen; for (;;) { char buf[PATH_MAX * 8]; ssize_t inlen; @@ -185,6 +187,7 @@ got_object_blob_file_create(struct got_object_id **id, err = got_ferror(*blobfile, GOT_ERR_IO); goto done; } + *blobsize += n; if (S_ISLNK(sb.st_mode)) break; } @@ -222,12 +225,14 @@ got_object_blob_create(struct got_object_id **id, cons { const struct got_error *err = NULL; FILE *blobfile = NULL; + off_t blobsize; - err = got_object_blob_file_create(id, &blobfile, ondisk_path); + err = got_object_blob_file_create(id, &blobfile, &blobsize, + ondisk_path); if (err) return err; - err = create_object_file(*id, blobfile, repo); + err = create_object_file(*id, blobfile, blobsize, repo); if (fclose(blobfile) == EOF && err == NULL) err = got_error_from_errno("fclose"); if (err) { @@ -299,6 +304,7 @@ got_object_tree_create(struct got_object_id **id, char *header = NULL; size_t headerlen, len = 0, n; FILE *treefile = NULL; + off_t treesize = 0; struct got_pathlist_entry *pe; struct got_tree_entry **sorted_entries; struct got_tree_entry *te; @@ -345,6 +351,7 @@ got_object_tree_create(struct got_object_id **id, err = got_ferror(treefile, GOT_ERR_IO); goto done; } + treesize += headerlen; for (i = 0; i < nentries; i++) { te = sorted_entries[i]; @@ -358,6 +365,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, modebuf, len); + treesize += n; len = strlen(te->name) + 1; /* must include NUL */ n = fwrite(te->name, 1, len, treefile); @@ -366,6 +374,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, te->name, len); + treesize += n; len = SHA1_DIGEST_LENGTH; n = fwrite(te->id.sha1, 1, len, treefile); @@ -374,6 +383,7 @@ got_object_tree_create(struct got_object_id **id, goto done; } SHA1Update(&sha1_ctx, te->id.sha1, len); + treesize += n; } *id = malloc(sizeof(**id)); @@ -389,7 +399,7 @@ got_object_tree_create(struct got_object_id **id, } rewind(treefile); - err = create_object_file(*id, treefile, repo); + err = create_object_file(*id, treefile, treesize, repo); done: free(header); free(sorted_entries); @@ -416,6 +426,7 @@ got_object_commit_create(struct got_object_id **id, char *id_str = NULL; size_t headerlen, len = 0, n; FILE *commitfile = NULL; + off_t commitsize = 0; struct got_object_qid *qid; char *msg0, *msg; @@ -471,6 +482,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += headerlen; err = got_object_id_str(&id_str, tree_id); if (err) @@ -487,6 +499,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; if (parent_ids) { free(id_str); @@ -510,6 +523,7 @@ got_object_commit_create(struct got_object_id **id, free(parent_str); goto done; } + commitsize += n; free(parent_str); free(id_str); id_str = NULL; @@ -523,6 +537,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; len = strlen(committer_str); SHA1Update(&sha1_ctx, committer_str, len); @@ -531,6 +546,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, commitfile); @@ -538,6 +554,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; len = strlen(msg); SHA1Update(&sha1_ctx, msg, len); @@ -546,6 +563,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, commitfile); @@ -553,6 +571,7 @@ got_object_commit_create(struct got_object_id **id, err = got_ferror(commitfile, GOT_ERR_IO); goto done; } + commitsize += n; *id = malloc(sizeof(**id)); if (*id == NULL) { @@ -567,7 +586,7 @@ got_object_commit_create(struct got_object_id **id, } rewind(commitfile); - err = create_object_file(*id, commitfile, repo); + err = create_object_file(*id, commitfile, commitsize, repo); done: free(id_str); free(msg0); @@ -596,6 +615,7 @@ got_object_tag_create(struct got_object_id **id, char *id_str = NULL, *obj_str = NULL, *type_str = NULL; size_t headerlen, len = 0, n; FILE *tagfile = NULL; + off_t tagsize = 0; char *msg0 = NULL, *msg; const char *obj_type_str; int obj_type; @@ -681,6 +701,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += headerlen; len = strlen(obj_str); SHA1Update(&sha1_ctx, obj_str, len); n = fwrite(obj_str, 1, len, tagfile); @@ -688,6 +709,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; len = strlen(type_str); SHA1Update(&sha1_ctx, type_str, len); n = fwrite(type_str, 1, len, tagfile); @@ -695,6 +717,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; len = strlen(tag_str); SHA1Update(&sha1_ctx, tag_str, len); @@ -703,6 +726,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; len = strlen(tagger_str); SHA1Update(&sha1_ctx, tagger_str, len); @@ -711,6 +735,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, tagfile); @@ -718,6 +743,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; len = strlen(msg); SHA1Update(&sha1_ctx, msg, len); @@ -726,6 +752,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; SHA1Update(&sha1_ctx, "\n", 1); n = fwrite("\n", 1, 1, tagfile); @@ -733,6 +760,7 @@ got_object_tag_create(struct got_object_id **id, err = got_ferror(tagfile, GOT_ERR_IO); goto done; } + tagsize += n; *id = malloc(sizeof(**id)); if (*id == NULL) { @@ -747,7 +775,7 @@ got_object_tag_create(struct got_object_id **id, } rewind(tagfile); - err = create_object_file(*id, tagfile, repo); + err = create_object_file(*id, tagfile, tagsize, repo); done: free(msg0); free(header); blob - 8a3d8a8fd7f7eb29a2707c38e554ff6a37b63685 blob + f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a --- lib/pack_create.c +++ lib/pack_create.c @@ -1385,11 +1385,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt SHA1_CTX ctx; struct got_pack_meta *m; struct got_raw_object *raw = NULL; - FILE *delta_file = NULL; char buf[32]; - size_t outlen, n; + size_t n; struct got_deflate_checksum csum; - off_t packfile_size = 0; + off_t outlen, packfile_size = 0; int outfd = -1; SHA1Init(&ctx); @@ -1440,7 +1439,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt goto done; } err = got_deflate_to_file(&outlen, raw->f, - packfile, &csum); + raw->size, packfile, &csum); if (err) goto done; } @@ -1460,57 +1459,17 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt free(m->delta_buf); m->delta_buf = NULL; } else { - off_t remain; - if (delta_file == NULL) { - delta_file = got_opentemp(); - if (delta_file == NULL) { - err = got_error_from_errno( - "got_opentemp"); - goto done; - } - } - if (ftruncate(fileno(delta_file), 0L) == -1) { - err = got_error_from_errno("ftruncate"); - goto done; - } - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } if (fseeko(delta_cache, m->delta_offset, SEEK_SET) == -1) { err = got_error_from_errno("fseeko"); goto done; } - remain = m->delta_len; - while (remain > 0) { - char delta_buf[8192]; - size_t r, w, n; - n = MIN(remain, sizeof(delta_buf)); - r = fread(delta_buf, 1, n, delta_cache); - if (r != n) { - err = got_ferror(delta_cache, - GOT_ERR_IO); - goto done; - } - w = fwrite(delta_buf, 1, n, delta_file); - if (w != n) { - err = got_ferror(delta_file, - GOT_ERR_IO); - goto done; - } - remain -= n; - } err = deltahdr(&packfile_size, &ctx, packfile, m, use_offset_deltas); if (err) goto done; - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } - err = got_deflate_to_file(&outlen, delta_file, - packfile, &csum); + err = got_deflate_to_file(&outlen, delta_cache, + m->delta_len, packfile, &csum); if (err) goto done; packfile_size += outlen; @@ -1529,8 +1488,6 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt goto done; } done: - if (delta_file && fclose(delta_file) == EOF && err == NULL) - err = got_error_from_errno("fclose"); if (raw) got_object_raw_close(raw); if (outfd != -1 && close(outfd) == -1 && err == NULL) blob - dba36bee48eede3aca73746d5501b4565bbddf64 blob + 0f78ee740bd078fd66df140aa19f2527ef430e0f --- lib/worktree.c +++ lib/worktree.c @@ -2884,11 +2884,13 @@ merge_file_cb(void *arg, struct got_blob_object *blob1 case GOT_STATUS_ADD: { struct got_object_id *id; FILE *blob1_f; + off_t blob1_size; /* * Delete the added file only if its content already * exists in the repository. */ - err = got_object_blob_file_create(&id, &blob1_f, path1); + err = got_object_blob_file_create(&id, &blob1_f, + &blob1_size, path1); if (err) goto done; if (got_object_id_cmp(id, id1) == 0) { blob - d2b9ef37e4422a8ab8e017e17df21a7c6f3be7e8 blob + 34d74d26fefb2a4461a2c489cade5dcd7ebfcd66 --- regress/cmdline/commit.sh +++ regress/cmdline/commit.sh @@ -1458,6 +1458,54 @@ EOF test_done "$testroot" "$ret" } +test_commit_large_file() { + local testroot=`test_init commit_large_file` + + got checkout $testroot/repo $testroot/wt > /dev/null + ret="$?" + if [ "$ret" != "0" ]; then + test_done "$testroot" "$ret" + return 1 + fi + + dd status=none if=/dev/zero of=$testroot/wt/new bs=1m count=64 + (cd $testroot/wt && got add new >/dev/null) + + (cd $testroot/wt && got commit -m 'test commit_large_file' \ + > $testroot/stdout) + + local head_rev=`git_show_head $testroot/repo` + echo "A new" > $testroot/stdout.expected + echo "Created commit $head_rev" >> $testroot/stdout.expected + + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + new_id=`get_blob_id $testroot/repo "" new` + got cat -r $testroot/repo $new_id > $testroot/new + ret="$?" + if [ "$ret" != "0" ]; then + echo "commit failed unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi + + cmp -s $testroot/new $testroot/wt/new + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/new $testroot/wt/new + fi + test_done "$testroot" "$ret" + + +} + + test_parseargs "$@" run_test test_commit_basic run_test test_commit_new_subdir @@ -1484,3 +1532,4 @@ run_test test_commit_with_unrelated_submodule run_test test_commit_symlink run_test test_commit_fix_bad_symlink run_test test_commit_prepared_logmsg +run_test test_commit_large_file
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file