Download raw body.
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file
On Wed, Jan 19, 2022 at 01:13:49PM +0100, Stefan Sperling wrote: > On Tue, Jan 18, 2022 at 12:27:06PM +0100, Stefan Sperling wrote: > > Tests are passing, and git fsck reports no issues on commits or pack > > files created with this new code. > > Nevertheless, the previous diff was buggy. > > Committing large files resulted in bad objects due to a bug in deflate_read(). > And the size computed for commit objects was entirely wrong, but unfortunately > the bug in deflate_read() compensated for this, preventing the test suite from > catching these problems. > > This adds a new test which triggers the issue I saw while running with the > previous version of this patch. > > ok? I have committed this earlier today. > diff afd7e8422020153ff9e686cd81775c1f1e364b94 b1f2dff2aa4394b54e7cef2f36c8df8c8fc86549 > blob - 52c3d2577c7b0308e62c7ff466152e61a38f3c50 > blob + 6a151f9c54a8c0f4fc4fea9a4c91ecc7dec71aed > --- lib/deflate.c > +++ lib/deflate.c > @@ -90,7 +90,8 @@ csum_output(struct got_deflate_checksum *csum, const u > } > > const struct got_error * > -got_deflate_read(struct got_deflate_buf *zb, FILE *f, size_t *outlenp) > +got_deflate_read(struct got_deflate_buf *zb, FILE *f, off_t len, > + size_t *outlenp, off_t *consumed) > { > size_t last_total_out = zb->z.total_out; > z_stream *z = &zb->z; > @@ -100,9 +101,15 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, > z->avail_out = zb->outlen; > > *outlenp = 0; > + *consumed = 0; > do { > + size_t last_total_in = z->total_in; > if (z->avail_in == 0) { > - size_t n = fread(zb->inbuf, 1, zb->inlen, f); > + size_t n = 0; > + if (*consumed < len) { > + n = fread(zb->inbuf, 1, > + MIN(zb->inlen, len - *consumed), f); > + } > if (n == 0) { > if (ferror(f)) > return got_ferror(f, GOT_ERR_IO); > @@ -114,6 +121,7 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, > z->avail_in = n; > } > ret = deflate(z, Z_NO_FLUSH); > + *consumed += z->total_in - last_total_in; > } while (ret == Z_OK && z->avail_out > 0); > > if (ret == Z_OK) { > @@ -178,11 +186,12 @@ got_deflate_end(struct got_deflate_buf *zb) > } > > const struct got_error * > -got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outfile, > - struct got_deflate_checksum *csum) > +got_deflate_to_file(off_t *outlen, FILE *infile, off_t len, > + FILE *outfile, struct got_deflate_checksum *csum) > { > const struct got_error *err; > size_t avail; > + off_t consumed; > struct got_deflate_buf zb; > > err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); > @@ -192,9 +201,10 @@ got_deflate_to_file(size_t *outlen, FILE *infile, FILE > *outlen = 0; > > do { > - err = got_deflate_read(&zb, infile, &avail); > + err = got_deflate_read(&zb, infile, len, &avail, &consumed); > if (err) > goto done; > + len -= consumed; > if (avail > 0) { > size_t n; > n = fwrite(zb.outbuf, avail, 1, outfile); > @@ -214,7 +224,7 @@ done: > } > > const struct got_error * > -got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset, > +got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, size_t offset, > size_t len, FILE *outfile, struct got_deflate_checksum *csum) > { > const struct got_error *err; > blob - c0b4fe271d48124cc97b135feff4b79f951c6720 > blob + 1c429af85a27229451e05798e9511d0a8f474968 > --- lib/got_lib_deflate.h > +++ lib/got_lib_deflate.h > @@ -38,9 +38,9 @@ struct got_deflate_buf { > const struct got_error *got_deflate_init(struct got_deflate_buf *, uint8_t *, > size_t); > const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *, > - size_t *); > + off_t, size_t *, off_t *); > void got_deflate_end(struct got_deflate_buf *); > -const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *, > +const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *, > struct got_deflate_checksum *); > -const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *, > +const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *, > size_t, size_t, FILE *, struct got_deflate_checksum *); > blob - 67676602467844816045891f0532476d0e14c799 > blob + 1d6840ec3d3bc9318e74417e89f1c1056a322dd0 > --- lib/got_lib_object_create.h > +++ lib/got_lib_object_create.h > @@ -15,7 +15,7 @@ > */ > > const struct got_error *got_object_blob_file_create(struct got_object_id **, > - FILE **, const char *); > + FILE **, off_t *, const char *); > const struct got_error *got_object_blob_create(struct got_object_id **, > const char *, struct got_repository *); > > blob - e5e4c44c8a0327211b0094775682f2bed8ea42a8 > blob + 3102c04085d4e8c0157a4d3fd9c725a4b76f8c88 > --- lib/object_create.c > +++ lib/object_create.c > @@ -49,13 +49,13 @@ > > static const struct got_error * > create_object_file(struct got_object_id *id, FILE *content, > - struct got_repository *repo) > + off_t content_len, struct got_repository *repo) > { > const struct got_error *err = NULL, *unlock_err = NULL; > char *objpath = NULL, *tmppath = NULL; > FILE *tmpfile = NULL; > struct got_lockfile *lf = NULL; > - size_t tmplen = 0; > + off_t tmplen = 0; > > err = got_object_get_path(&objpath, id, repo); > if (err) > @@ -83,7 +83,7 @@ create_object_file(struct got_object_id *id, FILE *con > goto done; > } > > - err = got_deflate_to_file(&tmplen, content, tmpfile, NULL); > + err = got_deflate_to_file(&tmplen, content, content_len, tmpfile, NULL); > if (err) > goto done; > > @@ -113,7 +113,7 @@ done: > > const struct got_error * > got_object_blob_file_create(struct got_object_id **id, FILE **blobfile, > - const char *ondisk_path) > + off_t *blobsize, const char *ondisk_path) > { > const struct got_error *err = NULL; > char *header = NULL; > @@ -124,6 +124,7 @@ got_object_blob_file_create(struct got_object_id **id, > > *id = NULL; > *blobfile = NULL; > + *blobsize = 0; > > SHA1Init(&sha1_ctx); > > @@ -160,6 +161,7 @@ got_object_blob_file_create(struct got_object_id **id, > err = got_ferror(*blobfile, GOT_ERR_IO); > goto done; > } > + *blobsize += headerlen; > for (;;) { > char buf[PATH_MAX * 8]; > ssize_t inlen; > @@ -185,6 +187,7 @@ got_object_blob_file_create(struct got_object_id **id, > err = got_ferror(*blobfile, GOT_ERR_IO); > goto done; > } > + *blobsize += n; > if (S_ISLNK(sb.st_mode)) > break; > } > @@ -222,12 +225,14 @@ got_object_blob_create(struct got_object_id **id, cons > { > const struct got_error *err = NULL; > FILE *blobfile = NULL; > + off_t blobsize; > > - err = got_object_blob_file_create(id, &blobfile, ondisk_path); > + err = got_object_blob_file_create(id, &blobfile, &blobsize, > + ondisk_path); > if (err) > return err; > > - err = create_object_file(*id, blobfile, repo); > + err = create_object_file(*id, blobfile, blobsize, repo); > if (fclose(blobfile) == EOF && err == NULL) > err = got_error_from_errno("fclose"); > if (err) { > @@ -299,6 +304,7 @@ got_object_tree_create(struct got_object_id **id, > char *header = NULL; > size_t headerlen, len = 0, n; > FILE *treefile = NULL; > + off_t treesize = 0; > struct got_pathlist_entry *pe; > struct got_tree_entry **sorted_entries; > struct got_tree_entry *te; > @@ -345,6 +351,7 @@ got_object_tree_create(struct got_object_id **id, > err = got_ferror(treefile, GOT_ERR_IO); > goto done; > } > + treesize += headerlen; > > for (i = 0; i < nentries; i++) { > te = sorted_entries[i]; > @@ -358,6 +365,7 @@ got_object_tree_create(struct got_object_id **id, > goto done; > } > SHA1Update(&sha1_ctx, modebuf, len); > + treesize += n; > > len = strlen(te->name) + 1; /* must include NUL */ > n = fwrite(te->name, 1, len, treefile); > @@ -366,6 +374,7 @@ got_object_tree_create(struct got_object_id **id, > goto done; > } > SHA1Update(&sha1_ctx, te->name, len); > + treesize += n; > > len = SHA1_DIGEST_LENGTH; > n = fwrite(te->id.sha1, 1, len, treefile); > @@ -374,6 +383,7 @@ got_object_tree_create(struct got_object_id **id, > goto done; > } > SHA1Update(&sha1_ctx, te->id.sha1, len); > + treesize += n; > } > > *id = malloc(sizeof(**id)); > @@ -389,7 +399,7 @@ got_object_tree_create(struct got_object_id **id, > } > rewind(treefile); > > - err = create_object_file(*id, treefile, repo); > + err = create_object_file(*id, treefile, treesize, repo); > done: > free(header); > free(sorted_entries); > @@ -416,6 +426,7 @@ got_object_commit_create(struct got_object_id **id, > char *id_str = NULL; > size_t headerlen, len = 0, n; > FILE *commitfile = NULL; > + off_t commitsize = 0; > struct got_object_qid *qid; > char *msg0, *msg; > > @@ -471,6 +482,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += headerlen; > > err = got_object_id_str(&id_str, tree_id); > if (err) > @@ -487,6 +499,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > if (parent_ids) { > free(id_str); > @@ -510,6 +523,7 @@ got_object_commit_create(struct got_object_id **id, > free(parent_str); > goto done; > } > + commitsize += n; > free(parent_str); > free(id_str); > id_str = NULL; > @@ -523,6 +537,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > len = strlen(committer_str); > SHA1Update(&sha1_ctx, committer_str, len); > @@ -531,6 +546,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > SHA1Update(&sha1_ctx, "\n", 1); > n = fwrite("\n", 1, 1, commitfile); > @@ -538,6 +554,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > len = strlen(msg); > SHA1Update(&sha1_ctx, msg, len); > @@ -546,6 +563,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > SHA1Update(&sha1_ctx, "\n", 1); > n = fwrite("\n", 1, 1, commitfile); > @@ -553,6 +571,7 @@ got_object_commit_create(struct got_object_id **id, > err = got_ferror(commitfile, GOT_ERR_IO); > goto done; > } > + commitsize += n; > > *id = malloc(sizeof(**id)); > if (*id == NULL) { > @@ -567,7 +586,7 @@ got_object_commit_create(struct got_object_id **id, > } > rewind(commitfile); > > - err = create_object_file(*id, commitfile, repo); > + err = create_object_file(*id, commitfile, commitsize, repo); > done: > free(id_str); > free(msg0); > @@ -596,6 +615,7 @@ got_object_tag_create(struct got_object_id **id, > char *id_str = NULL, *obj_str = NULL, *type_str = NULL; > size_t headerlen, len = 0, n; > FILE *tagfile = NULL; > + off_t tagsize = 0; > char *msg0 = NULL, *msg; > const char *obj_type_str; > int obj_type; > @@ -681,6 +701,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += headerlen; > len = strlen(obj_str); > SHA1Update(&sha1_ctx, obj_str, len); > n = fwrite(obj_str, 1, len, tagfile); > @@ -688,6 +709,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > len = strlen(type_str); > SHA1Update(&sha1_ctx, type_str, len); > n = fwrite(type_str, 1, len, tagfile); > @@ -695,6 +717,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > len = strlen(tag_str); > SHA1Update(&sha1_ctx, tag_str, len); > @@ -703,6 +726,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > len = strlen(tagger_str); > SHA1Update(&sha1_ctx, tagger_str, len); > @@ -711,6 +735,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > SHA1Update(&sha1_ctx, "\n", 1); > n = fwrite("\n", 1, 1, tagfile); > @@ -718,6 +743,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > len = strlen(msg); > SHA1Update(&sha1_ctx, msg, len); > @@ -726,6 +752,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > SHA1Update(&sha1_ctx, "\n", 1); > n = fwrite("\n", 1, 1, tagfile); > @@ -733,6 +760,7 @@ got_object_tag_create(struct got_object_id **id, > err = got_ferror(tagfile, GOT_ERR_IO); > goto done; > } > + tagsize += n; > > *id = malloc(sizeof(**id)); > if (*id == NULL) { > @@ -747,7 +775,7 @@ got_object_tag_create(struct got_object_id **id, > } > rewind(tagfile); > > - err = create_object_file(*id, tagfile, repo); > + err = create_object_file(*id, tagfile, tagsize, repo); > done: > free(msg0); > free(header); > blob - 8a3d8a8fd7f7eb29a2707c38e554ff6a37b63685 > blob + f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a > --- lib/pack_create.c > +++ lib/pack_create.c > @@ -1385,11 +1385,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt > SHA1_CTX ctx; > struct got_pack_meta *m; > struct got_raw_object *raw = NULL; > - FILE *delta_file = NULL; > char buf[32]; > - size_t outlen, n; > + size_t n; > struct got_deflate_checksum csum; > - off_t packfile_size = 0; > + off_t outlen, packfile_size = 0; > int outfd = -1; > > SHA1Init(&ctx); > @@ -1440,7 +1439,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt > goto done; > } > err = got_deflate_to_file(&outlen, raw->f, > - packfile, &csum); > + raw->size, packfile, &csum); > if (err) > goto done; > } > @@ -1460,57 +1459,17 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt > free(m->delta_buf); > m->delta_buf = NULL; > } else { > - off_t remain; > - if (delta_file == NULL) { > - delta_file = got_opentemp(); > - if (delta_file == NULL) { > - err = got_error_from_errno( > - "got_opentemp"); > - goto done; > - } > - } > - if (ftruncate(fileno(delta_file), 0L) == -1) { > - err = got_error_from_errno("ftruncate"); > - goto done; > - } > - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { > - err = got_error_from_errno("fseeko"); > - goto done; > - } > if (fseeko(delta_cache, m->delta_offset, SEEK_SET) > == -1) { > err = got_error_from_errno("fseeko"); > goto done; > } > - remain = m->delta_len; > - while (remain > 0) { > - char delta_buf[8192]; > - size_t r, w, n; > - n = MIN(remain, sizeof(delta_buf)); > - r = fread(delta_buf, 1, n, delta_cache); > - if (r != n) { > - err = got_ferror(delta_cache, > - GOT_ERR_IO); > - goto done; > - } > - w = fwrite(delta_buf, 1, n, delta_file); > - if (w != n) { > - err = got_ferror(delta_file, > - GOT_ERR_IO); > - goto done; > - } > - remain -= n; > - } > err = deltahdr(&packfile_size, &ctx, packfile, > m, use_offset_deltas); > if (err) > goto done; > - if (fseeko(delta_file, 0L, SEEK_SET) == -1) { > - err = got_error_from_errno("fseeko"); > - goto done; > - } > - err = got_deflate_to_file(&outlen, delta_file, > - packfile, &csum); > + err = got_deflate_to_file(&outlen, delta_cache, > + m->delta_len, packfile, &csum); > if (err) > goto done; > packfile_size += outlen; > @@ -1529,8 +1488,6 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt > goto done; > } > done: > - if (delta_file && fclose(delta_file) == EOF && err == NULL) > - err = got_error_from_errno("fclose"); > if (raw) > got_object_raw_close(raw); > if (outfd != -1 && close(outfd) == -1 && err == NULL) > blob - dba36bee48eede3aca73746d5501b4565bbddf64 > blob + 0f78ee740bd078fd66df140aa19f2527ef430e0f > --- lib/worktree.c > +++ lib/worktree.c > @@ -2884,11 +2884,13 @@ merge_file_cb(void *arg, struct got_blob_object *blob1 > case GOT_STATUS_ADD: { > struct got_object_id *id; > FILE *blob1_f; > + off_t blob1_size; > /* > * Delete the added file only if its content already > * exists in the repository. > */ > - err = got_object_blob_file_create(&id, &blob1_f, path1); > + err = got_object_blob_file_create(&id, &blob1_f, > + &blob1_size, path1); > if (err) > goto done; > if (got_object_id_cmp(id, id1) == 0) { > blob - d2b9ef37e4422a8ab8e017e17df21a7c6f3be7e8 > blob + 34d74d26fefb2a4461a2c489cade5dcd7ebfcd66 > --- regress/cmdline/commit.sh > +++ regress/cmdline/commit.sh > @@ -1458,6 +1458,54 @@ EOF > test_done "$testroot" "$ret" > } > > +test_commit_large_file() { > + local testroot=`test_init commit_large_file` > + > + got checkout $testroot/repo $testroot/wt > /dev/null > + ret="$?" > + if [ "$ret" != "0" ]; then > + test_done "$testroot" "$ret" > + return 1 > + fi > + > + dd status=none if=/dev/zero of=$testroot/wt/new bs=1m count=64 > + (cd $testroot/wt && got add new >/dev/null) > + > + (cd $testroot/wt && got commit -m 'test commit_large_file' \ > + > $testroot/stdout) > + > + local head_rev=`git_show_head $testroot/repo` > + echo "A new" > $testroot/stdout.expected > + echo "Created commit $head_rev" >> $testroot/stdout.expected > + > + cmp -s $testroot/stdout.expected $testroot/stdout > + ret="$?" > + if [ "$ret" != "0" ]; then > + diff -u $testroot/stdout.expected $testroot/stdout > + test_done "$testroot" "$ret" > + return 1 > + fi > + > + new_id=`get_blob_id $testroot/repo "" new` > + got cat -r $testroot/repo $new_id > $testroot/new > + ret="$?" > + if [ "$ret" != "0" ]; then > + echo "commit failed unexpectedly" >&2 > + test_done "$testroot" "1" > + return 1 > + fi > + > + cmp -s $testroot/new $testroot/wt/new > + ret="$?" > + if [ "$ret" != "0" ]; then > + diff -u $testroot/new $testroot/wt/new > + fi > + test_done "$testroot" "$ret" > + > + > +} > + > + > test_parseargs "$@" > run_test test_commit_basic > run_test test_commit_new_subdir > @@ -1484,3 +1532,4 @@ run_test test_commit_with_unrelated_submodule > run_test test_commit_symlink > run_test test_commit_fix_bad_symlink > run_test test_commit_prepared_logmsg > +run_test test_commit_large_file > >
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file
compress delta data from delta_cache directly into pack file