"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: compress delta data from delta_cache directly into pack file
To:
gameoftrees@openbsd.org
Date:
Wed, 19 Jan 2022 13:13:49 +0100

Download raw body.

Thread
On Tue, Jan 18, 2022 at 12:27:06PM +0100, Stefan Sperling wrote:
> Tests are passing, and git fsck reports no issues on commits or pack
> files created with this new code.

Nevertheless, the previous diff was buggy.

Committing large files resulted in bad objects due to a bug in deflate_read().
And the size computed for commit objects was entirely wrong, but unfortunately
the bug in deflate_read() compensated for this, preventing the test suite from
catching these problems.

This adds a new test which triggers the issue I saw while running with the
previous version of this patch.

ok?
 
diff afd7e8422020153ff9e686cd81775c1f1e364b94 b1f2dff2aa4394b54e7cef2f36c8df8c8fc86549
blob - 52c3d2577c7b0308e62c7ff466152e61a38f3c50
blob + 6a151f9c54a8c0f4fc4fea9a4c91ecc7dec71aed
--- lib/deflate.c
+++ lib/deflate.c
@@ -90,7 +90,8 @@ csum_output(struct got_deflate_checksum *csum, const u
 }
 
 const struct got_error *
-got_deflate_read(struct got_deflate_buf *zb, FILE *f, size_t *outlenp)
+got_deflate_read(struct got_deflate_buf *zb, FILE *f, off_t len,
+    size_t *outlenp, off_t *consumed)
 {
 	size_t last_total_out = zb->z.total_out;
 	z_stream *z = &zb->z;
@@ -100,9 +101,15 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
 	z->avail_out = zb->outlen;
 
 	*outlenp = 0;
+	*consumed = 0;
 	do {
+		size_t last_total_in = z->total_in;
 		if (z->avail_in == 0) {
-			size_t n = fread(zb->inbuf, 1, zb->inlen, f);
+			size_t n = 0;
+			if (*consumed < len) {
+				n = fread(zb->inbuf, 1,
+				    MIN(zb->inlen, len - *consumed), f);
+			}
 			if (n == 0) {
 				if (ferror(f))
 					return got_ferror(f, GOT_ERR_IO);
@@ -114,6 +121,7 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
 			z->avail_in = n;
 		}
 		ret = deflate(z, Z_NO_FLUSH);
+		*consumed += z->total_in - last_total_in;
 	} while (ret == Z_OK && z->avail_out > 0);
 
 	if (ret == Z_OK) {
@@ -178,11 +186,12 @@ got_deflate_end(struct got_deflate_buf *zb)
 }
 
 const struct got_error *
-got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outfile,
-    struct got_deflate_checksum *csum)
+got_deflate_to_file(off_t *outlen, FILE *infile, off_t len,
+    FILE *outfile, struct got_deflate_checksum *csum)
 {
 	const struct got_error *err;
 	size_t avail;
+	off_t consumed;
 	struct got_deflate_buf zb;
 
 	err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
@@ -192,9 +201,10 @@ got_deflate_to_file(size_t *outlen, FILE *infile, FILE
 	*outlen = 0;
 
 	do {
-		err = got_deflate_read(&zb, infile, &avail);
+		err = got_deflate_read(&zb, infile, len, &avail, &consumed);
 		if (err)
 			goto done;
+		len -= consumed;
 		if (avail > 0) {
 			size_t n;
 			n = fwrite(zb.outbuf, avail, 1, outfile);
@@ -214,7 +224,7 @@ done:
 }
 
 const struct got_error *
-got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset,
+got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, size_t offset,
     size_t len, FILE *outfile, struct got_deflate_checksum *csum)
 {
 	const struct got_error *err;
blob - c0b4fe271d48124cc97b135feff4b79f951c6720
blob + 1c429af85a27229451e05798e9511d0a8f474968
--- lib/got_lib_deflate.h
+++ lib/got_lib_deflate.h
@@ -38,9 +38,9 @@ struct got_deflate_buf {
 const struct got_error *got_deflate_init(struct got_deflate_buf *, uint8_t *,
     size_t);
 const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *,
-    size_t *);
+    off_t, size_t *, off_t *);
 void got_deflate_end(struct got_deflate_buf *);
-const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *,
+const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *,
     struct got_deflate_checksum *);
-const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *,
+const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *,
     size_t, size_t, FILE *, struct got_deflate_checksum *);
blob - 67676602467844816045891f0532476d0e14c799
blob + 1d6840ec3d3bc9318e74417e89f1c1056a322dd0
--- lib/got_lib_object_create.h
+++ lib/got_lib_object_create.h
@@ -15,7 +15,7 @@
  */
 
 const struct got_error *got_object_blob_file_create(struct got_object_id **,
-    FILE **, const char *);
+    FILE **, off_t *, const char *);
 const struct got_error *got_object_blob_create(struct got_object_id **,
     const char *, struct got_repository *);
 
blob - e5e4c44c8a0327211b0094775682f2bed8ea42a8
blob + 3102c04085d4e8c0157a4d3fd9c725a4b76f8c88
--- lib/object_create.c
+++ lib/object_create.c
@@ -49,13 +49,13 @@
 
 static const struct got_error *
 create_object_file(struct got_object_id *id, FILE *content,
-    struct got_repository *repo)
+    off_t content_len, struct got_repository *repo)
 {
 	const struct got_error *err = NULL, *unlock_err = NULL;
 	char *objpath = NULL, *tmppath = NULL;
 	FILE *tmpfile = NULL;
 	struct got_lockfile *lf = NULL;
-	size_t tmplen = 0;
+	off_t tmplen = 0;
 
 	err = got_object_get_path(&objpath, id, repo);
 	if (err)
@@ -83,7 +83,7 @@ create_object_file(struct got_object_id *id, FILE *con
 		goto done;
 	}
 
-	err = got_deflate_to_file(&tmplen, content, tmpfile, NULL);
+	err = got_deflate_to_file(&tmplen, content, content_len, tmpfile, NULL);
 	if (err)
 		goto done;
 
@@ -113,7 +113,7 @@ done:
 
 const struct got_error *
 got_object_blob_file_create(struct got_object_id **id, FILE **blobfile,
-    const char *ondisk_path)
+    off_t *blobsize, const char *ondisk_path)
 {
 	const struct got_error *err = NULL;
 	char *header = NULL;
@@ -124,6 +124,7 @@ got_object_blob_file_create(struct got_object_id **id,
 
 	*id = NULL;
 	*blobfile = NULL;
+	*blobsize = 0;
 
 	SHA1Init(&sha1_ctx);
 
@@ -160,6 +161,7 @@ got_object_blob_file_create(struct got_object_id **id,
 		err = got_ferror(*blobfile, GOT_ERR_IO);
 		goto done;
 	}
+	*blobsize += headerlen;
 	for (;;) {
 		char buf[PATH_MAX * 8];
 		ssize_t inlen;
@@ -185,6 +187,7 @@ got_object_blob_file_create(struct got_object_id **id,
 			err = got_ferror(*blobfile, GOT_ERR_IO);
 			goto done;
 		}
+		*blobsize += n;
 		if (S_ISLNK(sb.st_mode))
 			break;
 	}
@@ -222,12 +225,14 @@ got_object_blob_create(struct got_object_id **id, cons
 {
 	const struct got_error *err = NULL;
 	FILE *blobfile = NULL;
+	off_t blobsize;
 
-	err = got_object_blob_file_create(id, &blobfile, ondisk_path);
+	err = got_object_blob_file_create(id, &blobfile, &blobsize,
+	    ondisk_path);
 	if (err)
 		return err;
 
-	err = create_object_file(*id, blobfile, repo);
+	err = create_object_file(*id, blobfile, blobsize, repo);
 	if (fclose(blobfile) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
 	if (err) {
@@ -299,6 +304,7 @@ got_object_tree_create(struct got_object_id **id,
 	char *header = NULL;
 	size_t headerlen, len = 0, n;
 	FILE *treefile = NULL;
+	off_t treesize = 0;
 	struct got_pathlist_entry *pe;
 	struct got_tree_entry **sorted_entries;
 	struct got_tree_entry *te;
@@ -345,6 +351,7 @@ got_object_tree_create(struct got_object_id **id,
 		err = got_ferror(treefile, GOT_ERR_IO);
 		goto done;
 	}
+	treesize += headerlen;
 
 	for (i = 0; i < nentries; i++) {
 		te = sorted_entries[i];
@@ -358,6 +365,7 @@ got_object_tree_create(struct got_object_id **id,
 			goto done;
 		}
 		SHA1Update(&sha1_ctx, modebuf, len);
+		treesize += n;
 
 		len = strlen(te->name) + 1; /* must include NUL */
 		n = fwrite(te->name, 1, len, treefile);
@@ -366,6 +374,7 @@ got_object_tree_create(struct got_object_id **id,
 			goto done;
 		}
 		SHA1Update(&sha1_ctx, te->name, len);
+		treesize += n;
 
 		len = SHA1_DIGEST_LENGTH;
 		n = fwrite(te->id.sha1, 1, len, treefile);
@@ -374,6 +383,7 @@ got_object_tree_create(struct got_object_id **id,
 			goto done;
 		}
 		SHA1Update(&sha1_ctx, te->id.sha1, len);
+		treesize += n;
 	}
 
 	*id = malloc(sizeof(**id));
@@ -389,7 +399,7 @@ got_object_tree_create(struct got_object_id **id,
 	}
 	rewind(treefile);
 
-	err = create_object_file(*id, treefile, repo);
+	err = create_object_file(*id, treefile, treesize, repo);
 done:
 	free(header);
 	free(sorted_entries);
@@ -416,6 +426,7 @@ got_object_commit_create(struct got_object_id **id,
 	char *id_str = NULL;
 	size_t headerlen, len = 0, n;
 	FILE *commitfile = NULL;
+	off_t commitsize = 0;
 	struct got_object_qid *qid;
 	char *msg0, *msg;
 
@@ -471,6 +482,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += headerlen;
 
 	err = got_object_id_str(&id_str, tree_id);
 	if (err)
@@ -487,6 +499,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	if (parent_ids) {
 		free(id_str);
@@ -510,6 +523,7 @@ got_object_commit_create(struct got_object_id **id,
 				free(parent_str);
 				goto done;
 			}
+			commitsize += n;
 			free(parent_str);
 			free(id_str);
 			id_str = NULL;
@@ -523,6 +537,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	len = strlen(committer_str);
 	SHA1Update(&sha1_ctx, committer_str, len);
@@ -531,6 +546,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	SHA1Update(&sha1_ctx, "\n", 1);
 	n = fwrite("\n", 1, 1, commitfile);
@@ -538,6 +554,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	len = strlen(msg);
 	SHA1Update(&sha1_ctx, msg, len);
@@ -546,6 +563,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	SHA1Update(&sha1_ctx, "\n", 1);
 	n = fwrite("\n", 1, 1, commitfile);
@@ -553,6 +571,7 @@ got_object_commit_create(struct got_object_id **id,
 		err = got_ferror(commitfile, GOT_ERR_IO);
 		goto done;
 	}
+	commitsize += n;
 
 	*id = malloc(sizeof(**id));
 	if (*id == NULL) {
@@ -567,7 +586,7 @@ got_object_commit_create(struct got_object_id **id,
 	}
 	rewind(commitfile);
 
-	err = create_object_file(*id, commitfile, repo);
+	err = create_object_file(*id, commitfile, commitsize, repo);
 done:
 	free(id_str);
 	free(msg0);
@@ -596,6 +615,7 @@ got_object_tag_create(struct got_object_id **id,
 	char *id_str = NULL, *obj_str = NULL, *type_str = NULL;
 	size_t headerlen, len = 0, n;
 	FILE *tagfile = NULL;
+	off_t tagsize = 0;
 	char *msg0 = NULL, *msg;
 	const char *obj_type_str;
 	int obj_type;
@@ -681,6 +701,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += headerlen;
 	len = strlen(obj_str);
 	SHA1Update(&sha1_ctx, obj_str, len);
 	n = fwrite(obj_str, 1, len, tagfile);
@@ -688,6 +709,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 	len = strlen(type_str);
 	SHA1Update(&sha1_ctx, type_str, len);
 	n = fwrite(type_str, 1, len, tagfile);
@@ -695,6 +717,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	len = strlen(tag_str);
 	SHA1Update(&sha1_ctx, tag_str, len);
@@ -703,6 +726,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	len = strlen(tagger_str);
 	SHA1Update(&sha1_ctx, tagger_str, len);
@@ -711,6 +735,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	SHA1Update(&sha1_ctx, "\n", 1);
 	n = fwrite("\n", 1, 1, tagfile);
@@ -718,6 +743,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	len = strlen(msg);
 	SHA1Update(&sha1_ctx, msg, len);
@@ -726,6 +752,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	SHA1Update(&sha1_ctx, "\n", 1);
 	n = fwrite("\n", 1, 1, tagfile);
@@ -733,6 +760,7 @@ got_object_tag_create(struct got_object_id **id,
 		err = got_ferror(tagfile, GOT_ERR_IO);
 		goto done;
 	}
+	tagsize += n;
 
 	*id = malloc(sizeof(**id));
 	if (*id == NULL) {
@@ -747,7 +775,7 @@ got_object_tag_create(struct got_object_id **id,
 	}
 	rewind(tagfile);
 
-	err = create_object_file(*id, tagfile, repo);
+	err = create_object_file(*id, tagfile, tagsize, repo);
 done:
 	free(msg0);
 	free(header);
blob - 8a3d8a8fd7f7eb29a2707c38e554ff6a37b63685
blob + f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -1385,11 +1385,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 	SHA1_CTX ctx;
 	struct got_pack_meta *m;
 	struct got_raw_object *raw = NULL;
-	FILE *delta_file = NULL;
 	char buf[32];
-	size_t outlen, n;
+	size_t n;
 	struct got_deflate_checksum csum;
-	off_t packfile_size = 0;
+	off_t outlen, packfile_size = 0;
 	int outfd = -1;
 
 	SHA1Init(&ctx);
@@ -1440,7 +1439,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 					goto done;
 				}
 				err = got_deflate_to_file(&outlen, raw->f,
-				    packfile, &csum);
+				    raw->size, packfile, &csum);
 				if (err)
 					goto done;
 			}
@@ -1460,57 +1459,17 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 			free(m->delta_buf);
 			m->delta_buf = NULL;
 		} else {
-			off_t remain;
-			if (delta_file == NULL) {
-				delta_file = got_opentemp();
-				if (delta_file == NULL) {
-					err = got_error_from_errno(
-					    "got_opentemp");
-					goto done;
-				}
-			}
-			if (ftruncate(fileno(delta_file), 0L) == -1) {
-				err = got_error_from_errno("ftruncate");
-				goto done;
-			}
-			if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
-				err = got_error_from_errno("fseeko");
-				goto done;
-			}
 			if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
 			    == -1) {
 				err = got_error_from_errno("fseeko");
 				goto done;
 			}
-			remain = m->delta_len;
-			while (remain > 0) {
-				char delta_buf[8192];
-				size_t r, w, n;
-				n = MIN(remain, sizeof(delta_buf));
-				r = fread(delta_buf, 1, n, delta_cache);
-				if (r != n) {
-					err = got_ferror(delta_cache,
-					    GOT_ERR_IO);
-					goto done;
-				}
-				w = fwrite(delta_buf, 1, n, delta_file);
-				if (w != n) {
-					err = got_ferror(delta_file,
-					    GOT_ERR_IO);
-					goto done;
-				}
-				remain -= n;
-			}
 			err = deltahdr(&packfile_size, &ctx, packfile,
 			    m, use_offset_deltas);
 			if (err)
 				goto done;
-			if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
-				err = got_error_from_errno("fseeko");
-				goto done;
-			}
-			err = got_deflate_to_file(&outlen, delta_file,
-			    packfile, &csum);
+			err = got_deflate_to_file(&outlen, delta_cache,
+			    m->delta_len, packfile, &csum);
 			if (err)
 				goto done;
 			packfile_size += outlen;
@@ -1529,8 +1488,6 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 			goto done;
 	}
 done:
-	if (delta_file && fclose(delta_file) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
 	if (raw)
 		got_object_raw_close(raw);
 	if (outfd != -1 && close(outfd) == -1 && err == NULL)
blob - dba36bee48eede3aca73746d5501b4565bbddf64
blob + 0f78ee740bd078fd66df140aa19f2527ef430e0f
--- lib/worktree.c
+++ lib/worktree.c
@@ -2884,11 +2884,13 @@ merge_file_cb(void *arg, struct got_blob_object *blob1
 		case GOT_STATUS_ADD: {
 			struct got_object_id *id;
 			FILE *blob1_f;
+			off_t blob1_size;
 			/*
 			 * Delete the added file only if its content already
 			 * exists in the repository.
 			 */
-			err = got_object_blob_file_create(&id, &blob1_f, path1);
+			err = got_object_blob_file_create(&id, &blob1_f,
+			    &blob1_size, path1);
 			if (err)
 				goto done;
 			if (got_object_id_cmp(id, id1) == 0) {
blob - d2b9ef37e4422a8ab8e017e17df21a7c6f3be7e8
blob + 34d74d26fefb2a4461a2c489cade5dcd7ebfcd66
--- regress/cmdline/commit.sh
+++ regress/cmdline/commit.sh
@@ -1458,6 +1458,54 @@ EOF
 	test_done "$testroot" "$ret"
 }
 
+test_commit_large_file() {
+	local testroot=`test_init commit_large_file`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	dd status=none if=/dev/zero of=$testroot/wt/new bs=1m count=64
+	(cd $testroot/wt && got add new >/dev/null)
+
+	(cd $testroot/wt && got commit -m 'test commit_large_file' \
+		> $testroot/stdout)
+
+	local head_rev=`git_show_head $testroot/repo`
+	echo "A  new" > $testroot/stdout.expected
+	echo "Created commit $head_rev" >> $testroot/stdout.expected
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	new_id=`get_blob_id $testroot/repo "" new`
+	got cat -r $testroot/repo $new_id > $testroot/new
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "commit failed unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	cmp -s $testroot/new $testroot/wt/new
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/new $testroot/wt/new
+	fi
+	test_done "$testroot" "$ret"
+
+
+}
+
+
 test_parseargs "$@"
 run_test test_commit_basic
 run_test test_commit_new_subdir
@@ -1484,3 +1532,4 @@ run_test test_commit_with_unrelated_submodule
 run_test test_commit_symlink
 run_test test_commit_fix_bad_symlink
 run_test test_commit_prepared_logmsg
+run_test test_commit_large_file