"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: compress delta data from delta_cache directly into pack file
To:
gameoftrees@openbsd.org
Date:
Fri, 21 Jan 2022 15:57:46 +0100

Download raw body.

Thread
On Wed, Jan 19, 2022 at 01:13:49PM +0100, Stefan Sperling wrote:
> On Tue, Jan 18, 2022 at 12:27:06PM +0100, Stefan Sperling wrote:
> > Tests are passing, and git fsck reports no issues on commits or pack
> > files created with this new code.
> 
> Nevertheless, the previous diff was buggy.
> 
> Committing large files resulted in bad objects due to a bug in deflate_read().
> And the size computed for commit objects was entirely wrong, but unfortunately
> the bug in deflate_read() compensated for this, preventing the test suite from
> catching these problems.
> 
> This adds a new test which triggers the issue I saw while running with the
> previous version of this patch.
> 
> ok?

I have committed this earlier today.

> diff afd7e8422020153ff9e686cd81775c1f1e364b94 b1f2dff2aa4394b54e7cef2f36c8df8c8fc86549
> blob - 52c3d2577c7b0308e62c7ff466152e61a38f3c50
> blob + 6a151f9c54a8c0f4fc4fea9a4c91ecc7dec71aed
> --- lib/deflate.c
> +++ lib/deflate.c
> @@ -90,7 +90,8 @@ csum_output(struct got_deflate_checksum *csum, const u
>  }
>  
>  const struct got_error *
> -got_deflate_read(struct got_deflate_buf *zb, FILE *f, size_t *outlenp)
> +got_deflate_read(struct got_deflate_buf *zb, FILE *f, off_t len,
> +    size_t *outlenp, off_t *consumed)
>  {
>  	size_t last_total_out = zb->z.total_out;
>  	z_stream *z = &zb->z;
> @@ -100,9 +101,15 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
>  	z->avail_out = zb->outlen;
>  
>  	*outlenp = 0;
> +	*consumed = 0;
>  	do {
> +		size_t last_total_in = z->total_in;
>  		if (z->avail_in == 0) {
> -			size_t n = fread(zb->inbuf, 1, zb->inlen, f);
> +			size_t n = 0;
> +			if (*consumed < len) {
> +				n = fread(zb->inbuf, 1,
> +				    MIN(zb->inlen, len - *consumed), f);
> +			}
>  			if (n == 0) {
>  				if (ferror(f))
>  					return got_ferror(f, GOT_ERR_IO);
> @@ -114,6 +121,7 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
>  			z->avail_in = n;
>  		}
>  		ret = deflate(z, Z_NO_FLUSH);
> +		*consumed += z->total_in - last_total_in;
>  	} while (ret == Z_OK && z->avail_out > 0);
>  
>  	if (ret == Z_OK) {
> @@ -178,11 +186,12 @@ got_deflate_end(struct got_deflate_buf *zb)
>  }
>  
>  const struct got_error *
> -got_deflate_to_file(size_t *outlen, FILE *infile, FILE *outfile,
> -    struct got_deflate_checksum *csum)
> +got_deflate_to_file(off_t *outlen, FILE *infile, off_t len,
> +    FILE *outfile, struct got_deflate_checksum *csum)
>  {
>  	const struct got_error *err;
>  	size_t avail;
> +	off_t consumed;
>  	struct got_deflate_buf zb;
>  
>  	err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
> @@ -192,9 +201,10 @@ got_deflate_to_file(size_t *outlen, FILE *infile, FILE
>  	*outlen = 0;
>  
>  	do {
> -		err = got_deflate_read(&zb, infile, &avail);
> +		err = got_deflate_read(&zb, infile, len, &avail, &consumed);
>  		if (err)
>  			goto done;
> +		len -= consumed;
>  		if (avail > 0) {
>  			size_t n;
>  			n = fwrite(zb.outbuf, avail, 1, outfile);
> @@ -214,7 +224,7 @@ done:
>  }
>  
>  const struct got_error *
> -got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset,
> +got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, size_t offset,
>      size_t len, FILE *outfile, struct got_deflate_checksum *csum)
>  {
>  	const struct got_error *err;
> blob - c0b4fe271d48124cc97b135feff4b79f951c6720
> blob + 1c429af85a27229451e05798e9511d0a8f474968
> --- lib/got_lib_deflate.h
> +++ lib/got_lib_deflate.h
> @@ -38,9 +38,9 @@ struct got_deflate_buf {
>  const struct got_error *got_deflate_init(struct got_deflate_buf *, uint8_t *,
>      size_t);
>  const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *,
> -    size_t *);
> +    off_t, size_t *, off_t *);
>  void got_deflate_end(struct got_deflate_buf *);
> -const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *,
> +const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *,
>      struct got_deflate_checksum *);
> -const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *,
> +const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *,
>      size_t, size_t, FILE *, struct got_deflate_checksum *);
> blob - 67676602467844816045891f0532476d0e14c799
> blob + 1d6840ec3d3bc9318e74417e89f1c1056a322dd0
> --- lib/got_lib_object_create.h
> +++ lib/got_lib_object_create.h
> @@ -15,7 +15,7 @@
>   */
>  
>  const struct got_error *got_object_blob_file_create(struct got_object_id **,
> -    FILE **, const char *);
> +    FILE **, off_t *, const char *);
>  const struct got_error *got_object_blob_create(struct got_object_id **,
>      const char *, struct got_repository *);
>  
> blob - e5e4c44c8a0327211b0094775682f2bed8ea42a8
> blob + 3102c04085d4e8c0157a4d3fd9c725a4b76f8c88
> --- lib/object_create.c
> +++ lib/object_create.c
> @@ -49,13 +49,13 @@
>  
>  static const struct got_error *
>  create_object_file(struct got_object_id *id, FILE *content,
> -    struct got_repository *repo)
> +    off_t content_len, struct got_repository *repo)
>  {
>  	const struct got_error *err = NULL, *unlock_err = NULL;
>  	char *objpath = NULL, *tmppath = NULL;
>  	FILE *tmpfile = NULL;
>  	struct got_lockfile *lf = NULL;
> -	size_t tmplen = 0;
> +	off_t tmplen = 0;
>  
>  	err = got_object_get_path(&objpath, id, repo);
>  	if (err)
> @@ -83,7 +83,7 @@ create_object_file(struct got_object_id *id, FILE *con
>  		goto done;
>  	}
>  
> -	err = got_deflate_to_file(&tmplen, content, tmpfile, NULL);
> +	err = got_deflate_to_file(&tmplen, content, content_len, tmpfile, NULL);
>  	if (err)
>  		goto done;
>  
> @@ -113,7 +113,7 @@ done:
>  
>  const struct got_error *
>  got_object_blob_file_create(struct got_object_id **id, FILE **blobfile,
> -    const char *ondisk_path)
> +    off_t *blobsize, const char *ondisk_path)
>  {
>  	const struct got_error *err = NULL;
>  	char *header = NULL;
> @@ -124,6 +124,7 @@ got_object_blob_file_create(struct got_object_id **id,
>  
>  	*id = NULL;
>  	*blobfile = NULL;
> +	*blobsize = 0;
>  
>  	SHA1Init(&sha1_ctx);
>  
> @@ -160,6 +161,7 @@ got_object_blob_file_create(struct got_object_id **id,
>  		err = got_ferror(*blobfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	*blobsize += headerlen;
>  	for (;;) {
>  		char buf[PATH_MAX * 8];
>  		ssize_t inlen;
> @@ -185,6 +187,7 @@ got_object_blob_file_create(struct got_object_id **id,
>  			err = got_ferror(*blobfile, GOT_ERR_IO);
>  			goto done;
>  		}
> +		*blobsize += n;
>  		if (S_ISLNK(sb.st_mode))
>  			break;
>  	}
> @@ -222,12 +225,14 @@ got_object_blob_create(struct got_object_id **id, cons
>  {
>  	const struct got_error *err = NULL;
>  	FILE *blobfile = NULL;
> +	off_t blobsize;
>  
> -	err = got_object_blob_file_create(id, &blobfile, ondisk_path);
> +	err = got_object_blob_file_create(id, &blobfile, &blobsize,
> +	    ondisk_path);
>  	if (err)
>  		return err;
>  
> -	err = create_object_file(*id, blobfile, repo);
> +	err = create_object_file(*id, blobfile, blobsize, repo);
>  	if (fclose(blobfile) == EOF && err == NULL)
>  		err = got_error_from_errno("fclose");
>  	if (err) {
> @@ -299,6 +304,7 @@ got_object_tree_create(struct got_object_id **id,
>  	char *header = NULL;
>  	size_t headerlen, len = 0, n;
>  	FILE *treefile = NULL;
> +	off_t treesize = 0;
>  	struct got_pathlist_entry *pe;
>  	struct got_tree_entry **sorted_entries;
>  	struct got_tree_entry *te;
> @@ -345,6 +351,7 @@ got_object_tree_create(struct got_object_id **id,
>  		err = got_ferror(treefile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	treesize += headerlen;
>  
>  	for (i = 0; i < nentries; i++) {
>  		te = sorted_entries[i];
> @@ -358,6 +365,7 @@ got_object_tree_create(struct got_object_id **id,
>  			goto done;
>  		}
>  		SHA1Update(&sha1_ctx, modebuf, len);
> +		treesize += n;
>  
>  		len = strlen(te->name) + 1; /* must include NUL */
>  		n = fwrite(te->name, 1, len, treefile);
> @@ -366,6 +374,7 @@ got_object_tree_create(struct got_object_id **id,
>  			goto done;
>  		}
>  		SHA1Update(&sha1_ctx, te->name, len);
> +		treesize += n;
>  
>  		len = SHA1_DIGEST_LENGTH;
>  		n = fwrite(te->id.sha1, 1, len, treefile);
> @@ -374,6 +383,7 @@ got_object_tree_create(struct got_object_id **id,
>  			goto done;
>  		}
>  		SHA1Update(&sha1_ctx, te->id.sha1, len);
> +		treesize += n;
>  	}
>  
>  	*id = malloc(sizeof(**id));
> @@ -389,7 +399,7 @@ got_object_tree_create(struct got_object_id **id,
>  	}
>  	rewind(treefile);
>  
> -	err = create_object_file(*id, treefile, repo);
> +	err = create_object_file(*id, treefile, treesize, repo);
>  done:
>  	free(header);
>  	free(sorted_entries);
> @@ -416,6 +426,7 @@ got_object_commit_create(struct got_object_id **id,
>  	char *id_str = NULL;
>  	size_t headerlen, len = 0, n;
>  	FILE *commitfile = NULL;
> +	off_t commitsize = 0;
>  	struct got_object_qid *qid;
>  	char *msg0, *msg;
>  
> @@ -471,6 +482,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += headerlen;
>  
>  	err = got_object_id_str(&id_str, tree_id);
>  	if (err)
> @@ -487,6 +499,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	if (parent_ids) {
>  		free(id_str);
> @@ -510,6 +523,7 @@ got_object_commit_create(struct got_object_id **id,
>  				free(parent_str);
>  				goto done;
>  			}
> +			commitsize += n;
>  			free(parent_str);
>  			free(id_str);
>  			id_str = NULL;
> @@ -523,6 +537,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	len = strlen(committer_str);
>  	SHA1Update(&sha1_ctx, committer_str, len);
> @@ -531,6 +546,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	SHA1Update(&sha1_ctx, "\n", 1);
>  	n = fwrite("\n", 1, 1, commitfile);
> @@ -538,6 +554,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	len = strlen(msg);
>  	SHA1Update(&sha1_ctx, msg, len);
> @@ -546,6 +563,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	SHA1Update(&sha1_ctx, "\n", 1);
>  	n = fwrite("\n", 1, 1, commitfile);
> @@ -553,6 +571,7 @@ got_object_commit_create(struct got_object_id **id,
>  		err = got_ferror(commitfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	commitsize += n;
>  
>  	*id = malloc(sizeof(**id));
>  	if (*id == NULL) {
> @@ -567,7 +586,7 @@ got_object_commit_create(struct got_object_id **id,
>  	}
>  	rewind(commitfile);
>  
> -	err = create_object_file(*id, commitfile, repo);
> +	err = create_object_file(*id, commitfile, commitsize, repo);
>  done:
>  	free(id_str);
>  	free(msg0);
> @@ -596,6 +615,7 @@ got_object_tag_create(struct got_object_id **id,
>  	char *id_str = NULL, *obj_str = NULL, *type_str = NULL;
>  	size_t headerlen, len = 0, n;
>  	FILE *tagfile = NULL;
> +	off_t tagsize = 0;
>  	char *msg0 = NULL, *msg;
>  	const char *obj_type_str;
>  	int obj_type;
> @@ -681,6 +701,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += headerlen;
>  	len = strlen(obj_str);
>  	SHA1Update(&sha1_ctx, obj_str, len);
>  	n = fwrite(obj_str, 1, len, tagfile);
> @@ -688,6 +709,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  	len = strlen(type_str);
>  	SHA1Update(&sha1_ctx, type_str, len);
>  	n = fwrite(type_str, 1, len, tagfile);
> @@ -695,6 +717,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	len = strlen(tag_str);
>  	SHA1Update(&sha1_ctx, tag_str, len);
> @@ -703,6 +726,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	len = strlen(tagger_str);
>  	SHA1Update(&sha1_ctx, tagger_str, len);
> @@ -711,6 +735,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	SHA1Update(&sha1_ctx, "\n", 1);
>  	n = fwrite("\n", 1, 1, tagfile);
> @@ -718,6 +743,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	len = strlen(msg);
>  	SHA1Update(&sha1_ctx, msg, len);
> @@ -726,6 +752,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	SHA1Update(&sha1_ctx, "\n", 1);
>  	n = fwrite("\n", 1, 1, tagfile);
> @@ -733,6 +760,7 @@ got_object_tag_create(struct got_object_id **id,
>  		err = got_ferror(tagfile, GOT_ERR_IO);
>  		goto done;
>  	}
> +	tagsize += n;
>  
>  	*id = malloc(sizeof(**id));
>  	if (*id == NULL) {
> @@ -747,7 +775,7 @@ got_object_tag_create(struct got_object_id **id,
>  	}
>  	rewind(tagfile);
>  
> -	err = create_object_file(*id, tagfile, repo);
> +	err = create_object_file(*id, tagfile, tagsize, repo);
>  done:
>  	free(msg0);
>  	free(header);
> blob - 8a3d8a8fd7f7eb29a2707c38e554ff6a37b63685
> blob + f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a
> --- lib/pack_create.c
> +++ lib/pack_create.c
> @@ -1385,11 +1385,10 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  	SHA1_CTX ctx;
>  	struct got_pack_meta *m;
>  	struct got_raw_object *raw = NULL;
> -	FILE *delta_file = NULL;
>  	char buf[32];
> -	size_t outlen, n;
> +	size_t n;
>  	struct got_deflate_checksum csum;
> -	off_t packfile_size = 0;
> +	off_t outlen, packfile_size = 0;
>  	int outfd = -1;
>  
>  	SHA1Init(&ctx);
> @@ -1440,7 +1439,7 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  					goto done;
>  				}
>  				err = got_deflate_to_file(&outlen, raw->f,
> -				    packfile, &csum);
> +				    raw->size, packfile, &csum);
>  				if (err)
>  					goto done;
>  			}
> @@ -1460,57 +1459,17 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  			free(m->delta_buf);
>  			m->delta_buf = NULL;
>  		} else {
> -			off_t remain;
> -			if (delta_file == NULL) {
> -				delta_file = got_opentemp();
> -				if (delta_file == NULL) {
> -					err = got_error_from_errno(
> -					    "got_opentemp");
> -					goto done;
> -				}
> -			}
> -			if (ftruncate(fileno(delta_file), 0L) == -1) {
> -				err = got_error_from_errno("ftruncate");
> -				goto done;
> -			}
> -			if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
> -				err = got_error_from_errno("fseeko");
> -				goto done;
> -			}
>  			if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
>  			    == -1) {
>  				err = got_error_from_errno("fseeko");
>  				goto done;
>  			}
> -			remain = m->delta_len;
> -			while (remain > 0) {
> -				char delta_buf[8192];
> -				size_t r, w, n;
> -				n = MIN(remain, sizeof(delta_buf));
> -				r = fread(delta_buf, 1, n, delta_cache);
> -				if (r != n) {
> -					err = got_ferror(delta_cache,
> -					    GOT_ERR_IO);
> -					goto done;
> -				}
> -				w = fwrite(delta_buf, 1, n, delta_file);
> -				if (w != n) {
> -					err = got_ferror(delta_file,
> -					    GOT_ERR_IO);
> -					goto done;
> -				}
> -				remain -= n;
> -			}
>  			err = deltahdr(&packfile_size, &ctx, packfile,
>  			    m, use_offset_deltas);
>  			if (err)
>  				goto done;
> -			if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
> -				err = got_error_from_errno("fseeko");
> -				goto done;
> -			}
> -			err = got_deflate_to_file(&outlen, delta_file,
> -			    packfile, &csum);
> +			err = got_deflate_to_file(&outlen, delta_cache,
> +			    m->delta_len, packfile, &csum);
>  			if (err)
>  				goto done;
>  			packfile_size += outlen;
> @@ -1529,8 +1488,6 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
>  			goto done;
>  	}
>  done:
> -	if (delta_file && fclose(delta_file) == EOF && err == NULL)
> -		err = got_error_from_errno("fclose");
>  	if (raw)
>  		got_object_raw_close(raw);
>  	if (outfd != -1 && close(outfd) == -1 && err == NULL)
> blob - dba36bee48eede3aca73746d5501b4565bbddf64
> blob + 0f78ee740bd078fd66df140aa19f2527ef430e0f
> --- lib/worktree.c
> +++ lib/worktree.c
> @@ -2884,11 +2884,13 @@ merge_file_cb(void *arg, struct got_blob_object *blob1
>  		case GOT_STATUS_ADD: {
>  			struct got_object_id *id;
>  			FILE *blob1_f;
> +			off_t blob1_size;
>  			/*
>  			 * Delete the added file only if its content already
>  			 * exists in the repository.
>  			 */
> -			err = got_object_blob_file_create(&id, &blob1_f, path1);
> +			err = got_object_blob_file_create(&id, &blob1_f,
> +			    &blob1_size, path1);
>  			if (err)
>  				goto done;
>  			if (got_object_id_cmp(id, id1) == 0) {
> blob - d2b9ef37e4422a8ab8e017e17df21a7c6f3be7e8
> blob + 34d74d26fefb2a4461a2c489cade5dcd7ebfcd66
> --- regress/cmdline/commit.sh
> +++ regress/cmdline/commit.sh
> @@ -1458,6 +1458,54 @@ EOF
>  	test_done "$testroot" "$ret"
>  }
>  
> +test_commit_large_file() {
> +	local testroot=`test_init commit_large_file`
> +
> +	got checkout $testroot/repo $testroot/wt > /dev/null
> +	ret="$?"
> +	if [ "$ret" != "0" ]; then
> +		test_done "$testroot" "$ret"
> +		return 1
> +	fi
> +
> +	dd status=none if=/dev/zero of=$testroot/wt/new bs=1m count=64
> +	(cd $testroot/wt && got add new >/dev/null)
> +
> +	(cd $testroot/wt && got commit -m 'test commit_large_file' \
> +		> $testroot/stdout)
> +
> +	local head_rev=`git_show_head $testroot/repo`
> +	echo "A  new" > $testroot/stdout.expected
> +	echo "Created commit $head_rev" >> $testroot/stdout.expected
> +
> +	cmp -s $testroot/stdout.expected $testroot/stdout
> +	ret="$?"
> +	if [ "$ret" != "0" ]; then
> +		diff -u $testroot/stdout.expected $testroot/stdout
> +		test_done "$testroot" "$ret"
> +		return 1
> +	fi
> +
> +	new_id=`get_blob_id $testroot/repo "" new`
> +	got cat -r $testroot/repo $new_id > $testroot/new
> +	ret="$?"
> +	if [ "$ret" != "0" ]; then
> +		echo "commit failed unexpectedly" >&2
> +		test_done "$testroot" "1"
> +		return 1
> +	fi
> +
> +	cmp -s $testroot/new $testroot/wt/new
> +	ret="$?"
> +	if [ "$ret" != "0" ]; then
> +		diff -u $testroot/new $testroot/wt/new
> +	fi
> +	test_done "$testroot" "$ret"
> +
> +
> +}
> +
> +
>  test_parseargs "$@"
>  run_test test_commit_basic
>  run_test test_commit_new_subdir
> @@ -1484,3 +1532,4 @@ run_test test_commit_with_unrelated_submodule
>  run_test test_commit_symlink
>  run_test test_commit_fix_bad_symlink
>  run_test test_commit_prepared_logmsg
> +run_test test_commit_large_file
> 
>