"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: let gotadmin pack map files into memory
To:
gameoftrees@openbsd.org
Date:
Sat, 8 Jan 2022 00:35:35 +0100

Download raw body.

Thread
On Fri, Dec 31, 2021 at 08:10:22PM +0100, Stefan Sperling wrote:
> On Fri, Dec 31, 2021 at 05:50:43PM +0100, Stefan Sperling wrote:
> > Computing deltas between files involves a lot of seeking around in
> > order to search for matching blocks. With this patch we bypass the
> > stdio layer for files which are small enough to be mapped into memory,
> > while keeping the stdio case as a fallback for very large files.
> 
> Please ignore this patch for now.
> It causes bad object errors from 'git fsck' after 'gotadmin pack -a'.

Here is a fixed version of this patch.

diff c5e958c75b5aa4670baf12e377e4039999236aea aedf7836dfba3485346e9bc11662cd990ac3ea2c
blob - df0038ffc2f2db7feca6d9cc6ba518f9df340906
blob + 52c3d2577c7b0308e62c7ff466152e61a38f3c50
--- lib/deflate.c
+++ lib/deflate.c
@@ -128,6 +128,46 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
 	return NULL;
 }
 
+/*
+ * Compress data from a memory buffer into zb's output buffer.
+ *
+ * Input is taken from map + offset and at most len bytes of it are fed
+ * to zlib. The loop runs until the output buffer is full or deflate()
+ * returns something other than Z_OK. On return *outlenp holds the number
+ * of compressed bytes produced by this call and *consumed the number of
+ * input bytes zlib took during this call.
+ *
+ * GOT_DEFLATE_F_HAVE_MORE is set in zb->flags if deflate() last returned
+ * Z_OK and is cleared on Z_STREAM_END. Any other zlib result is reported
+ * as GOT_ERR_COMPRESSION.
+ *
+ * Input still pending in the z_stream from an earlier call (avail_in != 0)
+ * is processed before new input is taken from the buffer.
+ */
+const struct got_error *
+got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset,
+    size_t len, size_t *outlenp, size_t *consumed)
+{
+	z_stream *z = &zb->z;
+	size_t last_total_out = z->total_out;
+	int ret = Z_ERRNO;
+
+	z->next_out = zb->outbuf;
+	z->avail_out = zb->outlen;
+
+	*outlenp = 0;
+	*consumed = 0;
+	do {
+		size_t last_total_in = z->total_in;
+		if (z->avail_in == 0) {
+			size_t remain = len - *consumed;
+
+			z->next_in = map + offset + *consumed;
+			/*
+			 * avail_in is a zlib uInt. Clamp rather than truncate
+			 * so that a large remainder is fed in several rounds
+			 * and is never mistaken for the end of input.
+			 */
+			if (remain > (uInt)-1)
+				z->avail_in = (uInt)-1;
+			else
+				z->avail_in = remain;
+			if (z->avail_in == 0) {
+				/* EOF */
+				ret = deflate(z, Z_FINISH);
+				break;
+			}
+		}
+		ret = deflate(z, Z_NO_FLUSH);
+		*consumed += z->total_in - last_total_in;
+	} while (ret == Z_OK && z->avail_out > 0);
+
+	if (ret == Z_OK) {
+		zb->flags |= GOT_DEFLATE_F_HAVE_MORE;
+	} else {
+		if (ret != Z_STREAM_END)
+			return got_error(GOT_ERR_COMPRESSION);
+		zb->flags &= ~GOT_DEFLATE_F_HAVE_MORE;
+	}
+
+	*outlenp = z->total_out - last_total_out;
+	return NULL;
+}
+
 void
 got_deflate_end(struct got_deflate_buf *zb)
 {
@@ -172,3 +212,41 @@ done:
 	got_deflate_end(&zb);
 	return err;
 }
+
+/*
+ * Compress len bytes found at map + offset and write the compressed
+ * stream to outfile. The number of compressed bytes written is stored in
+ * *outlen. If csum is not NULL, each chunk of compressed output is also
+ * passed to csum_output().
+ */
+const struct got_error *
+got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset,
+    size_t len, FILE *outfile, struct got_deflate_checksum *csum)
+{
+	const struct got_error *err;
+	struct got_deflate_buf zb;
+	size_t nout, nin;
+
+	err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
+	if (err != NULL)
+		goto done;
+
+	*outlen = 0;
+	for (;;) {
+		err = got_deflate_read_mmap(&zb, map, offset, len,
+		    &nout, &nin);
+		if (err != NULL)
+			break;
+
+		/* Skip over the input which has been compressed already. */
+		offset += nin;
+		len -= nin;
+
+		if (nout != 0) {
+			if (fwrite(zb.outbuf, nout, 1, outfile) != 1) {
+				err = got_ferror(outfile, GOT_ERR_IO);
+				break;
+			}
+			if (csum != NULL)
+				csum_output(csum, zb.outbuf, nout);
+			*outlen += nout;
+		}
+
+		if ((zb.flags & GOT_DEFLATE_F_HAVE_MORE) == 0)
+			break;
+	}
+
+done:
+	got_deflate_end(&zb);
+	return err;
+}
blob - b7d25c92ed71ded6a1b1729d2519e6d8daa10393
blob + 0285cfc40e99bc160a29bfb3df79477d73b75cf4
--- lib/deltify.c
+++ lib/deltify.c
@@ -179,6 +179,70 @@ addblk(struct got_delta_table *dt, FILE *f, off_t file
 }
 
 static const struct got_error *
+addblk_mem(struct got_delta_table *dt, uint8_t *data, off_t file_offset0,
+    off_t len, off_t offset, uint64_t h)
+{
+	const struct got_error *err = NULL;
+	int i;
+	uint8_t *block1;
+	uint8_t *block2;
+
+	if (len == 0)
+		return NULL;
+
+	i = h % dt->nalloc;
+	while (dt->blocks[i].len != 0) {
+		/*
+		 * Avoid adding duplicate blocks.
+		 * NB: A matching hash is insufficient for detecting equality.
+		 * The hash can only detect inequality.
+		 */
+		if (len == dt->blocks[i].len && h == dt->blocks[i].hash) {
+			block1 = data + file_offset0 + dt->blocks[i].offset;
+			block2 = data + file_offset0 + offset;
+			if (memcmp(block1, block2, len) == 0)
+				return NULL;
+		}
+
+		i = (i + 1) % dt->nalloc;
+	}
+	assert(dt->blocks[i].len == 0);
+	dt->blocks[i].len = len;
+	dt->blocks[i].offset = offset;
+	dt->blocks[i].hash = h;
+	dt->nblocks++;
+	if (dt->nalloc < dt->nblocks + 64) {
+		struct got_delta_block *db;
+		size_t old_size = dt->nalloc;
+		db = dt->blocks;
+		dt->blocks = calloc(dt->nalloc + 64,
+		    sizeof(struct got_delta_block));
+		if (dt->blocks == NULL) {
+			err = got_error_from_errno("calloc");
+			dt->blocks = db;
+			return err;
+		}
+		dt->nalloc += 64;
+		/*
+		 * Recompute all block positions. Hash-based indices of blocks
+		 * in the array depend on the allocated length of the array.
+		 */
+		dt->nblocks = 0;
+		for (i = 0; i < old_size; i++) {
+			if (db[i].len == 0)
+				continue;
+			err = addblk_mem(dt, data, file_offset0, db[i].len,
+			    db[i].offset, db[i].hash);
+			if (err)
+				break;
+		}
+		free(db);
+	}
+
+	return err;
+}
+
+static const struct got_error *
 lookupblk(struct got_delta_block **block, struct got_delta_table *dt,
     unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0)
 {
@@ -210,6 +274,31 @@ lookupblk(struct got_delta_block **block, struct got_d
 }
 
 static const struct got_error *
+lookupblk_mem(struct got_delta_block **block, struct got_delta_table *dt,
+    unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0)
+{
+	int i;
+	uint64_t h;
+	uint8_t *b;
+
+	*block = NULL;
+
+	h = hashblk(p, len);
+	for (i = h % dt->nalloc; dt->blocks[i].len != 0;
+	     i = (i + 1) % dt->nalloc) {
+		if (dt->blocks[i].hash != h ||
+		    dt->blocks[i].len != len)
+			continue;
+		b = basedata + basefile_offset0 + dt->blocks[i].offset;
+		if (memcmp(p, b, len) == 0) {
+			*block = &dt->blocks[i];
+			break;
+		}
+	}
+	return NULL;
+}
+
+static const struct got_error *
 nextblk(uint8_t *buf, off_t *blocklen, FILE *f)
 {
 	uint32_t gh;
@@ -241,6 +330,31 @@ nextblk(uint8_t *buf, off_t *blocklen, FILE *f)
 	return NULL;
 }
 
+/*
+ * Determine the length of the next block in a memory buffer.
+ *
+ * The block begins at data + fileoffset. *blocklen is set to zero if
+ * fewer than GOT_DELTIFY_MINCHUNK bytes remain before filesize. Otherwise
+ * the block ends where the rolling gear hash hits the split mask, or at
+ * GOT_DELTIFY_MAXCHUNK bytes, or at filesize, whichever comes first.
+ * This function always returns NULL, i.e. no error.
+ */
+static const struct got_error *
+nextblk_mem(off_t *blocklen, uint8_t *data, off_t fileoffset, off_t filesize)
+{
+	const unsigned char *start, *end, *p;
+	uint32_t gh = 0;
+
+	*blocklen = 0;
+
+	/* no more delta-worthy blocks left */
+	if (fileoffset >= filesize)
+		return NULL;
+	if (filesize - fileoffset < GOT_DELTIFY_MINCHUNK)
+		return NULL;
+
+	/* Got a deltifiable block. Find the split-point where it ends. */
+	start = data + fileoffset;
+	end = data + MIN(fileoffset + GOT_DELTIFY_MAXCHUNK, filesize);
+	for (p = start + GOT_DELTIFY_MINCHUNK; p != end; ) {
+		gh = (gh << 1) + geartab[*p++];
+		if ((gh & GOT_DELTIFY_SPLITMASK) == 0)
+			break;
+	}
+
+	*blocklen = p - start;
+	return NULL;
+}
+
 const struct got_error *
 got_deltify_init(struct got_delta_table **dt, FILE *f, off_t fileoffset,
     off_t filesize)
@@ -291,6 +405,50 @@ done:
 	return err;
 }
 
+/*
+ * Build a block table for a delta base which is held in memory.
+ *
+ * The bytes from data + fileoffset up to data + filesize are split into
+ * blocks and every block is added to a newly allocated table which
+ * initially has room for 128 entries. Block offsets are stored relative
+ * to the initial fileoffset. On success the table is returned in *dt.
+ * On failure *dt is NULL and nothing remains allocated.
+ */
+const struct got_error *
+got_deltify_init_mem(struct got_delta_table **dt, uint8_t *data,
+    off_t fileoffset, off_t filesize)
+{
+	const struct got_error *err = NULL;
+	const off_t offset0 = fileoffset;
+	struct got_delta_table *tab;
+	off_t blocklen;
+
+	*dt = NULL;
+
+	tab = calloc(1, sizeof(*tab));
+	if (tab == NULL)
+		return got_error_from_errno("calloc");
+
+	tab->nblocks = 0;
+	tab->nalloc = 128;
+	tab->blocks = calloc(tab->nalloc, sizeof(struct got_delta_block));
+	if (tab->blocks == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
+
+	for (; fileoffset < filesize; fileoffset += blocklen) {
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err != NULL || blocklen == 0)
+			break;
+		err = addblk_mem(tab, data, offset0, blocklen,
+		    fileoffset - offset0, hashblk(data + fileoffset, blocklen));
+		if (err != NULL)
+			break;
+	}
+done:
+	if (err != NULL) {
+		free(tab->blocks);
+		free(tab);
+		return err;
+	}
+
+	*dt = tab;
+	return NULL;
+}
+
 void
 got_deltify_free(struct got_delta_table *dt)
 {
@@ -359,6 +517,115 @@ stretchblk(FILE *basefile, off_t base_offset0, struct 
 	return NULL;
 }
 
+/*
+ * Try to extend a block match beyond its initial length, comparing bytes
+ * read from stream f against the in-memory delta base.
+ *
+ * On entry *blocklen is the length of the match found so far. Comparison
+ * on the base side starts at base_offset0 + block->offset + *blocklen.
+ * f is read from its current position, which is therefore expected to be
+ * right behind the matched block. *blocklen grows by one for every
+ * further matching byte. Stretching stops at the first mismatch, at the
+ * end of the base, at EOF on f, or once *blocklen reaches (1 << 24) - 1.
+ *
+ * Returns GOT_ERR_RANGE if the starting base position lies beyond
+ * basefile_size, or an I/O error if reading from f fails. The filesize
+ * argument is not consulted.
+ */
+static const struct got_error *
+stretchblk_file_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
+     struct got_delta_block *block, FILE *f, off_t filesize, off_t *blocklen)
+{
+	uint8_t buf[GOT_DELTIFY_MAXCHUNK];
+	size_t r, i;
+	int buf_equal = 1;
+	off_t base_offset = base_offset0 + block->offset + *blocklen;
+
+	if (base_offset > basefile_size) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of delta base at offset %lld",
+		    (long long)base_offset);
+	}
+
+	while (buf_equal && *blocklen < (1 << 24) - 1) {
+		/* base_offset already accounts for all bytes matched. */
+		if (base_offset >= basefile_size)
+			break;
+		r = fread(buf, 1, sizeof(buf), f);
+		if (r == 0) {
+			if (ferror(f))
+				return got_ferror(f, GOT_ERR_IO);
+			break;
+		}
+		for (i = 0; i < MIN(basefile_size - base_offset, r) &&
+		    *blocklen < (1 << 24) - 1; i++) {
+			if (buf[i] != *(basedata + base_offset + i)) {
+				buf_equal = 0;
+				break;
+			}
+			(*blocklen)++;
+		}
+		/*
+		 * Move past the bytes just matched so that the next chunk
+		 * read from f is compared against the base bytes which
+		 * follow, not against the same base region again.
+		 */
+		base_offset += i;
+	}
+
+	return NULL;
+}
+
+/*
+ * Try to extend a block match beyond its initial length, comparing
+ * in-memory data against bytes read from the delta base stream.
+ *
+ * On entry *blocklen is the length of the match found so far and
+ * fileoffset is the position in data of the first byte to compare.
+ * basefile is positioned at base_offset0 + block->offset + *blocklen.
+ * *blocklen grows by one for every further matching byte. Stretching
+ * stops at the first mismatch, at filesize, at EOF on basefile, or once
+ * *blocklen reaches (1 << 24) - 1.
+ *
+ * Returns GOT_ERR_RANGE if fileoffset lies beyond filesize, or an error
+ * if seeking or reading basefile fails.
+ */
+static const struct got_error *
+stretchblk_mem_file(FILE *basefile, off_t base_offset0,
+    struct got_delta_block *block, uint8_t *data, off_t fileoffset,
+    off_t filesize, off_t *blocklen)
+{
+	uint8_t basebuf[GOT_DELTIFY_MAXCHUNK];
+	size_t base_r, i;
+	int buf_equal = 1;
+
+	if (fileoffset > filesize) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of deltify file at offset %lld",
+		    (long long)fileoffset);
+	}
+
+	if (fseeko(basefile, base_offset0 + block->offset + *blocklen,
+	    SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
+	while (buf_equal && *blocklen < (1 << 24) - 1) {
+		/* fileoffset already accounts for all bytes matched. */
+		if (fileoffset >= filesize)
+			break;
+		base_r = fread(basebuf, 1, sizeof(basebuf), basefile);
+		if (base_r == 0) {
+			if (ferror(basefile))
+				return got_ferror(basefile, GOT_ERR_IO);
+			break;
+		}
+		for (i = 0; i < MIN(base_r, filesize - fileoffset) &&
+		    *blocklen < (1 << 24) - 1; i++) {
+			if (*(data + fileoffset + i) != basebuf[i]) {
+				buf_equal = 0;
+				break;
+			}
+			(*blocklen)++;
+		}
+		/*
+		 * Move past the bytes just matched so that the next chunk
+		 * read from basefile is compared against the data which
+		 * follows, not against the same data region again.
+		 */
+		fileoffset += i;
+	}
+
+	return NULL;
+}
+
+/*
+ * Try to extend a block match beyond its initial length when both the
+ * delta base and the data being deltified are held in memory.
+ *
+ * On entry *blocklen is the length of the match found so far and
+ * fileoffset is the position in data of the first byte to compare.
+ * Comparison on the base side starts at base_offset0 + block->offset +
+ * *blocklen. *blocklen grows by one for every further matching byte.
+ * Stretching stops at the first mismatch, at the end of either buffer,
+ * or once *blocklen reaches (1 << 24) - 1.
+ *
+ * Returns GOT_ERR_RANGE if either starting position lies beyond the size
+ * of its buffer.
+ */
+static const struct got_error *
+stretchblk_mem_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
+    struct got_delta_block *block, uint8_t *data, off_t fileoffset,
+    off_t filesize, off_t *blocklen)
+{
+	off_t i, maxlen;
+	off_t base_offset = base_offset0 + block->offset + *blocklen;
+	uint8_t *p, *q;
+
+	/* off_t is signed; print it via a cast to long long. */
+	if (base_offset > basefile_size) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of delta base at offset %lld",
+		    (long long)base_offset);
+	}
+
+	if (fileoffset > filesize) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of deltify file at offset %lld",
+		    (long long)fileoffset);
+	}
+
+	p = data + fileoffset;
+	q = basedata + base_offset;
+	/* Never compare beyond the end of the shorter remainder. */
+	maxlen = MIN(basefile_size - base_offset, filesize - fileoffset);
+	for (i = 0; i < maxlen && *blocklen < (1 << 24) - 1; i++) {
+		if (p[i] != q[i])
+			break;
+		(*blocklen)++;
+	}
+
+	return NULL;
+}
+
 const struct got_error *
 got_deltify(struct got_delta_instruction **deltas, int *ndeltas,
     FILE *f, off_t fileoffset, off_t filesize,
@@ -445,3 +712,240 @@ got_deltify(struct got_delta_instruction **deltas, int
 	}
 	return err;
 }
+
+/*
+ * Compute delta instructions for data read from stream f against a delta
+ * base which is held in memory.
+ *
+ * Data between fileoffset and filesize in f is split into blocks with
+ * nextblk(). Each block is looked up in the block table dt. A block found
+ * in the base is stretched and emitted with a flag value of 1 and the
+ * block's offset in the base. A block not found is emitted with a flag
+ * value of 0 and its offset relative to the initial fileoffset.
+ *
+ * On success the instruction array is returned in *deltas and its length
+ * in *ndeltas. On failure the array is freed, *deltas is NULL and
+ * *ndeltas is zero.
+ */
+const struct got_error *
+got_deltify_file_mem(struct got_delta_instruction **deltas, int *ndeltas,
+    FILE *f, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, uint8_t *basedata,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+	const size_t alloc_chunk_size = 64;
+
+	*deltas = NULL;
+	*ndeltas = 0;
+
+	/*
+	 * offset0 indicates where data to be deltified begins.
+	 * For example, we want to avoid deltifying a Git object header at
+	 * the beginning of the file.
+	 */
+	if (fseeko(f, offset0, SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		uint8_t buf[GOT_DELTIFY_MAXCHUNK];
+		off_t blocklen;
+		struct got_delta_block *block;
+		err = nextblk(buf, &blocklen, f);
+		if (err)
+			break;
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			if (fileoffset < filesize) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 0, fileoffset - offset0,
+				    filesize - fileoffset);
+			}
+			break;
+		}
+		err = lookupblk_mem(&block, dt, buf, blocklen, basedata,
+		    basefile_offset0);
+		if (err)
+			break;
+		if (block != NULL) {
+			/*
+			 * We have found a matching block in the delta base.
+			 * Attempt to stretch the block as far as possible and
+			 * generate a copy instruction.
+			 */
+			err = stretchblk_file_mem(basedata, basefile_offset0,
+			    basefile_size, block, f, filesize, &blocklen);
+			if (err)
+				break;
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 1, block->offset, blocklen);
+			if (err)
+				break;
+		} else {
+			/*
+			 * No match.
+			 * This block needs to be sourced from the file itself.
+			 */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0, blocklen);
+			if (err)
+				break;
+		}
+		fileoffset += blocklen;
+		/*
+		 * Reading ahead while stretching moves the position of f.
+		 * Put it back to where the next block begins.
+		 */
+		if (fseeko(f, fileoffset, SEEK_SET) == -1) {
+			err = got_error_from_errno("fseeko");
+			break;
+		}
+	}
+
+	/* Never hand a partial result to the caller. */
+	if (err) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
+
+/*
+ * Compute delta instructions for data held in memory against a delta
+ * base which is read from the stream basefile.
+ *
+ * Data between fileoffset and filesize is split into blocks and each
+ * block is looked up in the block table dt. On success the instruction
+ * array is returned in *deltas and its length in *ndeltas. On failure
+ * the array is freed, *deltas is NULL and *ndeltas is zero.
+ */
+const struct got_error *
+got_deltify_mem_file(struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, FILE *basefile,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const size_t alloc_chunk_size = 64;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+
+	*ndeltas = 0;
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		struct got_delta_block *block;
+		off_t blocklen;
+
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err != NULL)
+			break;
+
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0,
+			    filesize - fileoffset);
+			break;
+		}
+
+		err = lookupblk(&block, dt, data + fileoffset, blocklen,
+		    basefile, basefile_offset0);
+		if (err != NULL)
+			break;
+
+		if (block == NULL) {
+			/* No match. Source this block from the file itself. */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0,
+			    blocklen);
+		} else {
+			/*
+			 * The delta base contains this block. Make the match
+			 * as long as possible, then emit a copy instruction.
+			 */
+			err = stretchblk_mem_file(basefile, basefile_offset0,
+			    block, data, fileoffset + blocklen, filesize,
+			    &blocklen);
+			if (err == NULL) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 1, block->offset,
+				    blocklen);
+			}
+		}
+		if (err != NULL)
+			break;
+
+		fileoffset += blocklen;
+	}
+
+	if (err != NULL) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
+
+/*
+ * Compute delta instructions for data held in memory against a delta
+ * base which is held in memory as well.
+ *
+ * Data between fileoffset and filesize is split into blocks and each
+ * block is looked up in the block table dt. On success the instruction
+ * array is returned in *deltas and its length in *ndeltas. On failure
+ * the array is freed, *deltas is NULL and *ndeltas is zero.
+ */
+const struct got_error *
+got_deltify_mem_mem(struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, uint8_t *basedata,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const size_t alloc_chunk_size = 64;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+
+	*ndeltas = 0;
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		struct got_delta_block *block;
+		off_t blocklen;
+
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err != NULL)
+			break;
+
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0,
+			    filesize - fileoffset);
+			break;
+		}
+
+		err = lookupblk_mem(&block, dt, data + fileoffset, blocklen,
+		    basedata, basefile_offset0);
+		if (err != NULL)
+			break;
+
+		if (block == NULL) {
+			/* No match. Source this block from the file itself. */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0,
+			    blocklen);
+		} else {
+			/*
+			 * The delta base contains this block. Make the match
+			 * as long as possible, then emit a copy instruction.
+			 */
+			err = stretchblk_mem_mem(basedata, basefile_offset0,
+			    basefile_size, block, data, fileoffset + blocklen,
+			    filesize, &blocklen);
+			if (err == NULL) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 1, block->offset,
+				    blocklen);
+			}
+		}
+		if (err != NULL)
+			break;
+
+		fileoffset += blocklen;
+	}
+
+	if (err != NULL) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
blob - 6eee636c9f50d9e8a6efef4d4aad6daf6373cdf1
blob + c0b4fe271d48124cc97b135feff4b79f951c6720
--- lib/got_lib_deflate.h
+++ lib/got_lib_deflate.h
@@ -42,3 +42,5 @@ const struct got_error *got_deflate_read(struct got_de
 void got_deflate_end(struct got_deflate_buf *);
 const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *,
     struct got_deflate_checksum *);
+const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *,
+    size_t, size_t, FILE *, struct got_deflate_checksum *);
blob - 848c59979d94aa7c6bc1d69f81419df67855c141
blob + 956f6c3dab971228b22f00297f9ee0ef0ef8f894
--- lib/got_lib_deltify.h
+++ lib/got_lib_deltify.h
@@ -42,8 +42,22 @@ enum {
 
 const struct got_error *got_deltify_init(struct got_delta_table **dt, FILE *f,
     off_t fileoffset, off_t filesize);
+const struct got_error *got_deltify_init_mem(struct got_delta_table **dt,
+    uint8_t *data, off_t fileoffset, off_t filesize);
 const struct got_error *got_deltify(struct got_delta_instruction **deltas,
     int *ndeltas, FILE *f, off_t fileoffset, off_t filesize,
     struct got_delta_table *dt, FILE *basefile, off_t basefile_offset0,
     off_t basefile_size);
+const struct got_error *got_deltify_file_mem(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    FILE *f, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    uint8_t *basedata, off_t basefile_offset0, off_t basefile_size);
+const struct got_error *got_deltify_mem_file(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    FILE *basefile, off_t basefile_offset0, off_t basefile_size);
+const struct got_error *got_deltify_mem_mem(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    uint8_t *basedata, off_t basefile_offset0, off_t basefile_size);
 void got_deltify_free(struct got_delta_table *dt);
blob - 0115e7b78c440326e9e91c010a967b702ff8432f
blob + 60920514cdf4045702691ca0ad19a5d080a8893e
--- lib/got_lib_object.h
+++ lib/got_lib_object.h
@@ -36,7 +36,8 @@ struct got_object {
 };
 
 struct got_raw_object {
-	FILE *f;
+	FILE *f;		/* NULL if data buffer is being used */
+	int fd;			/* -1 unless data buffer is memory-mapped */
 	uint8_t *data;
 	off_t size;
 	size_t hdrlen;
blob - fd5a9771d547a2696e75c7663d977c2e02cb367f
blob + ce95f8795c1e303f174dcdf4d6eeb41563569e6f
--- lib/object.c
+++ lib/object.c
@@ -22,6 +22,7 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/resource.h>
+#include <sys/mman.h>
 
 #include <errno.h>
 #include <fcntl.h>
@@ -598,13 +599,9 @@ got_object_raw_open(struct got_raw_object **obj, int *
 		err = got_error_from_errno("calloc");
 		goto done;
 	}
+	(*obj)->fd = -1;
 
 	if (outbuf) {
-		(*obj)->f = fmemopen(outbuf, hdrlen + size, "r");
-		if ((*obj)->f == NULL) {
-			err = got_error_from_errno("fdopen");
-			goto done;
-		}
 		(*obj)->data = outbuf;
 	} else {
 		struct stat sb;
@@ -617,14 +614,30 @@ got_object_raw_open(struct got_raw_object **obj, int *
 			err = got_error(GOT_ERR_PRIVSEP_LEN);
 			goto done;
 		}
-
-		(*obj)->f = fdopen(*outfd, "r");
-		if ((*obj)->f == NULL) {
-			err = got_error_from_errno("fdopen");
-			goto done;
+#ifndef GOT_PACK_NO_MMAP
+		if (hdrlen + size > 0) {
+			(*obj)->data = mmap(NULL, hdrlen + size, PROT_READ,
+			    MAP_PRIVATE, *outfd, 0);
+			if ((*obj)->data == MAP_FAILED) {
+				if (errno != ENOMEM) {
+					err = got_error_from_errno("mmap");
+					goto done;
+				}
+				(*obj)->data = NULL;
+			} else {
+				(*obj)->fd = *outfd;
+				*outfd = -1;
+			}
 		}
-		(*obj)->data = NULL;
-		*outfd = -1;
+#endif
+		if (*outfd != -1) {
+			(*obj)->f = fdopen(*outfd, "r");
+			if ((*obj)->f == NULL) {
+				err = got_error_from_errno("fdopen");
+				goto done;
+			}
+			*outfd = -1;
+		}
 	}
 	(*obj)->hdrlen = hdrlen;
 	(*obj)->size = size;
blob - a92f3d54cd59722d9a59cd6d2c715b21bc4c1965
blob + b4f415940ee578ecafa1e8ceca60032674013904
--- lib/object_parse.c
+++ lib/object_parse.c
@@ -21,6 +21,7 @@
 #include <sys/uio.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
+#include <sys/mman.h>
 
 #include <errno.h>
 #include <stdio.h>
@@ -144,9 +145,18 @@ got_object_raw_close(struct got_raw_object *obj)
 			return NULL;
 	}
 
-	if (obj->f != NULL && fclose(obj->f) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
-	free(obj->data);
+	if (obj->f == NULL) {
+		if (obj->fd != -1) {
+			if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
+				err = got_error_from_errno("munmap");
+			if (close(obj->fd) == -1 && err == NULL)
+				err = got_error_from_errno("close");
+		} else
+			free(obj->data);
+	} else {
+		if (fclose(obj->f) == EOF && err == NULL)
+			err = got_error_from_errno("fclose");
+	}
 	free(obj);
 	return err;
 }
blob - 884cbfab86cc8fe210c5fe53fddb3417cb415a7c
blob + 7342a8ac1b751a6ef554f7a3b5d05eed6d664933
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -227,6 +227,19 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o
 			w = fwrite(buf, 1, bp - buf, f);
 			if (w != bp - buf)
 				return got_ferror(f, GOT_ERR_IO);
+		} else if (o->f == NULL) {
+			n = 0;
+			while (n != d->len) {
+				buf[0] = (d->len - n < 127) ? d->len - n : 127;
+				w = fwrite(buf, 1, 1, f);
+				if (w != 1)
+					return got_ferror(f, GOT_ERR_IO);
+				w = fwrite(o->data + o->hdrlen + d->offset + n,
+				    1, buf[0], f);
+				if (w != buf[0])
+					return got_ferror(f, GOT_ERR_IO);
+				n += buf[0];
+			}
 		} else {
 			char content[128];
 			size_t r;
@@ -307,8 +320,13 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
 			goto done;
 		m->size = raw->size;
 
-		err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
-		    raw->size + raw->hdrlen);
+		if (raw->f == NULL) {
+			err = got_deltify_init_mem(&m->dtab, raw->data,
+			    raw->hdrlen, raw->size + raw->hdrlen);
+		} else {
+			err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
+			    raw->size + raw->hdrlen);
+		}
 		if (err)
 			goto done;
 
@@ -337,10 +355,34 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
 			    &base->id);
 			if (err)
 				goto done;
-			err = got_deltify(&deltas, &ndeltas,
-			    raw->f, raw->hdrlen, raw->size + raw->hdrlen,
-			    base->dtab, base_raw->f, base_raw->hdrlen,
-			    base_raw->size + base_raw->hdrlen);
+			if (raw->f == NULL && base_raw->f == NULL) {
+				err = got_deltify_mem_mem(&deltas, &ndeltas,
+				    raw->data, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->data,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else if (raw->f == NULL) {
+				err = got_deltify_mem_file(&deltas, &ndeltas,
+				    raw->data, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->f,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else if (base_raw->f == NULL) {
+				err = got_deltify_file_mem(&deltas, &ndeltas,
+				    raw->f, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->data,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else {
+				err = got_deltify(&deltas, &ndeltas,
+				    raw->f, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->f, base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			}
 			got_object_raw_close(base_raw);
 			base_raw = NULL;
 			if (err)
@@ -1205,14 +1247,23 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 			if (err)
 				goto done;
 			packfile_size += nh;
-			if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
-				err = got_error_from_errno("fseeko");
-				goto done;
+			if (raw->f == NULL) {
+				err = got_deflate_to_file_mmap(&outlen,
+				    raw->data + raw->hdrlen, 0, raw->size,
+				    packfile, &csum);
+				if (err)
+					goto done;
+			} else {
+				if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
+				    == -1) {
+					err = got_error_from_errno("fseeko");
+					goto done;
+				}
+				err = got_deflate_to_file(&outlen, raw->f,
+				    packfile, &csum);
+				if (err)
+					goto done;
 			}
-			err = got_deflate_to_file(&outlen, raw->f, packfile,
-			    &csum);
-			if (err)
-				goto done;
 			packfile_size += outlen;
 			got_object_raw_close(raw);
 			raw = NULL;
blob - 872d88379087aa49d66ebbc780ae19c92d4637e9
blob + b4e79453a7e3443c1559612f98ab36e7d8c7ca50
--- regress/deltify/deltify_test.c
+++ regress/deltify/deltify_test.c
@@ -122,6 +122,255 @@ done:
 	return (err == NULL);
 }
 
+/*
+ * Deltify a derived stream "aaa...xxx...ccc..." against an in-memory base
+ * "aaa...bbb...ccc...", where each run is GOT_DELTIFY_MAXCHUNK bytes long.
+ * Three instructions are expected: copy the 'a' run from the base, take
+ * the 'x' run from the derived data, copy the 'c' run from the base.
+ * Returns non-zero if the test passed and zero if it failed.
+ */
+static int
+deltify_abc_axc_file_mem(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	uint8_t base_data[3 * GOT_DELTIFY_MAXCHUNK];
+	FILE *derived_file, *result_file;
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	/* A setup failure is a failed test, so zero must be returned. */
+	derived_file = got_opentemp();
+	if (derived_file == NULL)
+		return 0;
+
+	result_file = got_opentemp();
+	if (result_file == NULL) {
+		fclose(derived_file);
+		return 0;
+	}
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[i] = 'a';
+		fputc('a', derived_file);
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[GOT_DELTIFY_MAXCHUNK + i] = 'b';
+		fputc('x', derived_file);
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+		fputc('c', derived_file);
+	}
+
+	rewind(derived_file);
+
+	err = got_deltify_init_mem(&dt, base_data, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	/* The table's block counter must agree with the slots in use. */
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_file_mem(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_data, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(derived_file);
+	fclose(result_file);
+	return (err == NULL);
+}
+
+/*
+ * Deltify in-memory derived data "aaa...xxx...ccc..." against a base
+ * stream "aaa...bbb...ccc...", where each run is GOT_DELTIFY_MAXCHUNK
+ * bytes long. Three instructions are expected: copy the 'a' run from the
+ * base, take the 'x' run from the derived data, copy the 'c' run from
+ * the base.
+ * Returns non-zero if the test passed and zero if it failed.
+ */
+static int
+deltify_abc_axc_mem_file(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	FILE *base_file, *result_file;
+	uint8_t derived_file[3 * GOT_DELTIFY_MAXCHUNK];
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	/* A setup failure is a failed test, so zero must be returned. */
+	base_file = got_opentemp();
+	if (base_file == NULL)
+		return 0;
+
+	result_file = got_opentemp();
+	if (result_file == NULL) {
+		fclose(base_file);
+		return 0;
+	}
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('a', base_file);
+		derived_file[i] = 'a';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('b', base_file);
+		derived_file[GOT_DELTIFY_MAXCHUNK + i] = 'x';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('c', base_file);
+		derived_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+	}
+
+	rewind(base_file);
+
+	err = got_deltify_init(&dt, base_file, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	/* The table's block counter must agree with the slots in use. */
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_mem_file(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(base_file);
+	fclose(result_file);
+	return (err == NULL);
+}
+
+/*
+ * Deltify in-memory derived data "aaa...xxx...ccc..." against an
+ * in-memory base "aaa...bbb...ccc...", where each run is
+ * GOT_DELTIFY_MAXCHUNK bytes long. Three instructions are expected: copy
+ * the 'a' run from the base, take the 'x' run from the derived data,
+ * copy the 'c' run from the base.
+ * Returns non-zero if the test passed and zero if it failed.
+ */
+static int
+deltify_abc_axc_mem_mem(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	FILE *result_file;
+	uint8_t base_file[3 * GOT_DELTIFY_MAXCHUNK];
+	uint8_t derived_file[3 * GOT_DELTIFY_MAXCHUNK];
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	/* A setup failure is a failed test, so zero must be returned. */
+	result_file = got_opentemp();
+	if (result_file == NULL)
+		return 0;
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[i] = 'a';
+		derived_file[i] = 'a';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[GOT_DELTIFY_MAXCHUNK + i] = 'b';
+		derived_file[GOT_DELTIFY_MAXCHUNK + i] = 'x';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+		derived_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+	}
+
+	err = got_deltify_init_mem(&dt, base_file, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	/* The table's block counter must agree with the slots in use. */
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_mem_mem(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(result_file);
+	return (err == NULL);
+}
+
 static int quiet;
 
 #define RUN_TEST(expr, name) \
@@ -172,6 +421,9 @@ main(int argc, char *argv[])
 		err(1, "unveil");
 
 	RUN_TEST(deltify_abc_axc(), "deltify_abc_axc");
+	RUN_TEST(deltify_abc_axc_file_mem(), "deltify_abc_axc_file_mem");
+	RUN_TEST(deltify_abc_axc_mem_file(), "deltify_abc_axc_mem_file");
+	RUN_TEST(deltify_abc_axc_mem_mem(), "deltify_abc_axc_mem_mem");
 
 	return failure ? 1 : 0;
 }