"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
let gotadmin pack map files into memory
To:
gameoftrees@openbsd.org
Date:
Fri, 31 Dec 2021 17:50:43 +0100

Download raw body.

Thread
Computing deltas between files involves a lot of seeking around in
order to search for matching blocks. With this patch we bypass the
stdio layer for files which are small enough to be mapped into memory,
while keeping the stdio case as a fallback for very large files.

This also makes it possible to avoid fmemopen() for very small files
which are stored in dynamically allocated buffers. We can now use those
buffers directly and save some memcpy() overhead.

This patch by itself does not improve performance when packing the got.git
repo, even though I see the new mmap code path being used.

However, moving away from files to memory will make it easier to remove
another bottleneck: According to gprof, calls to mkstemp() via
got_opentempfd() via got_object_raw_open() take up almost 25% of the
runtime of gotadmin pack. Closing unused tempfiles takes up another 10%.
We can likely avoid much of this overhead by only opening temporary
files when actually needed, and this patch paves the way.

ok?

 M  lib/deflate.c
 M  lib/deltify.c
 M  lib/got_lib_deflate.h
 M  lib/got_lib_deltify.h
 M  lib/got_lib_object.h
 M  lib/object.c
 M  lib/object_parse.c
 M  lib/pack_create.c
 M  regress/deltify/deltify_test.c

diff 2d68ca830b24b096f79e477e84749ef8d5576473 f469f2b9fee87aee8cea8e64e503ee8c75193dfb
blob - df0038ffc2f2db7feca6d9cc6ba518f9df340906
blob + cbda561b01ab171886e921ac7d5205b431c26dfa
--- lib/deflate.c
+++ lib/deflate.c
@@ -128,6 +128,46 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, 
 	return NULL;
 }
 
+const struct got_error *
+got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset,
+    size_t len, size_t *outlenp, size_t *consumed)
+{
+	size_t last_total_out = zb->z.total_out;
+	size_t last_total_in = zb->z.total_in;
+	z_stream *z = &zb->z;
+	int ret = Z_ERRNO;
+
+	z->next_out = zb->outbuf;
+	z->avail_out = zb->outlen;
+
+	*outlenp = 0;
+	*consumed = 0;
+	do {
+		if (z->avail_in == 0) {
+			z->next_in = map + offset + *consumed;
+			z->avail_in = len - *consumed;
+			if (z->avail_in == 0) {
+				/* EOF */
+				ret = deflate(z, Z_FINISH);
+				break;
+			}
+		}
+		ret = deflate(z, Z_NO_FLUSH);
+		*consumed += z->total_in - last_total_in;
+	} while (ret == Z_OK && z->avail_out > 0);
+
+	if (ret == Z_OK) {
+		zb->flags |= GOT_DEFLATE_F_HAVE_MORE;
+	} else {
+		if (ret != Z_STREAM_END)
+			return got_error(GOT_ERR_COMPRESSION);
+		zb->flags &= ~GOT_DEFLATE_F_HAVE_MORE;
+	}
+
+	*outlenp = z->total_out - last_total_out;
+	return NULL;
+}
+
 void
 got_deflate_end(struct got_deflate_buf *zb)
 {
@@ -172,3 +212,41 @@ done:
 	got_deflate_end(&zb);
 	return err;
 }
+
+const struct got_error *
+got_deflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset,
+    size_t len, FILE *outfile, struct got_deflate_checksum *csum)
+{
+	const struct got_error *err;
+	size_t avail, consumed;
+	struct got_deflate_buf zb;
+
+	err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
+	if (err)
+		goto done;
+
+	*outlen = 0;
+	do {
+		err = got_deflate_read_mmap(&zb, map, offset, len, &avail,
+		    &consumed);
+		if (err)
+			goto done;
+		offset += consumed;
+		len -= consumed;
+		if (avail > 0) {
+			size_t n;
+			n = fwrite(zb.outbuf, avail, 1, outfile);
+			if (n != 1) {
+				err = got_ferror(outfile, GOT_ERR_IO);
+				goto done;
+			}
+			if (csum)
+				csum_output(csum, zb.outbuf, avail);
+			*outlen += avail;
+		}
+	} while (zb.flags & GOT_DEFLATE_F_HAVE_MORE);
+
+done:
+	got_deflate_end(&zb);
+	return err;
+}
blob - b7d25c92ed71ded6a1b1729d2519e6d8daa10393
blob + f149375588ad41520cc260aae6f0cf1e1aac1931
--- lib/deltify.c
+++ lib/deltify.c
@@ -179,6 +179,70 @@ addblk(struct got_delta_table *dt, FILE *f, off_t file
 }
 
 static const struct got_error *
+addblk_mem(struct got_delta_table *dt, uint8_t *data, off_t file_offset0,
+    off_t len, off_t offset, uint64_t h)
+{
+	const struct got_error *err = NULL;
+	int i;
+	uint8_t *block1;
+	uint8_t *block2;
+
+	if (len == 0)
+		return NULL;
+
+	i = h % dt->nalloc;
+	while (dt->blocks[i].len != 0) {
+		/*
+		 * Avoid adding duplicate blocks.
+		 * NB: A matching hash is insufficient for detecting equality.
+		 * The hash can only detect inequality.
+		 */
+		if (len == dt->blocks[i].len && h == dt->blocks[i].hash) {
+			block1 = data + file_offset0 + dt->blocks[i].offset;
+			block2 = data + file_offset0 + offset;
+			if (memcmp(block1, block2, len) == 0)
+				return NULL;
+		}
+
+		i = (i + 1) % dt->nalloc;
+	}
+	assert(dt->blocks[i].len == 0);
+	dt->blocks[i].len = len;
+	dt->blocks[i].offset = offset;
+	dt->blocks[i].hash = h;
+	dt->nblocks++;
+	if (dt->nalloc < dt->nblocks + 64) {
+		struct got_delta_block *db;
+		size_t old_size = dt->nalloc;
+		db = dt->blocks;
+		dt->blocks = calloc(dt->nalloc + 64,
+		    sizeof(struct got_delta_block));
+		if (dt->blocks == NULL) {
+			err = got_error_from_errno("calloc");
+			dt->blocks = db;
+			return err;
+		}
+		dt->nalloc += 64;
+		/*
+		 * Recompute all block positions. Hash-based indices of blocks
+		 * in the array depend on the allocated length of the array.
+		 */
+		dt->nblocks = 0;
+		for (i = 0; i < old_size; i++) {
+			if (db[i].len == 0)
+				continue;
+			err = addblk_mem(dt, data, file_offset0, db[i].len,
+			    db[i].offset, db[i].hash);
+			if (err)
+				break;
+		}
+		free(db);
+	}
+
+	return err;
+}
+
+static const struct got_error *
 lookupblk(struct got_delta_block **block, struct got_delta_table *dt,
     unsigned char *p, off_t len, FILE *basefile, off_t basefile_offset0)
 {
@@ -210,6 +274,31 @@ lookupblk(struct got_delta_block **block, struct got_d
 }
 
 static const struct got_error *
+lookupblk_mem(struct got_delta_block **block, struct got_delta_table *dt,
+    unsigned char *p, off_t len, uint8_t *basedata, off_t basefile_offset0)
+{
+	int i;
+	uint64_t h;
+	uint8_t *b;
+
+	*block = NULL;
+
+	h = hashblk(p, len);
+	for (i = h % dt->nalloc; dt->blocks[i].len != 0;
+	     i = (i + 1) % dt->nalloc) {
+		if (dt->blocks[i].hash != h ||
+		    dt->blocks[i].len != len)
+			continue;
+		b = basedata + basefile_offset0 + dt->blocks[i].offset;
+		if (memcmp(p, b, len) == 0) {
+			*block = &dt->blocks[i];
+			break;
+		}
+	}
+	return NULL;
+}
+
+static const struct got_error *
 nextblk(uint8_t *buf, off_t *blocklen, FILE *f)
 {
 	uint32_t gh;
@@ -241,6 +330,31 @@ nextblk(uint8_t *buf, off_t *blocklen, FILE *f)
 	return NULL;
 }
 
+static const struct got_error *
+nextblk_mem(off_t *blocklen, uint8_t *data, off_t fileoffset, off_t filesize)
+{
+	uint32_t gh;
+	const unsigned char *p;
+
+	*blocklen = 0;
+
+	if (fileoffset >= filesize ||
+	    filesize - fileoffset < GOT_DELTIFY_MINCHUNK)
+		return NULL; /* no more delta-worthy blocks left */
+
+	/* Got a deltifiable block. Find the split-point where it ends. */
+	p = data + fileoffset + GOT_DELTIFY_MINCHUNK;
+	gh = 0;
+	while (p != data + MIN(fileoffset + GOT_DELTIFY_MAXCHUNK, filesize)) {
+		gh = (gh << 1) + geartab[*p++];
+		if ((gh & GOT_DELTIFY_SPLITMASK) == 0)
+			break;
+	}
+
+	*blocklen = (p - (data + fileoffset));
+	return NULL;
+}
+
 const struct got_error *
 got_deltify_init(struct got_delta_table **dt, FILE *f, off_t fileoffset,
     off_t filesize)
@@ -291,6 +405,50 @@ done:
 	return err;
 }
 
+const struct got_error *
+got_deltify_init_mem(struct got_delta_table **dt, uint8_t *data,
+    off_t fileoffset, off_t filesize)
+{
+	const struct got_error *err = NULL;
+	uint64_t h;
+	const off_t offset0 = fileoffset;
+
+	*dt = calloc(1, sizeof(**dt));
+	if (*dt == NULL)
+		return got_error_from_errno("calloc");
+
+	(*dt)->nblocks = 0;
+	(*dt)->nalloc = 128;
+	(*dt)->blocks = calloc((*dt)->nalloc, sizeof(struct got_delta_block));
+	if ((*dt)->blocks == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
+
+	while (fileoffset < filesize) {
+		off_t blocklen;
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err)
+			goto done;
+		if (blocklen == 0)
+			break;
+		h = hashblk(data + fileoffset, blocklen);
+		err = addblk_mem(*dt, data, offset0, blocklen,
+		    fileoffset - offset0, h);
+		if (err)
+			goto done;
+		fileoffset += blocklen;
+	}
+done:
+	if (err) {
+		free((*dt)->blocks);
+		free(*dt);
+		*dt = NULL;
+	}
+
+	return err;
+}
+
 void
 got_deltify_free(struct got_delta_table *dt)
 {
@@ -359,6 +517,121 @@ stretchblk(FILE *basefile, off_t base_offset0, struct 
 	return NULL;
 }
 
+static const struct got_error *
+stretchblk_file_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
+     struct got_delta_block *block, FILE *f, off_t filesize, off_t *blocklen)
+{
+	uint8_t buf[GOT_DELTIFY_MAXCHUNK];
+	size_t r, i;
+	int buf_equal = 1;
+	off_t base_offset = base_offset0 + block->offset + *blocklen;
+
+	if (base_offset > basefile_size) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of delta base at offset %llu",
+		    base_offset);
+	}
+
+	while (buf_equal && *blocklen < (1 << 24) - 1) {
+		if (base_offset + *blocklen >= basefile_size)
+			break;
+		r = fread(buf, 1, sizeof(buf), f);
+		if (r == 0) {
+			if (ferror(f))
+				return got_ferror(f, GOT_ERR_IO);
+			break;
+		}
+		for (i = 0; i < MIN(basefile_size - base_offset, r); i++) {
+			if (buf[i] != *(basedata + base_offset + i)) {
+				buf_equal = 0;
+				break;
+			}
+			(*blocklen)++;
+		}
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
+stretchblk_mem_file(FILE *basefile, off_t base_offset0,
+    struct got_delta_block *block, uint8_t *data, off_t fileoffset,
+    off_t filesize, off_t *blocklen)
+{
+	uint8_t basebuf[GOT_DELTIFY_MAXCHUNK];
+	size_t base_r, i;
+	int buf_equal = 1;
+
+	if (fileoffset > filesize) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of deltify file at offset %llu",
+		    fileoffset);
+	}
+
+	if (fseeko(basefile, base_offset0 + block->offset + *blocklen,
+	    SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
+	while (buf_equal && *blocklen < (1 << 24) - 1) {
+		if (fileoffset + *blocklen >= filesize)
+			break;
+		base_r = fread(basebuf, 1, sizeof(basebuf), basefile);
+		if (base_r == 0) {
+			if (ferror(basefile))
+				return got_ferror(basefile, GOT_ERR_IO);
+			break;
+		}
+		for (i = 0; i < MIN(base_r, filesize - fileoffset); i++) {
+			if (*(data + fileoffset + i) != basebuf[i]) {
+				buf_equal = 0;
+				break;
+			}
+			(*blocklen)++;
+		}
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
+stretchblk_mem_mem(uint8_t *basedata, off_t base_offset0, off_t basefile_size,
+    struct got_delta_block *block, uint8_t *data, off_t fileoffset,
+    off_t filesize, off_t *blocklen)
+{
+	off_t i, maxlen;
+	int buf_equal = 1;
+	off_t base_offset = base_offset0 + block->offset + *blocklen;
+
+	if (base_offset > basefile_size) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of delta base at offset %llu",
+		    base_offset);
+	}
+
+	if (fileoffset > filesize) {
+		return got_error_fmt(GOT_ERR_RANGE,
+		    "read beyond the size of deltify file at offset %llu",
+		    fileoffset);
+	}
+
+	maxlen = MIN(basefile_size - base_offset, filesize - fileoffset);
+	while (buf_equal && *blocklen < (1 << 24) - 1) {
+		if (fileoffset + *blocklen >= filesize ||
+		    base_offset + *blocklen >= basefile_size)
+			break;
+		for (i = 0; i < maxlen; i++) {
+			if (*(data + fileoffset + i) !=
+			    *(basedata + base_offset + i)) {
+				buf_equal = 0;
+				break;
+			}
+			(*blocklen)++;
+		}
+	}
+
+	return NULL;
+}
+
 const struct got_error *
 got_deltify(struct got_delta_instruction **deltas, int *ndeltas,
     FILE *f, off_t fileoffset, off_t filesize,
@@ -445,3 +718,240 @@ got_deltify(struct got_delta_instruction **deltas, int
 	}
 	return err;
 }
+
+const struct got_error *
+got_deltify_file_mem(struct got_delta_instruction **deltas, int *ndeltas,
+    FILE *f, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, uint8_t *basedata,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+	const size_t alloc_chunk_size = 64;
+
+	*deltas = NULL;
+	*ndeltas = 0;
+
+	/*
+	 * offset0 indicates where data to be deltified begins.
+	 * For example, we want to avoid deltifying a Git object header at
+	 * the beginning of the file.
+	 */
+	if (fseeko(f, offset0, SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		uint8_t buf[GOT_DELTIFY_MAXCHUNK];
+		off_t blocklen;
+		struct got_delta_block *block;
+		err = nextblk(buf, &blocklen, f);
+		if (err)
+			break;
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			if (fileoffset < filesize) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 0, fileoffset - offset0,
+				    filesize - fileoffset);
+			}
+			break;
+		}
+		err = lookupblk_mem(&block, dt, buf, blocklen, basedata,
+		    basefile_offset0);
+		if (err)
+			break;
+		if (block != NULL) {
+			/*
+			 * We have found a matching block in the delta base.
+			 * Attempt to stretch the block as far as possible and
+			 * generate a copy instruction.
+			 */
+			err = stretchblk_file_mem(basedata, basefile_offset0,
+			    basefile_size, block, f, filesize, &blocklen);
+			if (err)
+				break;
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 1, block->offset, blocklen);
+			if (err)
+				break;
+		} else {
+			/*
+			 * No match.
+			 * This block needs to be sourced from the file itself.
+			 */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0, blocklen);
+			if (err)
+				break;
+		}
+		fileoffset += blocklen;
+		if (fseeko(f, fileoffset, SEEK_SET) == -1) {
+			err = got_error_from_errno("fseeko");
+			break;
+		}
+	}
+
+	if (err) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
+
+const struct got_error *
+got_deltify_mem_file(struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, FILE *basefile,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+	const size_t alloc_chunk_size = 64;
+
+	*deltas = NULL;
+	*ndeltas = 0;
+
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		off_t blocklen;
+		struct got_delta_block *block;
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err)
+			break;
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			if (fileoffset < filesize) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 0, fileoffset - offset0,
+				    filesize - fileoffset);
+			}
+			break;
+		}
+		err = lookupblk(&block, dt, data + fileoffset, blocklen,
+		    basefile, basefile_offset0);
+		if (err)
+			break;
+		if (block != NULL) {
+			/*
+			 * We have found a matching block in the delta base.
+			 * Attempt to stretch the block as far as possible and
+			 * generate a copy instruction.
+			 */
+			err = stretchblk_mem_file(basefile, basefile_offset0,
+			    block, data, fileoffset + blocklen, filesize,
+			    &blocklen);
+			if (err)
+				break;
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 1, block->offset, blocklen);
+			if (err)
+				break;
+		} else {
+			/*
+			 * No match.
+			 * This block needs to be sourced from the file itself.
+			 */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0, blocklen);
+			if (err)
+				break;
+		}
+		fileoffset += blocklen;
+	}
+
+	if (err) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
+
+const struct got_error *
+got_deltify_mem_mem(struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize,
+    struct got_delta_table *dt, uint8_t *basedata,
+    off_t basefile_offset0, off_t basefile_size)
+{
+	const struct got_error *err = NULL;
+	const off_t offset0 = fileoffset;
+	size_t nalloc = 0;
+	const size_t alloc_chunk_size = 64;
+
+	*deltas = NULL;
+	*ndeltas = 0;
+
+	*deltas = reallocarray(NULL, alloc_chunk_size,
+	    sizeof(struct got_delta_instruction));
+	if (*deltas == NULL)
+		return got_error_from_errno("reallocarray");
+	nalloc = alloc_chunk_size;
+
+	while (fileoffset < filesize) {
+		off_t blocklen;
+		struct got_delta_block *block;
+		err = nextblk_mem(&blocklen, data, fileoffset, filesize);
+		if (err)
+			break;
+		if (blocklen == 0) {
+			/* Source remainder from the file itself. */
+			if (fileoffset < filesize) {
+				err = emitdelta(deltas, &nalloc, ndeltas,
+				    alloc_chunk_size, 0, fileoffset - offset0,
+				    filesize - fileoffset);
+			}
+			break;
+		}
+		err = lookupblk_mem(&block, dt, data + fileoffset, blocklen,
+		    basedata, basefile_offset0);
+		if (err)
+			break;
+		if (block != NULL) {
+			/*
+			 * We have found a matching block in the delta base.
+			 * Attempt to stretch the block as far as possible and
+			 * generate a copy instruction.
+			 */
+			err = stretchblk_mem_mem(basedata, basefile_offset0,
+			    basefile_size, block, data, fileoffset + blocklen,
+			    filesize, &blocklen);
+			if (err)
+				break;
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 1, block->offset, blocklen);
+			if (err)
+				break;
+		} else {
+			/*
+			 * No match.
+			 * This block needs to be sourced from the file itself.
+			 */
+			err = emitdelta(deltas, &nalloc, ndeltas,
+			    alloc_chunk_size, 0, fileoffset - offset0, blocklen);
+			if (err)
+				break;
+		}
+		fileoffset += blocklen;
+	}
+
+	if (err) {
+		free(*deltas);
+		*deltas = NULL;
+		*ndeltas = 0;
+	}
+	return err;
+}
blob - 6eee636c9f50d9e8a6efef4d4aad6daf6373cdf1
blob + c0b4fe271d48124cc97b135feff4b79f951c6720
--- lib/got_lib_deflate.h
+++ lib/got_lib_deflate.h
@@ -42,3 +42,5 @@ const struct got_error *got_deflate_read(struct got_de
 void got_deflate_end(struct got_deflate_buf *);
 const struct got_error *got_deflate_to_file(size_t *, FILE *, FILE *,
     struct got_deflate_checksum *);
+const struct got_error *got_deflate_to_file_mmap(size_t *, uint8_t *,
+    size_t, size_t, FILE *, struct got_deflate_checksum *);
blob - 848c59979d94aa7c6bc1d69f81419df67855c141
blob + 956f6c3dab971228b22f00297f9ee0ef0ef8f894
--- lib/got_lib_deltify.h
+++ lib/got_lib_deltify.h
@@ -42,8 +42,22 @@ enum {
 
 const struct got_error *got_deltify_init(struct got_delta_table **dt, FILE *f,
     off_t fileoffset, off_t filesize);
+const struct got_error *got_deltify_init_mem(struct got_delta_table **dt,
+    uint8_t *data, off_t fileoffset, off_t filesize);
 const struct got_error *got_deltify(struct got_delta_instruction **deltas,
     int *ndeltas, FILE *f, off_t fileoffset, off_t filesize,
     struct got_delta_table *dt, FILE *basefile, off_t basefile_offset0,
     off_t basefile_size);
+const struct got_error *got_deltify_file_mem(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    FILE *f, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    uint8_t *basedata, off_t basefile_offset0, off_t basefile_size);
+const struct got_error *got_deltify_mem_file(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    FILE *basefile, off_t basefile_offset0, off_t basefile_size);
+const struct got_error *got_deltify_mem_mem(
+    struct got_delta_instruction **deltas, int *ndeltas,
+    uint8_t *data, off_t fileoffset, off_t filesize, struct got_delta_table *dt,
+    uint8_t *basedata, off_t basefile_offset0, off_t basefile_size);
 void got_deltify_free(struct got_delta_table *dt);
blob - 0115e7b78c440326e9e91c010a967b702ff8432f
blob + 60920514cdf4045702691ca0ad19a5d080a8893e
--- lib/got_lib_object.h
+++ lib/got_lib_object.h
@@ -36,7 +36,8 @@ struct got_object {
 };
 
 struct got_raw_object {
-	FILE *f;
+	FILE *f;		/* NULL if data buffer is being used */
+	int fd;			/* -1 unless data buffer is memory-mapped */
 	uint8_t *data;
 	off_t size;
 	size_t hdrlen;
blob - a4cf395476aee75f57a0281a2184b91a09e360e3
blob + e3cd58001b76b9cdd34790048274309639cd72c1
--- lib/object.c
+++ lib/object.c
@@ -22,6 +22,7 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/resource.h>
+#include <sys/mman.h>
 
 #include <errno.h>
 #include <fcntl.h>
@@ -590,13 +591,9 @@ got_object_raw_open(struct got_raw_object **obj, int *
 		err = got_error_from_errno("calloc");
 		goto done;
 	}
+	(*obj)->fd = -1;
 
 	if (outbuf) {
-		(*obj)->f = fmemopen(outbuf, hdrlen + size, "r");
-		if ((*obj)->f == NULL) {
-			err = got_error_from_errno("fdopen");
-			goto done;
-		}
 		(*obj)->data = outbuf;
 	} else {
 		struct stat sb;
@@ -609,14 +606,30 @@ got_object_raw_open(struct got_raw_object **obj, int *
 			err = got_error(GOT_ERR_PRIVSEP_LEN);
 			goto done;
 		}
-
-		(*obj)->f = fdopen(*outfd, "r");
-		if ((*obj)->f == NULL) {
-			err = got_error_from_errno("fdopen");
-			goto done;
+#ifndef GOT_PACK_NO_MMAP
+		if (hdrlen + size > 0) {
+			(*obj)->data = mmap(NULL, hdrlen + size, PROT_READ,
+			    MAP_PRIVATE, *outfd, 0);
+			if ((*obj)->data == MAP_FAILED) {
+				if (errno != ENOMEM) {
+					err = got_error_from_errno("mmap");
+					goto done;
+				}
+				(*obj)->data = NULL;
+			} else {
+				(*obj)->fd = *outfd;
+				*outfd = -1;
+			}
 		}
-		(*obj)->data = NULL;
-		*outfd = -1;
+#endif
+		if (*outfd != -1) {
+			(*obj)->f = fdopen(*outfd, "r");
+			if ((*obj)->f == NULL) {
+				err = got_error_from_errno("fdopen");
+				goto done;
+			}
+			*outfd = -1;
+		}
 	}
 	(*obj)->hdrlen = hdrlen;
 	(*obj)->size = size;
blob - a92f3d54cd59722d9a59cd6d2c715b21bc4c1965
blob + b4f415940ee578ecafa1e8ceca60032674013904
--- lib/object_parse.c
+++ lib/object_parse.c
@@ -21,6 +21,7 @@
 #include <sys/uio.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
+#include <sys/mman.h>
 
 #include <errno.h>
 #include <stdio.h>
@@ -144,9 +145,18 @@ got_object_raw_close(struct got_raw_object *obj)
 			return NULL;
 	}
 
-	if (obj->f != NULL && fclose(obj->f) == EOF && err == NULL)
-		err = got_error_from_errno("fclose");
-	free(obj->data);
+	if (obj->f == NULL) {
+		if (obj->fd != -1) {
+			if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
+				err = got_error_from_errno("munmap");
+			if (close(obj->fd) == -1 && err == NULL)
+				err = got_error_from_errno("close");
+		} else
+			free(obj->data);
+	} else {
+		if (fclose(obj->f) == EOF && err == NULL)
+			err = got_error_from_errno("fclose");
+	}
 	free(obj);
 	return err;
 }
blob - 5e229ab44b13c2e63ed9807f70837898eeda1e43
blob + 1687de72922bff0ffd4c1c6adc734759618703a7
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -224,6 +224,19 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o
 			w = fwrite(buf, 1, bp - buf, f);
 			if (w != bp - buf)
 				return got_ferror(f, GOT_ERR_IO);
+		} else if (o->f == NULL) {
+			n = 0;
+			while (n != d->len) {
+				buf[0] = (d->len - n < 127) ? d->len - n : 127;
+				w = fwrite(buf, 1, 1, f);
+				if (w != 1)
+					return got_ferror(f, GOT_ERR_IO);
+				w = fwrite(o->data + o->hdrlen + d->offset, 1,
+				    buf[0], f);
+				if (w != buf[0])
+					return got_ferror(f, GOT_ERR_IO);
+				n += buf[0];
+			}
 		} else {
 			char content[128];
 			size_t r;
@@ -287,8 +300,13 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
 			goto done;
 		m->size = raw->size;
 
-		err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
-		    raw->size + raw->hdrlen);
+		if (raw->f == NULL) {
+			err = got_deltify_init_mem(&m->dtab, raw->data,
+			    raw->hdrlen, raw->size + raw->hdrlen);
+		} else {
+			err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
+			    raw->size + raw->hdrlen);
+		}
 		if (err)
 			goto done;
 
@@ -317,10 +335,34 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
 			    &base->id);
 			if (err)
 				goto done;
-			err = got_deltify(&deltas, &ndeltas,
-			    raw->f, raw->hdrlen, raw->size + raw->hdrlen,
-			    base->dtab, base_raw->f, base_raw->hdrlen,
-			    base_raw->size + base_raw->hdrlen);
+			if (raw->f == NULL && base_raw->f == NULL) {
+				err = got_deltify_mem_mem(&deltas, &ndeltas,
+				    raw->data, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->data,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else if (raw->f == NULL) {
+				err = got_deltify_mem_file(&deltas, &ndeltas,
+				    raw->data, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->f,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else if (base_raw->f == NULL) {
+				err = got_deltify_file_mem(&deltas, &ndeltas,
+				    raw->f, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->data,
+				    base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			} else {
+				err = got_deltify(&deltas, &ndeltas,
+				    raw->f, raw->hdrlen,
+				    raw->size + raw->hdrlen,
+				    base->dtab, base_raw->f, base_raw->hdrlen,
+				    base_raw->size + base_raw->hdrlen);
+			}
 			got_object_raw_close(base_raw);
 			base_raw = NULL;
 			if (err)
@@ -1187,14 +1229,23 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
 			if (err)
 				goto done;
 			packfile_size += nh;
-			if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
-				err = got_error_from_errno("fseeko");
-				goto done;
+			if (raw->f == NULL) {
+				err = got_deflate_to_file_mmap(&outlen,
+				    raw->data + raw->hdrlen, 0, raw->size,
+				    packfile, &csum);
+				if (err)
+					goto done;
+			} else {
+				if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
+				    == -1) {
+					err = got_error_from_errno("fseeko");
+					goto done;
+				}
+				err = got_deflate_to_file(&outlen, raw->f,
+				    packfile, &csum);
+				if (err)
+					goto done;
 			}
-			err = got_deflate_to_file(&outlen, raw->f, packfile,
-			    &csum);
-			if (err)
-				goto done;
 			packfile_size += outlen;
 		} else {
 			off_t remain;
blob - 872d88379087aa49d66ebbc780ae19c92d4637e9
blob + b4e79453a7e3443c1559612f98ab36e7d8c7ca50
--- regress/deltify/deltify_test.c
+++ regress/deltify/deltify_test.c
@@ -122,6 +122,255 @@ done:
 	return (err == NULL);
 }
 
+static int
+deltify_abc_axc_file_mem(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	uint8_t base_data[3 * GOT_DELTIFY_MAXCHUNK];
+	FILE *derived_file, *result_file;
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	derived_file = got_opentemp();
+	if (derived_file == NULL)
+		return 1;
+
+	result_file = got_opentemp();
+	if (result_file == NULL)
+		return 1;
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[i] = 'a';
+		fputc('a', derived_file);
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[GOT_DELTIFY_MAXCHUNK + i] = 'b';
+		fputc('x', derived_file);
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_data[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+		fputc('c', derived_file);
+	}
+
+	rewind(derived_file);
+
+	err = got_deltify_init_mem(&dt, base_data, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_file_mem(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_data, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(derived_file);
+	fclose(result_file);
+	return (err == NULL);
+}
+
+static int
+deltify_abc_axc_mem_file(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	FILE *base_file, *result_file;
+	uint8_t derived_file[3 * GOT_DELTIFY_MAXCHUNK];
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	base_file = got_opentemp();
+	if (base_file == NULL)
+		return 1;
+
+	result_file = got_opentemp();
+	if (result_file == NULL)
+		return 1;
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('a', base_file);
+		derived_file[i] = 'a';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('b', base_file);
+		derived_file[GOT_DELTIFY_MAXCHUNK + i] = 'x';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		fputc('c', base_file);
+		derived_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+	}
+
+	rewind(base_file);
+
+	err = got_deltify_init(&dt, base_file, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_mem_file(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(base_file);
+	fclose(result_file);
+	return (err == NULL);
+}
+
+static int
+deltify_abc_axc_mem_mem(void)
+{
+	const struct got_error *err = NULL;
+	size_t i;
+	FILE *result_file;
+	uint8_t base_file[3 * GOT_DELTIFY_MAXCHUNK];
+	uint8_t derived_file[3 * GOT_DELTIFY_MAXCHUNK];
+	struct got_delta_table *dt;
+	struct got_delta_instruction *deltas;
+	int ndeltas;
+	int have_nblocks = 0;
+
+	result_file = got_opentemp();
+	if (result_file == NULL)
+		return 1;
+
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[i] = 'a';
+		derived_file[i] = 'a';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[GOT_DELTIFY_MAXCHUNK + i] = 'b';
+		derived_file[GOT_DELTIFY_MAXCHUNK + i] = 'x';
+	}
+	for (i = 0; i < GOT_DELTIFY_MAXCHUNK; i++) {
+		base_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+		derived_file[2 * GOT_DELTIFY_MAXCHUNK + i] = 'c';
+	}
+
+	err = got_deltify_init_mem(&dt, base_file, 0, 3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	for (i = 0; i < dt->nalloc; i++) {
+		if (dt->blocks[i].len > 0)
+			have_nblocks++;
+	}
+	if (have_nblocks != dt->nblocks) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+	err = got_deltify_mem_mem(&deltas, &ndeltas, derived_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK, dt, base_file, 0,
+	    3 * GOT_DELTIFY_MAXCHUNK);
+	if (err)
+		goto done;
+
+	if (ndeltas != 3) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'aaaa...' from base file. */
+	if (!(deltas[0].copy == 1 && deltas[0].offset == 0 &&
+	    deltas[0].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'xxxx...' from derived file. */
+	if (!(deltas[1].copy == 0 && deltas[1].offset == GOT_DELTIFY_MAXCHUNK &&
+	    deltas[1].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+	/* Copy 'ccccc...' from base file. */
+	if (!(deltas[2].copy == 1 &&
+	    deltas[2].offset == 2 * GOT_DELTIFY_MAXCHUNK &&
+	    deltas[2].len == GOT_DELTIFY_MAXCHUNK)) {
+		err = got_error(GOT_ERR_BAD_DELTA);
+		goto done;
+	}
+
+done:
+	got_deltify_free(dt);
+	fclose(result_file);
+	return (err == NULL);
+}
+
 static int quiet;
 
 #define RUN_TEST(expr, name) \
@@ -172,6 +421,9 @@ main(int argc, char *argv[])
 		err(1, "unveil");
 
 	RUN_TEST(deltify_abc_axc(), "deltify_abc_axc");
+	RUN_TEST(deltify_abc_axc_file_mem(), "deltify_abc_axc_file_mem");
+	RUN_TEST(deltify_abc_axc_mem_file(), "deltify_abc_axc_mem_file");
+	RUN_TEST(deltify_abc_axc_mem_mem(), "deltify_abc_axc_mem_mem");
 
 	return failure ? 1 : 0;
 }