"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: reuse deltas while packing
To:
gameoftrees@openbsd.org
Date:
Sat, 12 Feb 2022 19:33:11 +0100

Download raw body.

Thread
  • Thomas Adam:

    reuse deltas while packing

  • Stefan Sperling:

    reuse deltas while packing

    On Thu, Jan 27, 2022 at 03:01:04PM +0100, Stefan Sperling wrote:
    > This patch adds initial support for reusing existing deltas while
    > creating new pack files.
    
    I did not get an explicit OK for this yet (unless I missed one?).
    
    But some testing has happened, some related bugs were found and fixed,
    and feedback was positive overall. So I suppose this patch is a step
    in the right direction and I should proceed?
      
    > diff 8669fd5c8f1a790cfe005834fd9fbd34ceef71f6 620a4020edb35fa9c0505d8fdfd619d864d1e2fb
    > blob - 60920514cdf4045702691ca0ad19a5d080a8893e
    > blob + 7a4aaaaec7e976d89c7989136f80818dd4091850
    > --- lib/got_lib_object.h
    > +++ lib/got_lib_object.h
    > @@ -102,6 +102,14 @@ const struct got_error *got_object_open_loose_fd(int *
    >      struct got_repository *);
    >  const struct got_error *got_object_open_packed(struct got_object **,
    >      struct got_object_id *, struct got_repository *);
    > +struct got_pack;
    > +struct got_packidx;
    > +const struct got_error *got_object_open_from_packfile(struct got_object **,
    > +    struct got_object_id *, struct got_pack *, struct got_packidx *, int,
    > +    struct got_repository *);
    > +const struct got_error *got_object_read_raw_delta(uint64_t *, uint64_t *,
    > +    off_t *, off_t *, off_t *, struct got_object_id **, int,
    > +    struct got_packidx *, int, struct got_object_id *, struct got_repository *);
    >  const struct got_error *got_object_read_header_privsep(struct got_object **,
    >      struct got_object_id *, struct got_repository *, int);
    >  const struct got_error *got_object_open(struct got_object **,
    > blob - 750be4962021434fb27d2dcc95867ff2e333a776
    > blob + 5346f7f2ce1559b966b7c6e2f1bde6da40d729a4
    > --- lib/got_lib_object_idset.h
    > +++ lib/got_lib_object_idset.h
    > @@ -31,3 +31,13 @@ const struct got_error *got_object_idset_for_each(stru
    >      const struct got_error *(*cb)(struct got_object_id *, void *, void *),
    >      void *);
    >  int got_object_idset_num_elements(struct got_object_idset *);
    > +
    > +struct got_object_idset_element;
    > +struct got_object_idset_element *got_object_idset_get_element(
    > +    struct got_object_idset *, struct got_object_id *);
    > +void *got_object_idset_get_element_data(struct got_object_idset_element *);
    > +const struct got_error *got_object_idset_for_each_element(struct got_object_idset *,
    > +    const struct got_error *(*cb)(struct got_object_idset_element *, void *), void *);
    > +void got_object_idset_remove_element(struct got_object_idset *,
    > +    struct got_object_idset_element *);
    > +
    > blob - 2a9c135628c6fed4aeef4fb17093f23e0b5df9ae
    > blob + e8fb373e287ee80486d50ed07964d9d39924308d
    > --- lib/got_lib_pack.h
    > +++ lib/got_lib_pack.h
    > @@ -22,6 +22,7 @@ struct got_pack {
    >  	size_t filesize;
    >  	struct got_privsep_child *privsep_child;
    >  	int child_has_tempfiles;
    > +	int child_has_delta_outfd;
    >  	struct got_delta_cache *delta_cache;
    >  };
    >  
    > @@ -96,6 +97,16 @@ struct got_packidx_v2_hdr {
    >  	struct got_packidx_trailer *trailer;
    >  };
    >  
    > +struct got_pack_offset_index {
    > +	uint32_t offset;
    > +	uint32_t idx;
    > +};
    > +
    > +struct got_pack_large_offset_index {
    > +	uint64_t offset;
    > +	uint32_t idx;
    > +};
    > +
    >  /* An open pack index file. */
    >  struct got_packidx {
    >  	char *path_packidx; /* actual on-disk path */
    > @@ -104,6 +115,8 @@ struct got_packidx {
    >  	size_t len;
    >  	size_t nlargeobj;
    >  	struct got_packidx_v2_hdr hdr; /* convenient pointers into map */
    > +	struct got_pack_offset_index *sorted_offsets;
    > +	struct got_pack_large_offset_index *sorted_large_offsets;
    >  };
    >  
    >  struct got_packfile_hdr {
    > @@ -177,6 +190,10 @@ const struct got_error *got_packidx_close(struct got_p
    >  const struct got_error *got_packidx_get_packfile_path(char **, const char *);
    >  off_t got_packidx_get_object_offset(struct got_packidx *, int idx);
    >  int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *);
    > +const struct got_error *got_packidx_get_offset_idx(int *, struct got_packidx *,
    > +    off_t);
    > +const struct got_error *got_packidx_get_object_id(struct got_object_id *,
    > +    struct got_packidx *, int);
    >  const struct got_error *got_packidx_match_id_str_prefix(
    >      struct got_object_id_queue *, struct got_packidx *, const char *);
    >  
    > @@ -194,5 +211,8 @@ const struct got_error *got_packfile_extract_object(st
    >      struct got_object *, FILE *, FILE *, FILE *);
    >  const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *,
    >      struct got_object *, struct got_pack *);
    > +const struct got_error *got_packfile_extract_raw_delta(uint8_t **, size_t *,
    > +    off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *,
    > +    struct got_pack *, struct got_packidx *, int);
    >  struct got_pack *got_repo_get_cached_pack(struct got_repository *,
    >      const char *);
    > blob - ce62cbf5e948bfee2b29b0f0524208b1a15a93e9
    > blob + 274e89878290befef48084afc0ae191cd5c36b16
    > --- lib/got_lib_privsep.h
    > +++ lib/got_lib_privsep.h
    > @@ -174,6 +174,11 @@ enum got_imsg_type {
    >  	GOT_IMSG_RAW_OBJECT_OUTFD,
    >  	GOT_IMSG_PACKED_RAW_OBJECT_REQUEST,
    >  	GOT_IMSG_RAW_OBJECT,
    > +
    > +	/* Read raw delta data from pack files. */
    > +	GOT_IMSG_RAW_DELTA_OUTFD,
    > +	GOT_IMSG_RAW_DELTA_REQUEST,
    > +	GOT_IMSG_RAW_DELTA,
    >  };
    >  
    >  /* Structure for GOT_IMSG_ERROR. */
    > @@ -262,6 +267,21 @@ struct got_imsg_raw_obj {
    >  	(MAX_IMSGSIZE - IMSG_HEADER_SIZE - sizeof(struct got_imsg_raw_obj))
    >  };
    >  
    > +/* Structure for GOT_IMSG_RAW_DELTA. */
    > +struct got_imsg_raw_delta {
    > +	uint8_t base_id[SHA1_DIGEST_LENGTH];
    > +	uint64_t base_size;
    > +	uint64_t result_size;
    > +	off_t delta_size;
    > +	off_t delta_offset;
    > +	off_t delta_out_offset;
    > +
    > +	/*
    > +	 * Delta data has been written at delta_out_offset to the file
    > +	 * descriptor passed via the GOT_IMSG_RAW_DELTA_OUTFD imsg.
    > +	 */
    > +};
    > +
    >  /* Structure for GOT_IMSG_TAG data. */
    >  struct got_imsg_tag_object {
    >  	uint8_t id[SHA1_DIGEST_LENGTH];
    > @@ -428,6 +448,26 @@ struct got_imsg_packed_object {
    >  	int idx;
    >  } __attribute__((__packed__));
    >  
    > +/*
    > + * Structure for GOT_IMSG_DELTA data.
    > + */
    > +struct got_imsg_delta {
    > +	/* These fields are the same as in struct got_delta. */
    > +	off_t offset;
    > +	size_t tslen;
    > +	int type;
    > +	size_t size;
    > +	off_t data_offset;
    > +};
    > +
    > +/*
    > + * Structure for GOT_IMSG_RAW_DELTA_REQUEST data.
    > + */
    > +struct got_imsg_raw_delta_request {
    > +	uint8_t id[SHA1_DIGEST_LENGTH];
    > +	int idx;
    > +};
    > +
    >  /* Structure for GOT_IMSG_COMMIT_TRAVERSAL_REQUEST  */
    >  struct got_imsg_commit_traversal_request {
    >  	uint8_t id[SHA1_DIGEST_LENGTH];
    > @@ -588,4 +628,12 @@ const struct got_error *got_privsep_recv_traversed_com
    >      struct got_commit_object **, struct got_object_id **,
    >      struct got_object_id_queue *, struct imsgbuf *);
    >  
    > +const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int,
    > +    struct got_object_id *);
    > +const struct got_error *got_privsep_send_raw_delta_outfd(struct imsgbuf *, int);
    > +const struct got_error *got_privsep_send_raw_delta(struct imsgbuf *, uint64_t,
    > +    uint64_t,  off_t, off_t, off_t, struct got_object_id *);
    > +const struct got_error *got_privsep_recv_raw_delta(uint64_t *, uint64_t *,
    > +    off_t *, off_t *, off_t *, struct got_object_id **, struct imsgbuf *);
    > +
    >  void got_privsep_exec_child(int[2], const char *, const char *);
    > blob - 831cb967da0e76447d97b70ff058416fbf492de4
    > blob + 798762e89557feb475f650e62e549abe41eb43c4
    > --- lib/got_lib_repository.h
    > +++ lib/got_lib_repository.h
    > @@ -122,7 +122,13 @@ const struct got_error*got_repo_cache_raw_object(struc
    >  struct got_raw_object *got_repo_get_cached_raw_object(struct got_repository *,
    >      struct got_object_id *);
    >  int got_repo_is_packidx_filename(const char *, size_t);
    > +int got_repo_check_packidx_bloom_filter(struct got_repository *,
    > +    const char *, struct got_object_id *);
    >  const struct got_error *got_repo_search_packidx(struct got_packidx **, int *,
    >      struct got_repository *, struct got_object_id *);
    > +const struct got_error *got_repo_list_packidx(struct got_pathlist_head *,
    > +    struct got_repository *);
    > +const struct got_error *got_repo_get_packidx(struct got_packidx **, const char *,
    > +    struct got_repository *);
    >  const struct got_error *got_repo_cache_pack(struct got_pack **,
    >      struct got_repository *, const char *, struct got_packidx *);
    > blob - ce95f8795c1e303f174dcdf4d6eeb41563569e6f
    > blob + 8218b2c4c146e832c63f84d4a9f6a43f8b3283f8
    > --- lib/object.c
    > +++ lib/object.c
    > @@ -267,6 +267,7 @@ start_pack_privsep_child(struct got_pack *pack, struct
    >  		return err;
    >  	}
    >  	pack->child_has_tempfiles = 0;
    > +	pack->child_has_delta_outfd = 0;
    >  
    >  	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fds) == -1) {
    >  		err = got_error_from_errno("socketpair");
    > @@ -376,6 +377,71 @@ done:
    >  	return err;
    >  }
    >  
    > +const struct got_error *
    > +got_object_open_from_packfile(struct got_object **obj, struct got_object_id *id,
    > +    struct got_pack *pack, struct got_packidx *packidx, int obj_idx,
    > +    struct got_repository *repo)
    > +{
    > +	return read_packed_object_privsep(obj, repo, pack, packidx,
    > +	    obj_idx, id);
    > +}
    > +
    > +const struct got_error *
    > +got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size,
    > +    off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset,
    > +    struct got_object_id **base_id, int delta_cache_fd,
    > +    struct got_packidx *packidx, int obj_idx, struct got_object_id *id,
    > +    struct got_repository *repo)
    > +{
    > +	const struct got_error *err = NULL;
    > +	struct got_pack *pack = NULL;
    > +	char *path_packfile;
    > +
    > +	*base_size = 0;
    > +	*result_size = 0;
    > +	*delta_size = 0;
    > +	*delta_offset = 0;
    > +	*delta_out_offset = 0;
    > +
    > +	err = got_packidx_get_packfile_path(&path_packfile,
    > +	    packidx->path_packidx);
    > +	if (err)
    > +		return err;
    > +
    > +	pack = got_repo_get_cached_pack(repo, path_packfile);
    > +	if (pack == NULL) {
    > +		err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
    > +		if (err)
    > +			return err;
    > +	}
    > +
    > +	if (pack->privsep_child == NULL) {
    > +		err = start_pack_privsep_child(pack, packidx);
    > +		if (err)
    > +			return err;
    > +	}
    > +
    > +	if (!pack->child_has_delta_outfd) {
    > +		int outfd_child;
    > +		outfd_child = dup(delta_cache_fd);
    > +		if (outfd_child == -1)
    > +			return got_error_from_errno("dup");
    > +		err = got_privsep_send_raw_delta_outfd(
    > +		    pack->privsep_child->ibuf, outfd_child);
    > +		if (err)
    > +			return err;
    > +		pack->child_has_delta_outfd = 1;
    > +	}
    > +
    > +	err = got_privsep_send_raw_delta_req(pack->privsep_child->ibuf,
    > +	    obj_idx, id);
    > +	if (err)
    > +		return err;
    > +
    > +	return got_privsep_recv_raw_delta(base_size, result_size, delta_size,
    > +	    delta_offset, delta_out_offset, base_id, pack->privsep_child->ibuf);
    > +}
    > +
    >  static const struct got_error *
    >  request_object(struct got_object **obj, struct got_object_id *id,
    >      struct got_repository *repo, int fd)
    > blob - 152534fe9ad8d3e39e0d2945b284bbae1f218a2f
    > blob + bec28e2592f77e2e43e9df547016ff61d8886b42
    > --- lib/object_idset.c
    > +++ lib/object_idset.c
    > @@ -190,5 +190,42 @@ got_object_idset_num_elements(struct got_object_idset 
    >  	return set->totelem;
    >  }
    >  
    > +struct got_object_idset_element *
    > +got_object_idset_get_element(struct got_object_idset *set, struct got_object_id *id)
    > +{
    > +	return find_element(set, id);
    > +}
    > +
    > +void *
    > +got_object_idset_get_element_data(struct got_object_idset_element *entry)
    > +{
    > +	return entry->data;
    > +}
    > +
    > +const struct got_error *
    > +got_object_idset_for_each_element(struct got_object_idset *set,
    > +    const struct got_error *(*cb)(struct got_object_idset_element *, void *),
    > +    void *arg)
    > +{
    > +	const struct got_error *err;
    > +	struct got_object_idset_element *entry, *tmp;
    > +
    > +	RB_FOREACH_SAFE(entry, got_object_idset_tree, &set->entries, tmp) {
    > +		err = (*cb)(entry, arg);
    > +		if (err)
    > +			return err;
    > +	}
    > +	return NULL;
    > +}
    > +
    > +void
    > +got_object_idset_remove_element(struct got_object_idset *set,
    > +    struct got_object_idset_element *entry)
    > +{
    > +	RB_REMOVE(got_object_idset_tree, &set->entries, entry);
    > +	free(entry);
    > +	set->totelem--;
    > +}
    > +
    >  RB_GENERATE(got_object_idset_tree, got_object_idset_element, entry,
    >      cmp_elements);
    > blob - 8a01d22a8f4fe6686821dab1e44a139ea351a8a1
    > blob + c981da3572edb6919e64e8fa50227b8ab99e0702
    > --- lib/pack.c
    > +++ lib/pack.c
    > @@ -433,6 +433,8 @@ got_packidx_close(struct got_packidx *packidx)
    >  	}
    >  	if (close(packidx->fd) == -1 && err == NULL)
    >  		err = got_error_from_errno("close");
    > +	free(packidx->sorted_offsets);
    > +	free(packidx->sorted_large_offsets);
    >  	free(packidx);
    >  
    >  	return err;
    > @@ -508,7 +510,155 @@ got_packidx_get_object_idx(struct got_packidx *packidx
    >  	return -1;
    >  }
    >  
    > +static int
    > +offset_cmp(const void *pa, const void *pb)
    > +{
    > +	const struct got_pack_offset_index *a, *b;
    > +
    > +	a = (const struct got_pack_offset_index *)pa;
    > +	b = (const struct got_pack_offset_index *)pb;
    > +
    > +	if (a->offset < b->offset)
    > +		return -1;
    > +	else if (a->offset > b->offset)
    > +		return 1;
    > +
    > +	return 0;
    > +}
    > +
    > +static int
    > +large_offset_cmp(const void *pa, const void *pb)
    > +{
    > +	const struct got_pack_large_offset_index *a, *b;
    > +
    > +	a = (const struct got_pack_large_offset_index *)pa;
    > +	b = (const struct got_pack_large_offset_index *)pb;
    > +
    > +	if (a->offset < b->offset)
    > +		return -1;
    > +	else if (a->offset > b->offset)
    > +		return 1;
    > +
    > +	return 0;
    > +}
    > +
    > +static const struct got_error *
    > +build_offset_index(struct got_packidx *p)
    > +{
    > +	uint32_t nobj = be32toh(p->hdr.fanout_table[0xff]);
    > +	unsigned int i, j, k;
    > +
    > +	p->sorted_offsets = calloc(nobj - p->nlargeobj,
    > +	    sizeof(p->sorted_offsets[0]));
    > +	if (p->sorted_offsets == NULL)
    > +		return got_error_from_errno("calloc");
    > +
    > +	if (p->nlargeobj > 0) {
    > +		p->sorted_large_offsets = calloc(p->nlargeobj,
    > +		    sizeof(p->sorted_large_offsets[0]));
    > +		if (p->sorted_large_offsets == NULL)
    > +			return got_error_from_errno("calloc");
    > +	}
    > +
    > +	j = 0;
    > +	k = 0;
    > +	for (i = 0; i < nobj; i++) {
    > +		uint32_t offset = be32toh(p->hdr.offsets[i]);
    > +		if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
    > +			uint64_t loffset;
    > +			uint32_t idx;
    > +			idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
    > +			if (idx >= p->nlargeobj ||
    > +			    p->nlargeobj == 0 ||
    > +			    p->hdr.large_offsets == NULL)
    > +				return got_error(GOT_ERR_BAD_PACKIDX);
    > +			loffset = be64toh(p->hdr.large_offsets[idx]);
    > +			p->sorted_large_offsets[j].offset = loffset;
    > +			p->sorted_large_offsets[j].idx = i;
    > +			j++;
    > +		} else {
    > +			p->sorted_offsets[k].offset = offset;
    > +			p->sorted_offsets[k].idx = i;
    > +			k++;
    > +		}
    > +	}
    > +	if (j != p->nlargeobj || k != nobj - p->nlargeobj)
    > +		return got_error(GOT_ERR_BAD_PACKIDX);
    > +
    > +	qsort(p->sorted_offsets, nobj - p->nlargeobj,
    > +	    sizeof(p->sorted_offsets[0]), offset_cmp);
    > +
    > +	if (p->sorted_large_offsets)
    > +		qsort(p->sorted_large_offsets, p->nlargeobj,
    > +		    sizeof(p->sorted_large_offsets[0]), large_offset_cmp);
    > +
    > +	return NULL;
    > +}
    > +
    >  const struct got_error *
    > +got_packidx_get_offset_idx(int *idx, struct got_packidx *packidx, off_t offset)
    > +{
    > +	const struct got_error *err;
    > +	uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]);
    > +	int i, left, right;
    > +
    > +	*idx = -1;
    > +
    > +	if (packidx->sorted_offsets == NULL) {
    > +		err = build_offset_index(packidx);
    > +		if (err)
    > +			return err;
    > +	}
    > +
    > +	if (offset >= 0x7fffffff) {
    > +		uint64_t lo;
    > +		left = 0, right = packidx->nlargeobj - 1;
    > +		while (left <= right) {
    > +			i = ((left + right) / 2);
    > +			lo = packidx->sorted_large_offsets[i].offset;
    > +			if (lo == offset) {
    > +				*idx = packidx->sorted_large_offsets[i].idx;
    > +				break;
    > +			} else if (offset > lo)
    > +				left = i + 1;
    > +			else if (offset < lo)
    > +				right = i - 1;
    > +		}
    > +	} else {
    > +		uint32_t o;
    > +		left = 0, right = totobj - packidx->nlargeobj - 1;
    > +		while (left <= right) {
    > +			i = ((left + right) / 2);
    > +			o = packidx->sorted_offsets[i].offset;
    > +			if (o == offset) {
    > +				*idx = packidx->sorted_offsets[i].idx;
    > +				break;
    > +			} else if (offset > o)
    > +				left = i + 1;
    > +			else if (offset < o)
    > +				right = i - 1;
    > +		}
    > +	}
    > +
    > +	return NULL;
    > +}
    > +
    > +const struct got_error *
    > +got_packidx_get_object_id(struct got_object_id *id,
    > +    struct got_packidx *packidx, int idx)
    > +{
    > +	uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]);
    > +	struct got_packidx_object_id *oid;
    > +
    > +	if (idx < 0 || idx >= totobj)
    > +		return got_error(GOT_ERR_NO_OBJ);
    > +
    > +	oid = &packidx->hdr.sorted_ids[idx];
    > +	memcpy(id->sha1, oid->sha1, SHA1_DIGEST_LENGTH);
    > +	return NULL;
    > +}
    > +
    > +const struct got_error *
    >  got_packidx_match_id_str_prefix(struct got_object_id_queue *matched_ids,
    >      struct got_packidx *packidx, const char *id_str_prefix)
    >  {
    > @@ -1452,3 +1602,84 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size
    >  
    >  	return err;
    >  }
    > +
    > +const struct got_error *
    > +got_packfile_extract_raw_delta(uint8_t **delta_buf, size_t *delta_size,
    > +    off_t *delta_offset, off_t *base_offset, struct got_object_id *base_id,
    > +    uint64_t *base_size, uint64_t *result_size, struct got_pack *pack,
    > +    struct got_packidx *packidx, int idx)
    > +{
    > +	const struct got_error *err = NULL;
    > +	off_t offset;
    > +	uint8_t type;
    > +	uint64_t size;
    > +	size_t tslen, delta_hdrlen;
    > +
    > +	*delta_buf = NULL;
    > +	*delta_size = 0;
    > +	*delta_offset = 0;
    > +	*base_offset = 0;
    > +	*base_size = 0;
    > +	*result_size = 0;
    > +
    > +	offset = got_packidx_get_object_offset(packidx, idx);
    > +	if (offset == (uint64_t)-1)
    > +		return got_error(GOT_ERR_BAD_PACKIDX);
    > +
    > +	if (offset >= pack->filesize)
    > +		return got_error(GOT_ERR_PACK_OFFSET);
    > +
    > +	err = got_pack_parse_object_type_and_size(&type, &size, &tslen,
    > +	    pack, offset);
    > +	if (err)
    > +		return err;
    > +
    > +	if (tslen + size < tslen || offset + size < size ||
    > +	    tslen + offset < tslen)
    > +		return got_error(GOT_ERR_PACK_OFFSET);
    > +
    > +	switch (type) {
    > +	case GOT_OBJ_TYPE_OFFSET_DELTA:
    > +		err = got_pack_parse_offset_delta(base_offset, &delta_hdrlen,
    > +		    pack, offset, tslen);
    > +		if (err)
    > +			return err;
    > +		break;
    > +	case GOT_OBJ_TYPE_REF_DELTA:
    > +		err = got_pack_parse_ref_delta(base_id, pack, offset, tslen);
    > +		if (err)
    > +			return err;
    > +		delta_hdrlen = SHA1_DIGEST_LENGTH;
    > +		break;
    > +	default:
    > +		return got_error_fmt(GOT_ERR_OBJ_TYPE,
    > +		    "non-delta object type %d found at offset %llu",
    > +		    type, offset);
    > +	}
    > +
    > +	if (tslen + delta_hdrlen < delta_hdrlen ||
    > +	    offset + delta_hdrlen < delta_hdrlen)
    > +		return got_error(GOT_ERR_BAD_DELTA);
    > +
    > +	err = read_delta_data(delta_buf, delta_size,
    > +	    offset + tslen + delta_hdrlen, pack);
    > +	if (err)
    > +		return err;
    > +
    > +	if (*delta_size != size) {
    > +		err = got_error(GOT_ERR_BAD_DELTA);
    > +		goto done;
    > +	}
    > +
    > +	err = got_delta_get_sizes(base_size, result_size, *delta_buf, size);
    > +	if (err)
    > +		goto done;
    > +
    > +	*delta_offset = offset;
    > +done:
    > +	if (err) {
    > +		free(*delta_buf);
    > +		*delta_buf = NULL;
    > +	}
    > +	return err;
    > +}
    > blob - f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a
    > blob + 126be35e5fb8688252b462d73222675dfe1e830d
    > --- lib/pack_create.c
    > +++ lib/pack_create.c
    > @@ -74,6 +74,10 @@ struct got_pack_meta {
    >  	off_t	delta_len;	/* encoded delta length */
    >  	int	nchain;
    >  
    > +	int	have_reused_delta;
    > +	off_t   reused_delta_offset; /* offset of delta in reused pack file */
    > +	struct got_object_id *base_obj_id;
    > +
    >  	/* Only used for delta window */
    >  	struct got_delta_table *dtab;
    >  
    > @@ -124,6 +128,8 @@ clear_meta(struct got_pack_meta *meta)
    >  	meta->path = NULL;
    >  	free(meta->delta_buf);
    >  	meta->delta_buf = NULL;
    > +	free(meta->base_obj_id);
    > +	meta->base_obj_id = NULL;
    >  }
    >  
    >  static void
    > @@ -419,12 +425,229 @@ report_progress(got_pack_progress_cb progress_cb, void
    >  }
    >  
    >  static const struct got_error *
    > -pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
    > -    FILE *delta_cache, struct got_repository *repo,
    > +add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
    > +{
    > +	if (v->nmeta == v->metasz){
    > +		size_t newsize = 2 * v->metasz;
    > +		struct got_pack_meta **new;
    > +		new = reallocarray(v->meta, newsize, sizeof(*new));
    > +		if (new == NULL)
    > +			return got_error_from_errno("reallocarray");
    > +		v->meta = new;
    > +		v->metasz = newsize; 
    > +	}
    > +
    > +	v->meta[v->nmeta++] = m;
    > +	return NULL;
    > +}
    > +
    > +static const struct got_error *
    > +reuse_delta(int idx, struct got_pack_meta *m, struct got_pack_metavec *v,
    > +    struct got_object_idset *idset, struct got_pack *pack,
    > +    struct got_packidx *packidx, int delta_cache_fd,
    > +    struct got_repository *repo)
    > +{
    > +	const struct got_error *err = NULL;
    > +	struct got_pack_meta *base = NULL;
    > +	struct got_object_id *base_obj_id = NULL;
    > +	off_t delta_len = 0, delta_offset = 0, delta_cache_offset = 0;
    > +	uint64_t base_size, result_size;
    > +
    > +	if (m->have_reused_delta)
    > +		return NULL;
    > +
    > +	err = got_object_read_raw_delta(&base_size, &result_size, &delta_len,
    > +	    &delta_offset, &delta_cache_offset, &base_obj_id, delta_cache_fd,
    > +	    packidx, idx, &m->id, repo);
    > +	if (err)
    > +		return err;
    > +
    > +	if (delta_offset + delta_len < delta_offset)
    > +		return got_error(GOT_ERR_BAD_PACKFILE);
    > +
    > +	base = got_object_idset_get(idset, base_obj_id);
    > +	if (base == NULL)
    > +		goto done;
    > +
    > +	m->delta_len = delta_len;
    > +	m->delta_offset = delta_cache_offset;
    > +	m->prev = base;
    > +	m->size = result_size;
    > +	m->have_reused_delta = 1;
    > +	m->reused_delta_offset = delta_offset;
    > +	m->base_obj_id = base_obj_id;
    > +	base_obj_id = NULL;
    > +	err = add_meta(m, v);
    > +done:
    > +	free(base_obj_id);
    > +	return err;
    > +}
    > +
    > +static const struct got_error *
    > +find_pack_for_reuse(struct got_packidx **best_packidx,
    > +    struct got_repository *repo)
    > +{
    > +	const struct got_error *err;
    > +	struct got_pathlist_head packidx_paths;
    > +	struct got_pathlist_entry *pe;
    > +	const char *best_packidx_path = NULL;
    > +	int nobj_max = 0;
    > +
    > +	TAILQ_INIT(&packidx_paths);
    > +	*best_packidx = NULL;
    > +
    > +	err = got_repo_list_packidx(&packidx_paths, repo);
    > +	if (err)
    > +		return err;
    > +
    > +	TAILQ_FOREACH(pe, &packidx_paths, entry) {
    > +		const char *path_packidx = pe->path;
    > +		struct got_packidx *packidx;
    > +		int nobj;
    > +
    > +		err = got_repo_get_packidx(&packidx, path_packidx, repo);
    > +		if (err)
    > +			break;
    > +
    > +		nobj = be32toh(packidx->hdr.fanout_table[0xff]);
    > +		if (nobj > nobj_max) {
    > +			best_packidx_path = path_packidx;
    > +			nobj_max = nobj;
    > +		}
    > +	}
    > +
    > +	if (best_packidx_path) {
    > +		err = got_repo_get_packidx(best_packidx, best_packidx_path,
    > +		    repo);
    > +	}
    > +
    > +	TAILQ_FOREACH(pe, &packidx_paths, entry)
    > +		free((void *)pe->path);
    > +	got_pathlist_free(&packidx_paths);
    > +	return err;
    > +}
    > +
    > +struct search_deltas_arg {
    > +	struct got_packidx *packidx;
    > +	struct got_pack *pack;
    > +	struct got_object_idset *idset;
    > +	struct got_pack_metavec *v;
    > +	int delta_cache_fd;
    > +	struct got_repository *repo;
    > +	got_pack_progress_cb progress_cb;
    > +	void *progress_arg;
    > +	struct got_ratelimit *rl;
    > +	got_cancel_cb cancel_cb;
    > +	void *cancel_arg;
    > +	int ncommits;
    > +};
    > +
    > +static const struct got_error *
    > +search_delta_for_object(struct got_object_id *id, void *data, void *arg)
    > +{
    > +	const struct got_error *err;
    > +	struct got_pack_meta *m = data;
    > +	struct search_deltas_arg *a = arg;
    > +	int obj_idx;
    > +	struct got_object *obj = NULL;
    > +
    > +	if (a->cancel_cb) {
    > +		err = (*a->cancel_cb)(a->cancel_arg);
    > +		if (err)
    > +			return err;
    > +	}
    > +
    > +	if (!got_repo_check_packidx_bloom_filter(a->repo,
    > +	    a->packidx->path_packidx, id))
    > +		return NULL;
    > +
    > +	obj_idx = got_packidx_get_object_idx(a->packidx, id);
    > +	if (obj_idx == -1)
    > +		return NULL;
    > +
    > +	/* TODO:
    > +	 * Opening and closing an object just to check its flags
    > +	 * is a bit expensive. We could have an imsg which requests
    > +	 * plain type/size information for an object without doing
    > +	 * work such as traversing the object's entire delta chain
    > +	 * to find the base object type, and other such info which
    > +	 * we don't really need here.
    > +	 */
    > +	err = got_object_open_from_packfile(&obj, &m->id, a->pack,
    > +	    a->packidx, obj_idx, a->repo);
    > +	if (err)
    > +		return err;
    > +
    > +	if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
    > +		reuse_delta(obj_idx, m, a->v, a->idset, a->pack, a->packidx,
    > +		    a->delta_cache_fd, a->repo);
    > +		if (err)
    > +			goto done;
    > +		err = report_progress(a->progress_cb, a->progress_arg, a->rl,
    > +		    0L, a->ncommits, got_object_idset_num_elements(a->idset),
    > +		    a->v->nmeta, 0);
    > +	}
    > +done:
    > +	got_object_close(obj);
    > +	return err;
    > +}
    > +
    > +static const struct got_error *
    > +search_deltas(struct got_pack_metavec *v, struct got_object_idset *idset,
    > +    int delta_cache_fd, int ncommits, struct got_repository *repo,
    >      got_pack_progress_cb progress_cb, void *progress_arg,
    >      struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    >  	const struct got_error *err = NULL;
    > +	char *path_packfile = NULL;
    > +	struct got_packidx *packidx;
    > +	struct got_pack *pack;
    > +	struct search_deltas_arg sda;
    > +
    > +	err = find_pack_for_reuse(&packidx, repo);
    > +	if (err)
    > +		return err;
    > +
    > +	if (packidx == NULL)
    > +		return NULL;
    > +
    > +	err = got_packidx_get_packfile_path(&path_packfile,
    > +	    packidx->path_packidx);
    > +	if (err)
    > +		return err;
    > +
    > +	pack = got_repo_get_cached_pack(repo, path_packfile);
    > +	if (pack == NULL) {
    > +		err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
    > +		if (err)
    > +			goto done;
    > +	}
    > +
    > +	sda.packidx = packidx;
    > +	sda.pack = pack;
    > +	sda.idset = idset;
    > +	sda.v = v;
    > +	sda.delta_cache_fd = delta_cache_fd;
    > +	sda.repo = repo;
    > +	sda.progress_cb = progress_cb;
    > +	sda.progress_arg = progress_arg;
    > +	sda.rl = rl;
    > +	sda.cancel_cb = cancel_cb;
    > +	sda.cancel_arg = cancel_arg;
    > +	sda.ncommits = ncommits;
    > +	err = got_object_idset_for_each(idset, search_delta_for_object, &sda);
    > +done:
    > +	free(path_packfile);
    > +	return err;
    > +}
    > +
    > +static const struct got_error *
    > +pick_deltas(struct got_pack_meta **meta, int nmeta, int ncommits,
    > +    int nreused, FILE *delta_cache, struct got_repository *repo,
    > +    got_pack_progress_cb progress_cb, void *progress_arg,
    > +    struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
    > +{
    > +	const struct got_error *err = NULL;
    >  	struct got_pack_meta *m = NULL, *base = NULL;
    >  	struct got_raw_object *raw = NULL, *base_raw = NULL;
    >  	struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
    > @@ -443,7 +666,7 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
    >  				break;
    >  		}
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    0L, nours, nmeta, i, 0);
    > +		    0L, ncommits, nreused + nmeta, nreused + i, 0);
    >  		if (err)
    >  			goto done;
    >  		m = meta[i];
    > @@ -492,6 +715,7 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
    >  			    &base->id);
    >  			if (err)
    >  				goto done;
    > +
    >  			if (raw->f == NULL && base_raw->f == NULL) {
    >  				err = got_deltify_mem_mem(&deltas, &ndeltas,
    >  				    raw->data, raw->hdrlen,
    > @@ -556,6 +780,15 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in
    >  				    best_ndeltas, best_size, m->prev->size);
    >  			} else {
    >  				m->delta_offset = ftello(delta_cache);
    > +				/*
    > +				 * TODO:
    > +				 * Storing compressed delta data in the delta
    > +				 * cache file would probably be more efficient
    > +				 * than writing uncompressed delta data here
    > +				 * and compressing it while writing the pack
    > +				 * file. This would also allow for reusing
    > +				 * deltas in their compressed form.
    > +				 */
    >  				err = encode_delta(m, raw, best_deltas,
    >  				    best_ndeltas, m->prev->size, delta_cache);
    >  			}
    > @@ -614,12 +847,12 @@ static const int obj_types[] = {
    >  };
    >  
    >  static const struct got_error *
    > -add_meta(struct got_pack_metavec *v, struct got_object_idset *idset,
    > +add_object(int want_meta, struct got_object_idset *idset,
    >      struct got_object_id *id, const char *path, int obj_type,
    >      time_t mtime, int loose_obj_only, struct got_repository *repo)
    >  {
    >  	const struct got_error *err;
    > -	struct got_pack_meta *m;
    > +	struct got_pack_meta *m = NULL;
    >  
    >  	if (loose_obj_only) {
    >  		int is_packed;
    > @@ -630,40 +863,17 @@ add_meta(struct got_pack_metavec *v, struct got_object
    >  			return NULL;
    >  	}
    >  
    > -	err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]);
    > -	if (err)
    > -		return err;
    > -
    > -	if (v == NULL)
    > -		return NULL;
    > -
    > -	err = alloc_meta(&m, id, path, obj_type, mtime);
    > -	if (err)
    > -		goto done;
    > -
    > -	if (v->nmeta == v->metasz){
    > -		size_t newsize = 2 * v->metasz;
    > -		struct got_pack_meta **new;
    > -		new = reallocarray(v->meta, newsize, sizeof(*new));
    > -		if (new == NULL) {
    > -			err = got_error_from_errno("reallocarray");
    > -			goto done;
    > -		}
    > -		v->meta = new;
    > -		v->metasz = newsize; 
    > +	if (want_meta) {
    > +		err = alloc_meta(&m, id, path, obj_type, mtime);
    > +		if (err)
    > +			return err;
    >  	}
    > -done:
    > -	if (err) {
    > -		clear_meta(m);
    > -		free(m);
    > -	} else
    > -		v->meta[v->nmeta++] = m;
    >  
    > -	return err;
    > +	return got_object_idset_add(idset, id, m);
    >  }
    >  
    >  static const struct got_error *
    > -load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v,
    > +load_tree_entries(struct got_object_id_queue *ids, int want_meta,
    >      struct got_object_idset *idset, struct got_object_id *tree_id,
    >      const char *dpath, time_t mtime, struct got_repository *repo,
    >      int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg)
    > @@ -705,8 +915,8 @@ load_tree_entries(struct got_object_id_queue *ids, str
    >  				break;
    >  			STAILQ_INSERT_TAIL(ids, qid, entry);
    >  		} else if (S_ISREG(mode) || S_ISLNK(mode)) {
    > -			err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB,
    > -			    mtime, loose_obj_only, repo);
    > +			err = add_object(want_meta, idset, id, p,
    > +			    GOT_OBJ_TYPE_BLOB, mtime, loose_obj_only, repo);
    >  			if (err)
    >  				break;
    >  		}
    > @@ -720,7 +930,7 @@ load_tree_entries(struct got_object_id_queue *ids, str
    >  }
    >  
    >  static const struct got_error *
    > -load_tree(struct got_pack_metavec *v, struct got_object_idset *idset,
    > +load_tree(int want_meta, struct got_object_idset *idset,
    >      struct got_object_id *tree_id, const char *dpath, time_t mtime,
    >      int loose_obj_only, struct got_repository *repo,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    > @@ -754,15 +964,15 @@ load_tree(struct got_pack_metavec *v, struct got_objec
    >  			continue;
    >  		}
    >  
    > -		err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE,
    > -		    mtime, loose_obj_only, repo);
    > +		err = add_object(want_meta, idset, qid->id, dpath,
    > +		    GOT_OBJ_TYPE_TREE, mtime, loose_obj_only, repo);
    >  		if (err) {
    >  			got_object_qid_free(qid);
    >  			break;
    >  		}
    >  
    > -		err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath,
    > -		    mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
    > +		err = load_tree_entries(&tree_ids, want_meta, idset, qid->id,
    > +		    dpath, mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
    >  		got_object_qid_free(qid);
    >  		if (err)
    >  			break;
    > @@ -773,7 +983,7 @@ load_tree(struct got_pack_metavec *v, struct got_objec
    >  }
    >  
    >  static const struct got_error *
    > -load_commit(struct got_pack_metavec *v, struct got_object_idset *idset,
    > +load_commit(int want_meta, struct got_object_idset *idset,
    >      struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    > @@ -796,13 +1006,13 @@ load_commit(struct got_pack_metavec *v, struct got_obj
    >  	if (err)
    >  		return err;
    >  
    > -	err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT,
    > +	err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_COMMIT,
    >  	    got_object_commit_get_committer_time(commit),
    >  	    loose_obj_only, repo);
    >  	if (err)
    >  		goto done;
    >  
    > -	err = load_tree(v, idset, got_object_commit_get_tree_id(commit),
    > +	err = load_tree(want_meta, idset, got_object_commit_get_tree_id(commit),
    >  	    "", got_object_commit_get_committer_time(commit),
    >  	    loose_obj_only, repo, cancel_cb, cancel_arg);
    >  done:
    > @@ -811,7 +1021,7 @@ done:
    >  }
    >  
    >  static const struct got_error *
    > -load_tag(struct got_pack_metavec *v, struct got_object_idset *idset,
    > +load_tag(int want_meta, struct got_object_idset *idset,
    >      struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    > @@ -834,7 +1044,7 @@ load_tag(struct got_pack_metavec *v, struct got_object
    >  	if (err)
    >  		return err;
    >  
    > -	err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG,
    > +	err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_TAG,
    >  	    got_object_tag_get_tagger_time(tag),
    >  	    loose_obj_only, repo);
    >  	if (err)
    > @@ -842,13 +1052,14 @@ load_tag(struct got_pack_metavec *v, struct got_object
    >  
    >  	switch (got_object_tag_get_object_type(tag)) {
    >  	case GOT_OBJ_TYPE_COMMIT:
    > -		err = load_commit(v, idset,
    > +		err = load_commit(want_meta, idset,
    >  		    got_object_tag_get_object_id(tag), repo,
    >  		    loose_obj_only, cancel_cb, cancel_arg);
    >  		break;
    >  	case GOT_OBJ_TYPE_TREE:
    > -		err = load_tree(v, idset, got_object_tag_get_object_id(tag),
    > -		    "", got_object_tag_get_tagger_time(tag),
    > +		err = load_tree(want_meta, idset,
    > +		    got_object_tag_get_object_id(tag), "",
    > +		    got_object_tag_get_tagger_time(tag),
    >  		    loose_obj_only, repo, cancel_cb, cancel_arg);
    >  		break;
    >  	default:
    > @@ -1124,7 +1335,7 @@ done:
    >  }
    >  
    >  static const struct got_error *
    > -read_meta(struct got_pack_meta ***meta, int *nmeta,
    > +load_object_ids(struct got_object_idset *idset,
    >      struct got_object_id **theirs, int ntheirs,
    >      struct got_object_id **ours, int nours, struct got_repository *repo,
    >      int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
    > @@ -1132,25 +1343,8 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
    >  {
    >  	const struct got_error *err = NULL;
    >  	struct got_object_id **ids = NULL;
    > -	struct got_object_idset *idset;
    >  	int i, nobj = 0, obj_type;
    > -	struct got_pack_metavec v;
    >  
    > -	*meta = NULL;
    > -	*nmeta = 0;
    > -
    > -	idset = got_object_idset_alloc();
    > -	if (idset == NULL)
    > -		return got_error_from_errno("got_object_idset_alloc");
    > -
    > -	v.nmeta = 0;
    > -	v.metasz = 64;
    > -	v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *));
    > -	if (v.meta == NULL) {
    > -		err = got_error_from_errno("calloc");
    > -		goto done;
    > -	}
    > -
    >  	err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo,
    >  	    cancel_cb, cancel_arg);
    >  	if (err || nobj == 0)
    > @@ -1165,79 +1359,81 @@ read_meta(struct got_pack_meta ***meta, int *nmeta,
    >  			return err;
    >  		if (obj_type != GOT_OBJ_TYPE_COMMIT)
    >  			continue;
    > -		err = load_commit(NULL, idset, id, repo,
    > +		err = load_commit(0, idset, id, repo,
    >  		    loose_obj_only, cancel_cb, cancel_arg);
    >  		if (err)
    >  			goto done;
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    0L, nours, v.nmeta, 0, 0);
    > +		    0L, nours, got_object_idset_num_elements(idset),
    > +		    0, 0);
    >  		if (err)
    >  			goto done;
    >  	}
    >  
    >  	for (i = 0; i < ntheirs; i++) {
    >  		struct got_object_id *id = theirs[i];
    > -		int *cached_type;
    > +		struct got_pack_meta *m;
    >  		if (id == NULL)
    >  			continue;
    > -		cached_type = got_object_idset_get(idset, id);
    > -		if (cached_type == NULL) {
    > +		m = got_object_idset_get(idset, id);
    > +		if (m == NULL) {
    >  			err = got_object_get_type(&obj_type, repo, id);
    >  			if (err)
    >  				goto done;
    >  		} else
    > -			obj_type = *cached_type;
    > +			obj_type = m->obj_type;
    >  		if (obj_type != GOT_OBJ_TYPE_TAG)
    >  			continue;
    > -		err = load_tag(NULL, idset, id, repo,
    > +		err = load_tag(0, idset, id, repo,
    >  		    loose_obj_only, cancel_cb, cancel_arg);
    >  		if (err)
    >  			goto done;
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    0L, nours, v.nmeta, 0, 0);
    > +		    0L, nours, got_object_idset_num_elements(idset), 0, 0);
    >  		if (err)
    >  			goto done;
    >  	}
    >  
    >  	for (i = 0; i < nobj; i++) {
    > -		err = load_commit(&v, idset, ids[i], repo,
    > +		err = load_commit(1, idset, ids[i], repo,
    >  		    loose_obj_only, cancel_cb, cancel_arg);
    >  		if (err)
    >  			goto done;
    >  		if (err)
    >  			goto done;
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    0L, nours, v.nmeta, 0, 0);
    > +		    0L, nours, got_object_idset_num_elements(idset), 0, 0);
    >  		if (err)
    >  			goto done;
    >  	}
    >  
    >  	for (i = 0; i < nours; i++) {
    >  		struct got_object_id *id = ours[i];
    > -		int *cached_type;
    > +		struct got_pack_meta *m;
    >  		if (id == NULL)
    >  			continue;
    > -		cached_type = got_object_idset_get(idset, id);
    > -		if (cached_type == NULL) {
    > +		m = got_object_idset_get(idset, id);
    > +		if (m == NULL) {
    >  			err = got_object_get_type(&obj_type, repo, id);
    >  			if (err)
    >  				goto done;
    >  		} else
    > -			obj_type = *cached_type;
    > +			obj_type = m->obj_type;
    >  		if (obj_type != GOT_OBJ_TYPE_TAG)
    >  			continue;
    > -		err = load_tag(&v, idset, id, repo,
    > +		err = load_tag(1, idset, id, repo,
    >  		    loose_obj_only, cancel_cb, cancel_arg);
    >  		if (err)
    >  			goto done;
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    0L, nours, v.nmeta, 0, 0);
    > +		    0L, nours, got_object_idset_num_elements(idset), 0, 0);
    >  		if (err)
    >  			goto done;
    >  	}
    >  
    >  	if (progress_cb) {
    > -		err = progress_cb(progress_arg, 0L, nours, v.nmeta, 0, 0);
    > +		err = progress_cb(progress_arg, 0L, nours,
    > +		    got_object_idset_num_elements(idset), 0, 0);
    >  		if (err)
    >  			goto done;
    >  	}
    > @@ -1246,13 +1442,6 @@ done:
    >  		free(ids[i]);
    >  	}
    >  	free(ids);
    > -	got_object_idset_free(idset);
    > -	if (err == NULL) {
    > -		*meta = v.meta;
    > -		*nmeta = v.nmeta;
    > -	} else
    > -		free(v.meta);
    > -
    >  	return err;
    >  }
    >  
    > @@ -1295,6 +1484,21 @@ write_order_cmp(const void *pa, const void *pb)
    >  	return a->mtime - b->mtime;
    >  }
    >  
    > +static int
    > +reuse_write_order_cmp(const void *pa, const void *pb)
    > +{
    > +	struct got_pack_meta *a, *b;
    > +
    > +	a = *(struct got_pack_meta **)pa;
    > +	b = *(struct got_pack_meta **)pb;
    > +
    > +	if (a->reused_delta_offset < b->reused_delta_offset)
    > +		return -1;
    > +	if (a->reused_delta_offset > b->reused_delta_offset)
    > +		return 1;
    > +	return 0;
    > +}
    > +
    >  static const struct got_error *
    >  packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
    >  {
    > @@ -1337,13 +1541,13 @@ packoff(char *hdr, off_t off)
    >  
    >  static const struct got_error *
    >  deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *packfile,
    > -    struct got_pack_meta *m, int use_offset_deltas)
    > +    struct got_pack_meta *m)
    >  {
    >  	const struct got_error *err;
    >  	char buf[32];
    >  	int nh;
    >  
    > -	if (use_offset_deltas && m->prev->off != 0) {
    > +	if (m->prev->off != 0) {
    >  		err = packhdr(&nh, buf, sizeof(buf),
    >  		    GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
    >  		if (err)
    > @@ -1373,27 +1577,104 @@ deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *pa
    >  }
    >  
    >  static const struct got_error *
    > +write_packed_object(off_t *packfile_size, FILE *packfile,
    > +    FILE *delta_cache, struct got_pack_meta *m, int *outfd,
    > +    SHA1_CTX *ctx, struct got_repository *repo)
    > +{
    > +	const struct got_error *err = NULL;
    > +	struct got_deflate_checksum csum;
    > +	char buf[32];
    > +	int nh;
    > +	struct got_raw_object *raw = NULL;
    > +	off_t outlen;
    > +
    > +	csum.output_sha1 = ctx;
    > +	csum.output_crc = NULL;
    > +
    > +	m->off = ftello(packfile);
    > +	if (m->delta_len == 0) {
    > +		err = got_object_raw_open(&raw, outfd, repo, &m->id);
    > +		if (err)
    > +			goto done;
    > +		err = packhdr(&nh, buf, sizeof(buf),
    > +		    m->obj_type, raw->size);
    > +		if (err)
    > +			goto done;
    > +		err = hwrite(packfile, buf, nh, ctx);
    > +		if (err)
    > +			goto done;
    > +		*packfile_size += nh;
    > +		if (raw->f == NULL) {
    > +			err = got_deflate_to_file_mmap(&outlen,
    > +			    raw->data + raw->hdrlen, 0, raw->size,
    > +			    packfile, &csum);
    > +			if (err)
    > +				goto done;
    > +		} else {
    > +			if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
    > +			    == -1) {
    > +				err = got_error_from_errno("fseeko");
    > +				goto done;
    > +			}
    > +			err = got_deflate_to_file(&outlen, raw->f,
    > +			    raw->size, packfile, &csum);
    > +			if (err)
    > +				goto done;
    > +		}
    > +		*packfile_size += outlen;
    > +		got_object_raw_close(raw);
    > +		raw = NULL;
    > +	} else if (m->delta_buf) {
    > +		err = deltahdr(packfile_size, ctx, packfile, m);
    > +		if (err)
    > +			goto done;
    > +		err = got_deflate_to_file_mmap(&outlen,
    > +		    m->delta_buf, 0, m->delta_len, packfile, &csum);
    > +		if (err)
    > +			goto done;
    > +		*packfile_size += outlen;
    > +		free(m->delta_buf);
    > +		m->delta_buf = NULL;
    > +	} else {
    > +		if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
    > +		    == -1) {
    > +			err = got_error_from_errno("fseeko");
    > +			goto done;
    > +		}
    > +		err = deltahdr(packfile_size, ctx, packfile, m);
    > +		if (err)
    > +			goto done;
    > +		err = got_deflate_to_file(&outlen, delta_cache,
    > +		    m->delta_len, packfile, &csum);
    > +		if (err)
    > +			goto done;
    > +		*packfile_size += outlen;
    > +	}
    > +done:
    > +	if (raw)
    > +		got_object_raw_close(raw);
    > +	return err;
    > +}
    > +
    > +static const struct got_error *
    >  genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache,
    > -    struct got_pack_meta **meta, int nmeta, int nours,
    > -    int use_offset_deltas, struct got_repository *repo,
    > +    struct got_pack_meta **deltify, int ndeltify,
    > +    struct got_pack_meta **reuse, int nreuse,
    > +    int nours, struct got_repository *repo,
    >      got_pack_progress_cb progress_cb, void *progress_arg,
    >      struct got_ratelimit *rl,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    >  	const struct got_error *err = NULL;
    > -	int i, nh;
    > +	int i;
    >  	SHA1_CTX ctx;
    >  	struct got_pack_meta *m;
    > -	struct got_raw_object *raw = NULL;
    >  	char buf[32];
    >  	size_t n;
    > -	struct got_deflate_checksum csum;
    > -	off_t outlen, packfile_size = 0;
    > +	off_t packfile_size = 0;
    >  	int outfd = -1;
    >  
    >  	SHA1Init(&ctx);
    > -	csum.output_sha1 = &ctx;
    > -	csum.output_crc = NULL;
    >  
    >  	err = hwrite(packfile, "PACK", 4, &ctx);
    >  	if (err)
    > @@ -1402,79 +1683,41 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
    >  	err = hwrite(packfile, buf, 4, &ctx);
    >  	if (err)
    >  		goto done;
    > -	putbe32(buf, nmeta);
    > +	putbe32(buf, ndeltify + nreuse);
    >  	err = hwrite(packfile, buf, 4, &ctx);
    >  	if (err)
    >  		goto done;
    > -	qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
    > -	for (i = 0; i < nmeta; i++) {
    > +
    > +	qsort(deltify, ndeltify, sizeof(struct got_pack_meta *),
    > +	    write_order_cmp);
    > +	for (i = 0; i < ndeltify; i++) {
    >  		err = report_progress(progress_cb, progress_arg, rl,
    > -		    packfile_size, nours, nmeta, nmeta, i);
    > +		    packfile_size, nours, ndeltify + nreuse,
    > +		    ndeltify + nreuse, i);
    >  		if (err)
    >  			goto done;
    > -		m = meta[i];
    > -		m->off = ftello(packfile);
    > -		if (m->delta_len == 0) {
    > -			err = got_object_raw_open(&raw, &outfd, repo, &m->id);
    > -			if (err)
    > -				goto done;
    > -			err = packhdr(&nh, buf, sizeof(buf),
    > -			    m->obj_type, raw->size);
    > -			if (err)
    > -				goto done;
    > -			err = hwrite(packfile, buf, nh, &ctx);
    > -			if (err)
    > -				goto done;
    > -			packfile_size += nh;
    > -			if (raw->f == NULL) {
    > -				err = got_deflate_to_file_mmap(&outlen,
    > -				    raw->data + raw->hdrlen, 0, raw->size,
    > -				    packfile, &csum);
    > -				if (err)
    > -					goto done;
    > -			} else {
    > -				if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
    > -				    == -1) {
    > -					err = got_error_from_errno("fseeko");
    > -					goto done;
    > -				}
    > -				err = got_deflate_to_file(&outlen, raw->f,
    > -				    raw->size, packfile, &csum);
    > -				if (err)
    > -					goto done;
    > -			}
    > -			packfile_size += outlen;
    > -			got_object_raw_close(raw);
    > -			raw = NULL;
    > -		} else if (m->delta_buf) {
    > -			err = deltahdr(&packfile_size, &ctx, packfile,
    > -			    m, use_offset_deltas);
    > -			if (err)
    > -				goto done;
    > - 			err = got_deflate_to_file_mmap(&outlen,
    > -			    m->delta_buf, 0, m->delta_len, packfile, &csum);
    > - 			if (err)
    > - 				goto done;
    > - 			packfile_size += outlen;
    > -			free(m->delta_buf);
    > -			m->delta_buf = NULL;
    > -		} else {
    > -			if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
    > -			    == -1) {
    > -				err = got_error_from_errno("fseeko");
    > -				goto done;
    > -			}
    > -			err = deltahdr(&packfile_size, &ctx, packfile,
    > -			    m, use_offset_deltas);
    > -			if (err)
    > -				goto done;
    > -			err = got_deflate_to_file(&outlen, delta_cache,
    > -			    m->delta_len, packfile, &csum);
    > -			if (err)
    > -				goto done;
    > -			packfile_size += outlen;
    > -		}
    > +		m = deltify[i];
    > +		err = write_packed_object(&packfile_size, packfile,
    > +		    delta_cache, m, &outfd, &ctx, repo);
    > +		if (err)
    > +			goto done;
    >  	}
    > +
    > +	qsort(reuse, nreuse, sizeof(struct got_pack_meta *),
    > +	    reuse_write_order_cmp);
    > +	for (i = 0; i < nreuse; i++) {
    > +		err = report_progress(progress_cb, progress_arg, rl,
    > +		    packfile_size, nours, ndeltify + nreuse,
    > +		    ndeltify + nreuse, ndeltify + i);
    > +		if (err)
    > +			goto done;
    > +		m = reuse[i];
    > +		err = write_packed_object(&packfile_size, packfile,
    > +		    delta_cache, m, &outfd, &ctx, repo);
    > +		if (err)
    > +			goto done;
    > +	}
    > +
    >  	SHA1Final(pack_sha1, &ctx);
    >  	n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
    >  	if (n != SHA1_DIGEST_LENGTH)
    > @@ -1483,18 +1726,50 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt
    >  	packfile_size += sizeof(struct got_packfile_hdr);
    >  	if (progress_cb) {
    >  		err = progress_cb(progress_arg, packfile_size, nours,
    > -		    nmeta, nmeta, nmeta);
    > +		    ndeltify + nreuse, ndeltify + nreuse,
    > +		    ndeltify + nreuse);
    >  		if (err)
    >  			goto done;
    >  	}
    >  done:
    > -	if (raw)
    > -		got_object_raw_close(raw);
    >  	if (outfd != -1 && close(outfd) == -1 && err == NULL)
    >  		err = got_error_from_errno("close");
    >  	return err;
    >  }
    >  
    > +static const struct got_error *
    > +remove_unused_object(struct got_object_idset_element *entry, void *arg)
    > +{
    > +	struct got_object_idset *idset = arg;
    > +
    > +	if (got_object_idset_get_element_data(entry) == NULL)
    > +		got_object_idset_remove_element(idset, entry);
    > +
    > +	return NULL;
    > +}
    > +
    > +static const struct got_error *
    > +remove_reused_object(struct got_object_idset_element *entry, void *arg)
    > +{
    > +	struct got_object_idset *idset = arg;
    > +	struct got_pack_meta *m;
    > +
    > +	m = got_object_idset_get_element_data(entry);
    > +	if (m->have_reused_delta)
    > +		got_object_idset_remove_element(idset, entry);
    > +
    > +	return NULL;
    > +}
    > +
    > +static const struct got_error *
    > +add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
    > +{
    > +	struct got_pack_meta *m = data;
    > +	struct got_pack_metavec *v = arg;
    > +
    > +	return add_meta(m, v);
    > +}
    > +
    >  const struct got_error *
    >  got_pack_create(uint8_t *packsha1, FILE *packfile,
    >      struct got_object_id **theirs, int ntheirs,
    > @@ -1504,32 +1779,88 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    >  	const struct got_error *err;
    > -	struct got_pack_meta **meta;
    > -	int nmeta;
    > +	int delta_cache_fd = -1;
    >  	FILE *delta_cache = NULL;
    > +	struct got_object_idset *idset;
    >  	struct got_ratelimit rl;
    > +	struct got_pack_metavec deltify, reuse;
    >  
    > +	memset(&deltify, 0, sizeof(deltify));
    > +	memset(&reuse, 0, sizeof(reuse));
    > +
    >  	got_ratelimit_init(&rl, 0, 500);
    >  
    > -	err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
    > -	    loose_obj_only, progress_cb, progress_arg, &rl,
    > +	idset = got_object_idset_alloc();
    > +	if (idset == NULL)
    > +		return got_error_from_errno("got_object_idset_alloc");
    > +
    > +	err = load_object_ids(idset, theirs, ntheirs, ours, nours,
    > +	    repo, loose_obj_only, progress_cb, progress_arg, &rl,
    >  	    cancel_cb, cancel_arg);
    >  	if (err)
    >  		return err;
    >  
    > -	if (nmeta == 0 && !allow_empty) {
    > +	err = got_object_idset_for_each_element(idset,
    > +	    remove_unused_object, idset);
    > +	if (err)
    > +		goto done;
    > +
    > +	if (got_object_idset_num_elements(idset) == 0 && !allow_empty) {
    >  		err = got_error(GOT_ERR_CANNOT_PACK);
    >  		goto done;
    >  	}
    >  
    > -	delta_cache = got_opentemp();
    > -	if (delta_cache == NULL) {
    > +	delta_cache_fd = got_opentempfd();
    > +	if (delta_cache_fd == -1) {
    >  		err = got_error_from_errno("got_opentemp");
    >  		goto done;
    >  	}
    >  
    > -	if (nmeta > 0) {
    > -		err = pick_deltas(meta, nmeta, nours, delta_cache, repo,
    > +	reuse.metasz = 64;
    > +	reuse.meta = calloc(reuse.metasz,
    > +	    sizeof(struct got_pack_meta *));
    > +	if (reuse.meta == NULL) {
    > +		err = got_error_from_errno("calloc");
    > +		goto done;
    > +	}
    > +
    > +	err = search_deltas(&reuse, idset, delta_cache_fd, nours, repo,
    > +	    progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
    > +	if (err)
    > +		goto done;
    > +	if (reuse.nmeta > 0) {
    > +		err = got_object_idset_for_each_element(idset,
    > +		    remove_reused_object, idset);
    > +		if (err)
    > +			goto done;
    > +	}
    > +
    > +	delta_cache = fdopen(delta_cache_fd, "a+");
    > +	if (delta_cache == NULL) {
    > +		err = got_error_from_errno("fdopen");
    > +		goto done;
    > +	}
    > +	delta_cache_fd = -1;
    > +
    > +	if (fseeko(delta_cache, 0L, SEEK_END) == -1) {
    > +		err = got_error_from_errno("fseeko");
    > +		goto done;
    > +	}
    > +
    > +	deltify.meta = calloc(got_object_idset_num_elements(idset),
    > +	    sizeof(struct got_pack_meta *));
    > +	if (deltify.meta == NULL) {
    > +		err = got_error_from_errno("calloc");
    > +		goto done;
    > +	}
    > +	deltify.metasz = got_object_idset_num_elements(idset);
    > +
    > +	err = got_object_idset_for_each(idset, add_meta_idset_cb, &deltify);
    > +	if (err)
    > +		goto done;
    > +	if (deltify.nmeta > 0) {
    > +		err = pick_deltas(deltify.meta, deltify.nmeta, nours,
    > +		    reuse.nmeta, delta_cache, repo,
    >  		    progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
    >  		if (err)
    >  			goto done;
    > @@ -1539,12 +1870,17 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
    >  		}
    >  	}
    >  
    > -	err = genpack(packsha1, packfile, delta_cache, meta, nmeta, nours, 1,
    > -	    repo, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
    > +	err = genpack(packsha1, packfile, delta_cache, deltify.meta,
    > +	    deltify.nmeta, reuse.meta, reuse.nmeta, nours, repo,
    > +	    progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
    >  	if (err)
    >  		goto done;
    >  done:
    > -	free_nmeta(meta, nmeta);
    > +	free_nmeta(deltify.meta, deltify.nmeta);
    > +	free_nmeta(reuse.meta, reuse.nmeta);
    > +	got_object_idset_free(idset);
    > +	if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
    > +		err = got_error_from_errno("close");
    >  	if (delta_cache && fclose(delta_cache) == EOF && err == NULL)
    >  		err = got_error_from_errno("fclose");
    >  	return err;
    > blob - 9efd7ae9b91bb9c96b3d3a23e15d29ff1e58f89f
    > blob + 48e50b65411ba0b6ffccd2853fe7f30044f9f398
    > --- lib/privsep.c
    > +++ lib/privsep.c
    > @@ -2728,6 +2728,107 @@ got_privsep_recv_traversed_commits(struct got_commit_o
    >  }
    >  
    >  const struct got_error *
    > +got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx,
    > +    struct got_object_id *id)
    > +{
    > +	struct got_imsg_raw_delta_request dreq;
    > +
    > +	dreq.idx = idx;
    > +	memcpy(dreq.id, id->sha1, SHA1_DIGEST_LENGTH);
    > +
    > +	if (imsg_compose(ibuf, GOT_IMSG_RAW_DELTA_REQUEST, 0, 0, -1,
    > +	    &dreq, sizeof(dreq)) == -1)
    > +		return got_error_from_errno("imsg_compose RAW_DELTA_REQUEST");
    > +
    > +	return flush_imsg(ibuf);
    > +}
    > +
    > +const struct got_error *
    > +got_privsep_send_raw_delta_outfd(struct imsgbuf *ibuf, int fd)
    > +{
    > +	return send_fd(ibuf, GOT_IMSG_RAW_DELTA_OUTFD, fd);
    > +}
    > +
    > +const struct got_error *
    > +got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint64_t base_size,
    > +    uint64_t result_size,  off_t delta_size, off_t delta_offset,
    > +    off_t delta_out_offset, struct got_object_id *base_id)
    > +{
    > +	struct got_imsg_raw_delta idelta;
    > +	int ret;
    > +
    > +	idelta.base_size = base_size;
    > +	idelta.result_size = result_size;
    > +	idelta.delta_size = delta_size;
    > +	idelta.delta_offset = delta_offset;
    > +	idelta.delta_out_offset = delta_out_offset;
    > +	memcpy(idelta.base_id, base_id->sha1, SHA1_DIGEST_LENGTH);
    > +
    > +	ret = imsg_compose(ibuf, GOT_IMSG_RAW_DELTA, 0, 0, -1,
    > +	    &idelta, sizeof(idelta));
    > +	if (ret == -1)
    > +		return got_error_from_errno("imsg_compose RAW_DELTA");
    > +
    > +	return flush_imsg(ibuf);
    > +}
    > +
    > +const struct got_error *
    > +got_privsep_recv_raw_delta(uint64_t *base_size, uint64_t *result_size,
    > +    off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset,
    > +    struct got_object_id **base_id, struct imsgbuf *ibuf)
    > +{
    > +	const struct got_error *err = NULL;
    > +	struct imsg imsg;
    > +	struct got_imsg_raw_delta *delta;
    > +	size_t datalen;
    > +
    > +	*base_size = 0;
    > +	*result_size = 0;
    > +	*delta_size = 0;
    > +	*delta_offset = 0;
    > +	*delta_out_offset = 0;
    > +	*base_id = NULL;
    > +
    > +	err = got_privsep_recv_imsg(&imsg, ibuf, 0);
    > +	if (err)
    > +		return err;
    > +
    > +	datalen = imsg.hdr.len - IMSG_HEADER_SIZE;
    > +
    > +	switch (imsg.hdr.type) {
    > +	case GOT_IMSG_RAW_DELTA:
    > +		if (datalen != sizeof(*delta)) {
    > +			err = got_error(GOT_ERR_PRIVSEP_LEN);
    > +			break;
    > +		}
    > +		delta = imsg.data;
    > +		*base_size = delta->base_size;
    > +		*result_size = delta->result_size;
    > +		*delta_size = delta->delta_size;
    > +		*delta_offset = delta->delta_offset;
    > +		*delta_out_offset = delta->delta_out_offset;
    > +		*base_id = calloc(1, sizeof(**base_id));
    > +		if (*base_id == NULL) {
    > +			err = got_error_from_errno("malloc");
    > +			break;
    > +		}
    > +		memcpy((*base_id)->sha1, delta->base_id, SHA1_DIGEST_LENGTH);
    > +		break;
    > +	default:
    > +		err = got_error(GOT_ERR_PRIVSEP_MSG);
    > +		break;
    > +	}
    > +
    > +	imsg_free(&imsg);
    > +
    > +	if (err) {
    > +		free(*base_id);
    > +		*base_id = NULL;
    > +	}
    > +	return err;
    > +}
    > +
    > +const struct got_error *
    >  got_privsep_unveil_exec_helpers(void)
    >  {
    >  	const char *helpers[] = {
    > blob - 40f5562db7ad3596203bf083a5084f34f1eb1b05
    > blob + 255d035fd6d1b3bd7f0720a14a8463799f261c61
    > --- lib/repository.c
    > +++ lib/repository.c
    > @@ -1020,8 +1020,8 @@ get_packidx_bloom_filter(struct got_repository *repo,
    >  	    &repo->packidx_bloom_filters, &key);
    >  }
    >  
    > -static int
    > -check_packidx_bloom_filter(struct got_repository *repo,
    > +int
    > +got_repo_check_packidx_bloom_filter(struct got_repository *repo,
    >      const char *path_packidx, struct got_object_id *id)
    >  {
    >  	struct got_packidx_bloom_filter *bf;
    > @@ -1103,7 +1103,7 @@ got_repo_search_packidx(struct got_packidx **packidx, 
    >  	for (i = 0; i < repo->pack_cache_size; i++) {
    >  		if (repo->packidx_cache[i] == NULL)
    >  			break;
    > -		if (!check_packidx_bloom_filter(repo,
    > +		if (!got_repo_check_packidx_bloom_filter(repo,
    >  		    repo->packidx_cache[i]->path_packidx, id))
    >  			continue; /* object will not be found in this index */
    >  		*idx = got_packidx_get_object_idx(repo->packidx_cache[i], id);
    > @@ -1154,7 +1154,8 @@ got_repo_search_packidx(struct got_packidx **packidx, 
    >  			goto done;
    >  		}
    >  
    > -		if (!check_packidx_bloom_filter(repo, path_packidx, id)) {
    > +		if (!got_repo_check_packidx_bloom_filter(repo,
    > +		    path_packidx, id)) {
    >  			free(path_packidx);
    >  			continue; /* object will not be found in this index */
    >  		}
    > @@ -1205,6 +1206,92 @@ done:
    >  	return err;
    >  }
    >  
    > +const struct got_error *
    > +got_repo_list_packidx(struct got_pathlist_head *packidx_paths,
    > +    struct got_repository *repo)
    > +{
    > +	const struct got_error *err = NULL;
    > +	DIR *packdir = NULL;
    > +	struct dirent *dent;
    > +	char *path_packidx = NULL;
    > +	int packdir_fd;
    > +
    > +	packdir_fd = openat(got_repo_get_fd(repo),
    > +	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY | O_CLOEXEC);
    > +	if (packdir_fd == -1) {
    > +		return got_error_from_errno_fmt("openat: %s/%s",
    > +		    got_repo_get_path_git_dir(repo),
    > +		    GOT_OBJECTS_PACK_DIR);
    > +	}
    > +
    > +	packdir = fdopendir(packdir_fd);
    > +	if (packdir == NULL) {
    > +		err = got_error_from_errno("fdopendir");
    > +		goto done;
    > +	}
    > +
    > +	while ((dent = readdir(packdir)) != NULL) {
    > +		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
    > +			continue;
    > +
    > +		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
    > +		    dent->d_name) == -1) {
    > +			err = got_error_from_errno("asprintf");
    > +			path_packidx = NULL;
    > +			break;
    > +		}
    > +
    > +		err = got_pathlist_append(packidx_paths, path_packidx, NULL);
    > +		if (err)
    > +			break;
    > +	}
    > +done:
    > +	if (err)
    > +		free(path_packidx);
    > +	if (packdir && closedir(packdir) != 0 && err == NULL)
    > +		err = got_error_from_errno("closedir");
    > +	return err;
    > +}
    > +
    > +const struct got_error *
    > +got_repo_get_packidx(struct got_packidx **packidx, const char *path_packidx,
    > +    struct got_repository *repo)
    > +{
    > +	const struct got_error *err;
    > +	size_t i;
    > +
    > +	*packidx = NULL;
    > +
    > +	/* Search pack index cache. */
    > +	for (i = 0; i < repo->pack_cache_size; i++) {
    > +		if (repo->packidx_cache[i] == NULL)
    > +			break;
    > +		if (strcmp(repo->packidx_cache[i]->path_packidx,
    > +		    path_packidx) == 0) {
    > +			*packidx = repo->packidx_cache[i];
    > +			return NULL;
    > +		}
    > +	}
    > +	/* No luck. Search the filesystem. */
    > +
    > +	err = got_packidx_open(packidx, got_repo_get_fd(repo),
    > +	    path_packidx, 0);
    > +	if (err)
    > +		return err;
    > +
    > +	err = add_packidx_bloom_filter(repo, *packidx, path_packidx);
    > +	if (err)
    > +		goto done;
    > +
    > +	err = cache_packidx(repo, *packidx, path_packidx);
    > +done:
    > +	if (err) {
    > +		got_packidx_close(*packidx);
    > +		*packidx = NULL;
    > +	}
    > +	return err;
    > +}
    > +
    >  static const struct got_error *
    >  read_packfile_hdr(int fd, struct got_packidx *packidx)
    >  {
    > blob - f9be16f797d60019bfb883aac0e9b41c7beb846a
    > blob + 422da924a77fc3864bcdfbc5e266f568fab8cef3
    > --- lib/repository_admin.c
    > +++ lib/repository_admin.c
    > @@ -140,7 +140,8 @@ const struct got_error *
    >  got_repo_pack_objects(FILE **packfile, struct got_object_id **pack_hash,
    >      struct got_reflist_head *include_refs,
    >      struct got_reflist_head *exclude_refs, struct got_repository *repo,
    > -    int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
    > +    int loose_obj_only,
    > +    got_pack_progress_cb progress_cb, void *progress_arg,
    >      got_cancel_cb cancel_cb, void *cancel_arg)
    >  {
    >  	const struct got_error *err = NULL;
    > blob - 1ea0d617c52732faf35724fe601e28a24b03992f
    > blob + a14d051c87c9e8f5b75dd5f4bcf67486dfde5f4c
    > --- libexec/got-read-pack/got-read-pack.c
    > +++ libexec/got-read-pack/got-read-pack.c
    > @@ -289,11 +289,10 @@ done:
    >  }
    >  
    >  static const struct got_error *
    > -receive_tempfile(FILE **basefile, FILE **accumfile, struct imsg *imsg,
    > +receive_tempfile(FILE **f, const char *mode, struct imsg *imsg,
    >      struct imsgbuf *ibuf)
    >  {
    >  	size_t datalen;
    > -	FILE **f;
    >  
    >  	datalen = imsg->hdr.len - IMSG_HEADER_SIZE;
    >  	if (datalen != 0)
    > @@ -302,14 +301,7 @@ receive_tempfile(FILE **basefile, FILE **accumfile, st
    >  	if (imsg->fd == -1)
    >  		return got_error(GOT_ERR_PRIVSEP_NO_FD);
    >  
    > -	if (*basefile == NULL)
    > -		f = basefile;
    > -	else if (*accumfile == NULL)
    > -		f = accumfile;
    > -	else
    > -		return got_error(GOT_ERR_PRIVSEP_MSG);
    > -
    > -	*f = fdopen(imsg->fd, "w+");
    > +	*f = fdopen(imsg->fd, mode);
    >  	if (*f == NULL)
    >  		return got_error_from_errno("fdopen");
    >  	imsg->fd = -1;
    > @@ -854,9 +846,80 @@ done:
    >  	return err;
    >  }
    >  
    > +static const struct got_error *
    > +get_base_object_id(struct got_object_id *base_id, struct got_packidx *packidx,
    > +    off_t base_offset)
    > +{
    > +	const struct got_error *err;
    > +	int idx;
    >  
    > +	err = got_packidx_get_offset_idx(&idx, packidx, base_offset); 
    > +	if (err)
    > +		return err;
    > +	if (idx == -1)
    > +		return got_error(GOT_ERR_BAD_PACKIDX);
    >  
    > +	return got_packidx_get_object_id(base_id, packidx, idx);
    > +}
    > +
    >  static const struct got_error *
    > +raw_delta_request(struct imsg *imsg, struct imsgbuf *ibuf,
    > +    FILE *delta_outfile, struct got_pack *pack,
    > +    struct got_packidx *packidx)
    > +{
    > +	const struct got_error *err = NULL;
    > +	struct got_imsg_raw_delta_request req;
    > +	size_t datalen, delta_size;
    > +	off_t delta_offset;
    > +	uint8_t *delta_buf = NULL;
    > +	struct got_object_id id, base_id;
    > +	off_t base_offset, delta_out_offset = 0;
    > +	uint64_t base_size = 0, result_size = 0;
    > +	size_t w;
    > +
    > +	datalen = imsg->hdr.len - IMSG_HEADER_SIZE;
    > +	if (datalen != sizeof(req))
    > +		return got_error(GOT_ERR_PRIVSEP_LEN);
    > +	memcpy(&req, imsg->data, sizeof(req));
    > +	memcpy(id.sha1, req.id, SHA1_DIGEST_LENGTH);
    > +
    > +	imsg->fd = -1;
    > +
    > +	err = got_packfile_extract_raw_delta(&delta_buf, &delta_size,
    > +	    &delta_offset, &base_offset, &base_id, &base_size, &result_size,
    > +	    pack, packidx, req.idx);
    > +	if (err)
    > +		goto done;
    > +
    > +	/*
    > +	 * If this is an offset delta we must determine the base
    > +	 * object ID ourselves.
    > +	 */
    > +	if (base_offset != 0) {
    > +		err = get_base_object_id(&base_id, packidx, base_offset);
    > +		if (err)
    > +			goto done;
    > +	}
    > +
    > +	delta_out_offset = ftello(delta_outfile);
    > +	w = fwrite(delta_buf, 1, delta_size, delta_outfile);
    > +	if (w != delta_size) {
    > +		err = got_ferror(delta_outfile, GOT_ERR_IO);
    > +		goto done;
    > +	}
    > +	if (fflush(delta_outfile) == -1) {
    > +		err = got_error_from_errno("fflush");
    > +		goto done;
    > +	}
    > +
    > +	err = got_privsep_send_raw_delta(ibuf, base_size, result_size,
    > +	    delta_size, delta_offset, delta_out_offset, &base_id);
    > +done:
    > +	free(delta_buf);
    > +	return err;
    > +}
    > +
    > +static const struct got_error *
    >  receive_packidx(struct got_packidx **packidx, struct imsgbuf *ibuf)
    >  {
    >  	const struct got_error *err = NULL;
    > @@ -1009,7 +1072,7 @@ main(int argc, char *argv[])
    >  	struct got_packidx *packidx = NULL;
    >  	struct got_pack *pack = NULL;
    >  	struct got_object_cache objcache;
    > -	FILE *basefile = NULL, *accumfile = NULL;
    > +	FILE *basefile = NULL, *accumfile = NULL, *delta_outfile = NULL;
    >  
    >  	//static int attached;
    >  	//while (!attached) sleep(1);
    > @@ -1066,8 +1129,14 @@ main(int argc, char *argv[])
    >  
    >  		switch (imsg.hdr.type) {
    >  		case GOT_IMSG_TMPFD:
    > -			err = receive_tempfile(&basefile, &accumfile,
    > -			    &imsg, &ibuf);
    > +			if (basefile == NULL) {
    > +				err = receive_tempfile(&basefile, "w+",
    > +				   &imsg, &ibuf);
    > +			} else if (accumfile == NULL) {
    > +				err = receive_tempfile(&accumfile, "w+",
    > +				   &imsg, &ibuf);
    > +			} else
    > +				err = got_error(GOT_ERR_PRIVSEP_MSG);
    >  			break;
    >  		case GOT_IMSG_PACKED_OBJECT_REQUEST:
    >  			err = object_request(&imsg, &ibuf, pack, packidx,
    > @@ -1081,6 +1150,22 @@ main(int argc, char *argv[])
    >  			err = raw_object_request(&imsg, &ibuf, pack, packidx,
    >  			    &objcache, basefile, accumfile);
    >  			break;
    > +		case GOT_IMSG_RAW_DELTA_OUTFD:
    > +			if (delta_outfile != NULL) {
    > +				err = got_error(GOT_ERR_PRIVSEP_MSG);
    > +				break;
    > +			}
    > +			err = receive_tempfile(&delta_outfile, "w",
    > +			    &imsg, &ibuf);
    > +			break;
    > +		case GOT_IMSG_RAW_DELTA_REQUEST:
    > +			if (delta_outfile == NULL) {
    > +				err = got_error(GOT_ERR_PRIVSEP_NO_FD);
    > +				break;
    > +			}
    > +			err = raw_delta_request(&imsg, &ibuf, delta_outfile,
    > +			    pack, packidx);
    > +			break;
    >  		case GOT_IMSG_COMMIT_REQUEST:
    >  			err = commit_request(&imsg, &ibuf, pack, packidx,
    >  			    &objcache);
    > @@ -1127,6 +1212,8 @@ main(int argc, char *argv[])
    >  		err = got_error_from_errno("fclose");
    >  	if (accumfile && fclose(accumfile) == EOF && err == NULL)
    >  		err = got_error_from_errno("fclose");
    > +	if (delta_outfile && fclose(delta_outfile) == EOF && err == NULL)
    > +		err = got_error_from_errno("fclose");
    >  	if (err) {
    >  		if (!sigint_received && err->code != GOT_ERR_PRIVSEP_PIPE) {
    >  			fprintf(stderr, "%s: %s\n", getprogname(), err->msg);
    > 
    > 
    
    
    
  • Thomas Adam:

    reuse deltas while packing

  • Stefan Sperling:

    reuse deltas while packing