Download raw body.
object enumeration in got-read-pack
Stefan Sperling <stsp@stsp.name> wrote:
> On Thu, May 19, 2022 at 06:14:07PM +0200, Stefan Sperling wrote:
> > This patch adds object enumeration support to got-read-pack.
> >
> > The initial step of packing becomes faster if a large pack file
> > is available which contains most of the commits and trees which
> > need to be loaded in order to enumerate the set of objects to be
> > packed.
> > We gain speed by cutting imsg traffic between the main process and
> > got-read-pack during this phase. The main process longer needs to
> > send requests for individual objects as long as got-read-pack is
> > able to enumerate them.
> >
> > This seems solid in my testing. More testing would be very welcome.
> >
> > ok?
ok with the ibuf_free dropped.
> New diff rebased on top of latest 'main', which now has the delfitication
> path fixes which were extracted from the previous version of this diff.
unfortunately I broke this again with the arc4random diff... apologies.
I'm attaching a rebased diff (I just had to propagate the `seed' into
some more functions via the new load_packed_obj_arg struct.) The diff
doesn't address _any_ of my comments, it's just your diff rebased hoping
it saves you some time :)
some comments/questions inline.
> diff 93edda4de370be75a04c393842d95f45a00089ab e7ba67af0e5a394d8046b8b0cbba18d379f9246b
> blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4
> blob + 60f97103b1abf937d5f4d34671be29fb3599df48
> --- lib/got_lib_object.h
> +++ lib/got_lib_object.h
> @@ -133,3 +133,13 @@ const struct got_error *got_object_tree_entry_dup(stru
> const struct got_error *got_traverse_packed_commits(
> struct got_object_id_queue *, struct got_object_id *, const char *,
> struct got_repository *);
> +
> +typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *,
> + time_t, struct got_object_id *, struct got_repository *);
> +typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *,
> + struct got_tree_object *, time_t, struct got_object_id *, const char *,
> + struct got_repository *);
> +
> +const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb,
> + got_object_enumerate_tree_cb, void *, struct got_object_id **, int,
> + struct got_packidx *, struct got_repository *);
> blob - e719a95bde6bbe971668bb22ea3a72e2990ad927
> blob + c6c29a099c6a9b6dc6883641ef3ef96ce7f47156
> --- lib/got_lib_privsep.h
> +++ lib/got_lib_privsep.h
> @@ -145,6 +145,11 @@ enum got_imsg_type {
> GOT_IMSG_COMMIT_TRAVERSAL_REQUEST,
> GOT_IMSG_TRAVERSED_COMMITS,
> GOT_IMSG_COMMIT_TRAVERSAL_DONE,
> + GOT_IMSG_OBJECT_ENUMERATION_REQUEST,
> + GOT_IMSG_ENUMERATED_COMMIT,
> + GOT_IMSG_ENUMERATED_TREE,
> + GOT_IMSG_TREE_ENUMERATION_DONE,
> + GOT_IMSG_OBJECT_ENUMERATION_DONE,
>
> /* Message sending file descriptor to a temporary file. */
> GOT_IMSG_TMPFD,
> @@ -556,6 +561,22 @@ struct got_imsg_traversed_commits {
> /* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */
> } __attribute__((__packed__));
>
> +/* Structure for GOT_IMSG_ENUMERATED_COMMIT */
> +struct got_imsg_enumerated_commit {
> + uint8_t id[SHA1_DIGEST_LENGTH];
> + time_t mtime;
> +} __attribute__((__packed__));
> +
> +/* Structure for GOT_IMSG_ENUMERATED_TREE */
> +struct got_imsg_enumerated_tree {
> + uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */
> + int nentries; /* number of tree entries */
> +
> + /* Followed by tree's path in remaining data of imsg buffer. */
> +
> + /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */
> +} __attribute__((__packed__));
> +
> /*
> * Structure for GOT_IMSG_GOTCONFIG_REMOTE and
> * GOT_IMSG_GOTCONFIG_REMOTE data.
> @@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver
> const struct got_error *got_privsep_recv_traversed_commits(
> struct got_commit_object **, struct got_object_id **,
> struct got_object_id_queue *, struct imsgbuf *);
> +const struct got_error *got_privsep_send_enumerated_tree(size_t *,
> + struct imsgbuf *, struct got_object_id *, const char *,
> + struct got_parsed_tree_entry *, int);
> +const struct got_error *got_privsep_send_object_enumeration_request(
> + struct imsgbuf *);
> +const struct got_error *got_privsep_send_object_enumeration_done(
> + struct imsgbuf *);
> +const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *,
> + struct got_object_id *, time_t);
> +const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *,
> + got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *,
> + struct got_repository *);
>
> const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int,
> struct got_object_id *);
> blob - 2d612890612d7d8a8e30549c38659cd083a2e41e
> blob + 1b025984a415471435ce11fea6762fde7d0051ce
> --- lib/object.c
> +++ lib/object.c
> @@ -60,6 +60,10 @@
> #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
> #endif
>
> +#ifndef nitems
> +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
> +#endif
> +
> struct got_object_id *
> got_object_get_id(struct got_object *obj)
> {
> @@ -2390,3 +2394,70 @@ done:
> free(changed_commit_id);
> return err;
> }
> +
> +const struct got_error *
> +got_object_enumerate(got_object_enumerate_commit_cb cb_commit,
> + got_object_enumerate_tree_cb cb_tree, void *cb_arg,
> + struct got_object_id **commit_ids, int ncommits,
> + struct got_packidx *packidx, struct got_repository *repo)
> +{
> + const struct got_error *err = NULL;
> + struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS];
> + struct got_pack *pack;
> + char *path_packfile = NULL;
> + int i, j = 0;
> +
> + err = got_packidx_get_packfile_path(&path_packfile,
> + packidx->path_packidx);
> + if (err)
> + return err;
> +
> + pack = got_repo_get_cached_pack(repo, path_packfile);
> + if (pack == NULL) {
> + err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
> + if (err)
> + goto done;
> + }
> +
> + if (pack->privsep_child == NULL) {
> + err = start_pack_privsep_child(pack, packidx);
> + if (err)
> + goto done;
> + }
> +
> + err = got_privsep_send_object_enumeration_request(
> + pack->privsep_child->ibuf);
> + if (err)
> + goto done;
> +
> + /*
> + * XXX This is stupid. Consider adding a function which
> + * does the chunking internally?
> + */
> + for (i = 0; i < ncommits; i++) {
> + j = i % nitems(ids);
> + ids[j] = commit_ids[i];
> + if (j >= nitems(ids) - 1) {
> + err = got_privsep_send_object_idlist(
> + pack->privsep_child->ibuf, ids, j + 1);
> + if (err)
> + goto done;
> + }
> + }
> + if (j > 0) {
> + err = got_privsep_send_object_idlist(
> + pack->privsep_child->ibuf, ids, j + 1);
> + if (err)
> + goto done;
> + }
> +
> + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf);
> + if (err)
> + goto done;
> +
> + err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf,
> + cb_commit, cb_tree, cb_arg, repo);
> +done:
> + free(path_packfile);
> + return err;
> +}
> blob - 203398831c0a6d3ffc31d9eb8e34611330f3cfb4
> blob + d7765ff55c98f991367c2f24425c9a32c2877fb0
> --- lib/pack_create.c
> +++ lib/pack_create.c
> @@ -900,21 +900,16 @@ add_object(int want_meta, struct got_object_idset *ids
> static const struct got_error *
> load_tree_entries(struct got_object_id_queue *ids, int want_meta,
> struct got_object_idset *idset, struct got_object_idset *idset_exclude,
> - struct got_object_id *tree_id,
> + struct got_tree_object *tree,
> const char *dpath, time_t mtime, struct got_repository *repo,
> int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
> got_pack_progress_cb progress_cb, void *progress_arg,
> struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
> {
> const struct got_error *err;
> - struct got_tree_object *tree;
> char *p = NULL;
> int i;
>
> - err = got_object_open_as_tree(&tree, repo, tree_id);
> - if (err)
> - return err;
> -
> (*ntrees)++;
> err = report_progress(progress_cb, progress_arg, rl,
> *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
> @@ -936,8 +931,16 @@ load_tree_entries(struct got_object_id_queue *ids, int
> got_object_idset_contains(idset, id) ||
> got_object_idset_contains(idset_exclude, id))
> continue;
> -
> - if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
> +
> + /*
> + * If got-read-pack is crawling trees for us then
> + * we are only here to collect blob IDs.
> + */
> + if (ids == NULL && S_ISDIR(mode))
> + continue;
> +
> + if (asprintf(&p, "%s%s%s", dpath,
> + got_path_is_root_dir(dpath) ? "" : "/",
> got_tree_entry_get_name(e)) == -1) {
> err = got_error_from_errno("asprintf");
> break;
> @@ -967,7 +970,6 @@ load_tree_entries(struct got_object_id_queue *ids, int
> }
> }
>
> - got_object_tree_close(tree);
> free(p);
> return err;
> }
> @@ -984,6 +986,7 @@ load_tree(int want_meta, struct got_object_idset *idse
> const struct got_error *err = NULL;
> struct got_object_id_queue tree_ids;
> struct got_object_qid *qid;
> + struct got_tree_object *tree = NULL;
>
> if (got_object_idset_contains(idset, tree_id) ||
> got_object_idset_contains(idset_exclude, tree_id))
> @@ -1031,20 +1034,31 @@ load_tree(int want_meta, struct got_object_idset *idse
> break;
> }
>
> + err = got_object_open_as_tree(&tree, repo, &qid->id);
> + if (err) {
> + free(qid->data);
> + got_object_qid_free(qid);
> + break;
> + }
> +
> err = load_tree_entries(&tree_ids, want_meta, idset,
> - idset_exclude, &qid->id,
> - path, mtime, repo, loose_obj_only, ncolored, nfound,
> - ntrees, progress_cb, progress_arg, rl,
> + idset_exclude, tree, path, mtime, repo, loose_obj_only,
> + ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
> cancel_cb, cancel_arg);
> free(qid->data);
> got_object_qid_free(qid);
> if (err)
> break;
> +
> + got_object_tree_close(tree);
> + tree = NULL;
> }
>
> STAILQ_FOREACH(qid, &tree_ids, entry)
> free(qid->data);
> got_object_id_queue_free(&tree_ids);
> + if (tree)
> + got_object_tree_close(tree);
> return err;
> }
>
> @@ -1448,7 +1462,195 @@ done:
> return err;
> }
>
> +struct load_packed_obj_arg {
> + /* output parameters: */
> + struct got_object_id *id;
> + char *dpath;
> + time_t mtime;
> +
> + /* input parameters: */
> + int want_meta;
> + struct got_object_idset *idset;
> + struct got_object_idset *idset_exclude;
> + int loose_obj_only;
> + int *ncolored;
> + int *nfound;
> + int *ntrees;
> + got_pack_progress_cb progress_cb;
> + void *progress_arg;
> + struct got_ratelimit *rl;
> + got_cancel_cb cancel_cb;
> + void *cancel_arg;
> +};
> +
> static const struct got_error *
> +load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id,
> + struct got_repository *repo)
> +{
> + struct load_packed_obj_arg *a = arg;
> +
> + if (got_object_idset_contains(a->idset, id) ||
> + got_object_idset_contains(a->idset_exclude, id))
> + return NULL;
> +
> + return add_object(a->want_meta,
> + a->want_meta ? a->idset : a->idset_exclude,
> + id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->loose_obj_only, repo,
> + a->ncolored, a->nfound, a->ntrees,
> + a->progress_cb, a->progress_arg, a->rl);
> +}
> +
> +static const struct got_error *
> +load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime,
> + struct got_object_id *id, const char *dpath, struct got_repository *repo)
> +{
> + const struct got_error *err;
> + struct load_packed_obj_arg *a = arg;
> + const char *relpath;
> +
> + /*
> + * When we receive a tree's ID and path but not the tree itself,
> + * this tree object was not found in the pack file. This is the
> + * last time we are being called for this optimized traversal.
> + * Return from here and switch to loading objects the slow way.
> + */
> + if (tree == NULL) {
If got_object_id_dup fails in this if body, aren't we leaking those dup'ed
ids?
If I'm reading correctly we're coming here from load_packed_object_ids
which allocates load_packed_obj_arg on the stack and bails out on error.
> + free(a->id);
> + a->id = got_object_id_dup(id);
> + if (a->id == NULL)
> + return got_error_from_errno("got_object_id_dup");
> +
> + free(a->dpath);
> + a->dpath = strdup(dpath);
> + if (a->dpath == NULL)
> + return got_error_from_errno("strdup");
> +
> + a->mtime = mtime;
> + return NULL;
> + }
> +
> + if (got_object_idset_contains(a->idset, id) ||
> + got_object_idset_contains(a->idset_exclude, id))
> + return NULL;
> +
> + relpath = dpath;
> + while (relpath[0] == '/')
> + relpath++;
> +
> + err = add_object(a->want_meta,
> + a->want_meta ? a->idset : a->idset_exclude,
> + id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->loose_obj_only,
> + repo, a->ncolored, a->nfound, a->ntrees,
> + a->progress_cb, a->progress_arg, a->rl);
> + if (err)
> + return err;
> +
> + return load_tree_entries(NULL, a->want_meta, a->idset,
> + a->idset_exclude, tree, dpath, mtime, repo,
> + a->loose_obj_only, a->ncolored, a->nfound, a->ntrees,
> + a->progress_cb, a->progress_arg, a->rl,
> + a->cancel_cb, a->cancel_arg);
> +}
> +
> +static const struct got_error *
> +load_packed_object_ids(struct got_object_id **commits, int ncommits,
> + int want_meta, struct got_object_idset *idset,
> + struct got_object_idset *idset_exclude, int loose_obj_only,
> + struct got_repository *repo, struct got_packidx *packidx,
> + int *ncolored, int *nfound, int *ntrees,
> + got_pack_progress_cb progress_cb, void *progress_arg,
> + struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
> +{
> + const struct got_error *err = NULL;
> + struct load_packed_obj_arg lpa;
> +
> + memset(&lpa, 0, sizeof(lpa));
> + lpa.want_meta = want_meta;
> + lpa.idset = idset;
> + lpa.idset_exclude = idset_exclude;
> + lpa.loose_obj_only = loose_obj_only;
> + lpa.ncolored = ncolored;
> + lpa.nfound = nfound;
> + lpa.ntrees = ntrees;
> + lpa.progress_cb = progress_cb;
> + lpa.progress_arg = progress_arg;
> + lpa.rl = rl;
> + lpa.cancel_cb = cancel_cb;
> + lpa.cancel_arg = cancel_arg;
> +
> + /* Attempt to load objects via got-read-pack, as far as possible. */
> + err = got_object_enumerate(load_packed_commit_id,
> + load_packed_tree_ids, &lpa, commits, ncommits, packidx, repo);
> + if (err)
> + return err;
> +
> + if (lpa.id == NULL)
> + return NULL;
> +
> + /*
> + * An incomplete tree hierarchy was present in the pack file
> + * and caused loading to be aborted midway through a commit.
> + * Continue loading trees the slow way.
> + */
> + err = load_tree(want_meta, idset, idset_exclude,
> + lpa.id, lpa.dpath, lpa.mtime, repo, loose_obj_only,
> + ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
> + cancel_cb, cancel_arg);
> + free(lpa.id);
> + free(lpa.dpath);
> + return err;
> +}
> +
> +static const struct got_error *
> +find_pack_for_enumeration(struct got_packidx **best_packidx,
> + struct got_object_id **ids, int nids, struct got_repository *repo)
> +{
> + const struct got_error *err = NULL;
> + struct got_pathlist_entry *pe;
> + const char *best_packidx_path = NULL;
> + int nobj_max = 0;
> + int ncommits_max = 0;
> +
> + *best_packidx = NULL;
> +
> + /*
> + * Find the largest pack which contains at least some of the
> + * commits and tags we are interested in.
> + */
> + TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
> + const char *path_packidx = pe->path;
> + struct got_packidx *packidx;
> + int nobj, i, idx, ncommits = 0;
> +
> + err = got_repo_get_packidx(&packidx, path_packidx, repo);
> + if (err)
> + break;
> +
> + nobj = be32toh(packidx->hdr.fanout_table[0xff]);
> + if (nobj <= nobj_max)
> + continue;
> +
> + for (i = 0; i < nids; i++) {
> + idx = got_packidx_get_object_idx(packidx, ids[i]);
> + if (idx != -1)
> + ncommits++;
> + }
> + if (ncommits > ncommits_max) {
> + best_packidx_path = path_packidx;
> + nobj_max = nobj;
> + ncommits_max = ncommits;
> + }
> + }
> +
nit: maybe drop braces here?
> + if (best_packidx_path) {
> + err = got_repo_get_packidx(best_packidx, best_packidx_path,
> + repo);
> + }
> +
> + return err;
> +}
> +
> +static const struct got_error *
> load_object_ids(int *ncolored, int *nfound, int *ntrees,
> struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
> struct got_object_id **ours, int nours, struct got_repository *repo,
> @@ -1457,6 +1659,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
> {
> const struct got_error *err = NULL;
> struct got_object_id **ids = NULL;
> + struct got_packidx *packidx = NULL;
> int i, nobj = 0, obj_type;
> struct got_object_idset *idset_exclude;
>
> @@ -1473,6 +1676,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
> if (err)
> goto done;
>
> + err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo);
> + if (err)
> + goto done;
> + if (packidx) {
> + err = load_packed_object_ids(theirs, ntheirs, 0,
> + idset, idset_exclude, loose_obj_only, repo, packidx,
> + ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
> + cancel_cb, cancel_arg);
> + if (err)
> + goto done;
> + }
> +
> for (i = 0; i < ntheirs; i++) {
> struct got_object_id *id = theirs[i];
> if (id == NULL)
> @@ -1497,6 +1712,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
> }
> }
>
> + err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
> + if (err)
> + goto done;
> + if (packidx) {
> + err = load_packed_object_ids(ids, nobj, 1,
> + idset, idset_exclude, loose_obj_only, repo, packidx,
> + ncolored, nfound, ntrees,
> + progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
> + if (err)
> + goto done;
> + }
> +
> for (i = 0; i < nobj; i++) {
> err = load_commit(1, idset, idset_exclude,
> ids[i], repo, loose_obj_only, ncolored, nfound, ntrees,
> blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b
> blob + d5f10725785b576a723c85e2b6322e71e49f192a
> --- lib/privsep.c
> +++ lib/privsep.c
> @@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com
> }
>
> static const struct got_error *
> -send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
> - int idx0, int idxN, size_t len)
> +send_tree_entries_batch(struct imsgbuf *ibuf,
> + struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len)
> {
> struct ibuf *wbuf;
> struct got_imsg_tree_entries ientries;
> @@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par
> return NULL;
> }
>
> -const struct got_error *
> -got_privsep_send_tree(struct imsgbuf *ibuf,
> - struct got_parsed_tree_entry *entries, int nentries)
> +static const struct got_error *
> +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
> + int nentries)
> {
> const struct got_error *err = NULL;
> - struct got_imsg_tree_object itree;
> - size_t entries_len;
> int i, j;
> + size_t entries_len = sizeof(struct got_imsg_tree_entries);
>
> - itree.nentries = nentries;
> - if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
> - == -1)
> - return got_error_from_errno("imsg_compose TREE");
> -
> - entries_len = sizeof(struct got_imsg_tree_entries);
> i = 0;
> for (j = 0; j < nentries; j++) {
> struct got_parsed_tree_entry *pte = &entries[j];
> @@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf,
>
> if (j > 0 &&
> entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
> - err = send_tree_entries(ibuf, entries,
> + err = send_tree_entries_batch(ibuf, entries,
> i, j - 1, entries_len);
> if (err)
> return err;
> @@ -1513,14 +1506,98 @@ got_privsep_send_tree(struct imsgbuf *ibuf,
> }
>
> if (j > 0) {
> - err = send_tree_entries(ibuf, entries, i, j - 1, entries_len);
> + err = send_tree_entries_batch(ibuf, entries, i, j - 1,
> + entries_len);
> if (err)
> return err;
> }
>
> + return NULL;
> +}
> +
> +const struct got_error *
> +got_privsep_send_tree(struct imsgbuf *ibuf,
> + struct got_parsed_tree_entry *entries, int nentries)
> +{
> + const struct got_error *err = NULL;
> + struct got_imsg_tree_object itree;
> +
> + itree.nentries = nentries;
> + if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
> + == -1)
> + return got_error_from_errno("imsg_compose TREE");
> +
> + err = send_tree_entries(ibuf, entries, nentries);
> + if (err)
> + return err;
> +
> return flush_imsg(ibuf);
> }
>
> +
> +static const struct got_error *
> +recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree,
> + int *nentries)
> +{
> + const struct got_error *err = NULL;
> + struct got_imsg_tree_entries *ientries;
> + struct got_tree_entry *te;
> + size_t te_offset;
> + size_t i;
> +
> + if (datalen <= sizeof(*ientries) ||
> + datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE)
> + return got_error(GOT_ERR_PRIVSEP_LEN);
> +
> + ientries = (struct got_imsg_tree_entries *)data;
nit: and maybe here too?
> + if (ientries->nentries > INT_MAX) {
> + return got_error_msg(GOT_ERR_NO_SPACE,
> + "too many tree entries");
> + }
> +
> + te_offset = sizeof(*ientries);
> + for (i = 0; i < ientries->nentries; i++) {
> + struct got_imsg_tree_entry ite;
> + const char *te_name;
> + uint8_t *buf = (uint8_t *)data + te_offset;
> +
> + if (te_offset >= datalen) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> +
> + /* Might not be aligned, size is ~32 bytes. */
> + memcpy(&ite, buf, sizeof(ite));
> +
> + if (ite.namelen >= sizeof(te->name)) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
nit: I think you can leave this in a single line (it would be 70
characters long) now that it's not nested anymore. FWIW I sometimes
find it easier to mentally parse blocks like these if they're on a single
line.
> + if (te_offset + sizeof(ite) + ite.namelen >
> + datalen) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> +
> + if (*nentries >= tree->nentries) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> + te = &tree->entries[*nentries];
> + te_name = buf + sizeof(ite);
> + memcpy(te->name, te_name, ite.namelen);
> + te->name[ite.namelen] = '\0';
> + memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
> + te->mode = ite.mode;
> + te->idx = *nentries;
> + (*nentries)++;
> +
> + te_offset += sizeof(ite) + ite.namelen;
> + }
> +
> + return err;
> +}
> +
> const struct got_error *
> got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf)
> {
> @@ -1529,7 +1606,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
> MIN(sizeof(struct got_imsg_error),
> sizeof(struct got_imsg_tree_object));
> struct got_imsg_tree_object *itree;
> - size_t i;
> int nentries = 0;
>
> *tree = NULL;
> @@ -1542,9 +1618,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
> struct imsg imsg;
> size_t n;
> size_t datalen;
> - struct got_imsg_tree_entries *ientries;
> - struct got_tree_entry *te = NULL;
> - size_t te_offset;
>
> n = imsg_get(ibuf, &imsg);
> if (n == 0) {
> @@ -1611,56 +1684,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
> err = got_error(GOT_ERR_PRIVSEP_MSG);
> break;
> }
> - if (datalen <= sizeof(*ientries) ||
> - datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
> - err = got_error(GOT_ERR_PRIVSEP_LEN);
> - break;
> - }
> -
> - ientries = imsg.data;
> - if (ientries->nentries > INT_MAX) {
> - err = got_error_msg(GOT_ERR_NO_SPACE,
> - "too many tree entries");
> - break;
> - }
> - te_offset = sizeof(*ientries);
> - for (i = 0; i < ientries->nentries; i++) {
> - struct got_imsg_tree_entry ite;
> - const char *te_name;
> - uint8_t *buf = imsg.data + te_offset;
> -
> - if (te_offset >= datalen) {
> - err = got_error(GOT_ERR_PRIVSEP_LEN);
> - break;
> - }
> -
> - /* Might not be aligned, size is ~32 bytes. */
> - memcpy(&ite, buf, sizeof(ite));
> -
> - if (ite.namelen >= sizeof(te->name)) {
> - err = got_error(GOT_ERR_PRIVSEP_LEN);
> - break;
> - }
> - if (te_offset + sizeof(ite) + ite.namelen >
> - datalen) {
> - err = got_error(GOT_ERR_PRIVSEP_LEN);
> - break;
> - }
> - if (nentries >= (*tree)->nentries) {
> - err = got_error(GOT_ERR_PRIVSEP_LEN);
> - break;
> - }
> - te = &(*tree)->entries[nentries];
> - te_name = buf + sizeof(ite);
> - memcpy(te->name, te_name, ite.namelen);
> - te->name[ite.namelen] = '\0';
> - memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
> - te->mode = ite.mode;
> - te->idx = nentries;
> - nentries++;
> -
> - te_offset += sizeof(ite) + ite.namelen;
> - }
> + err = recv_tree_entries(imsg.data, datalen,
> + *tree, &nentries);
> break;
> default:
> err = got_error(GOT_ERR_PRIVSEP_MSG);
> @@ -2731,6 +2756,278 @@ got_privsep_recv_traversed_commits(struct got_commit_o
> }
>
> const struct got_error *
> +got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf,
> + struct got_object_id *tree_id, const char *path,
> + struct got_parsed_tree_entry *entries, int nentries)
> +{
> + const struct got_error *err = NULL;
> + struct ibuf *wbuf;
> + size_t path_len = strlen(path);
> + size_t msglen;
> +
> + msglen = sizeof(struct got_imsg_enumerated_tree) + path_len;
> + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen);
> + if (wbuf == NULL)
> + return got_error_from_errno("imsg_create ENUMERATED_TREE");
> +
> + if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) {
> + err = got_error_from_errno("imsg_add ENUMERATED_TREE");
we shouldn't call ibuf_free in the imsg_add error case, right?
(the other imsg_add calls are fine, these ones here are probably a
leftover)
> + ibuf_free(wbuf);
> + return err;
> + }
> + if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) {
> + err = got_error_from_errno("imsg_add ENUMERATED_TREE");
> + ibuf_free(wbuf);
> + return err;
> + }
> + if (imsg_add(wbuf, path, path_len) == -1) {
> + err = got_error_from_errno("imsg_add ENUMERATED_TREE");
> + ibuf_free(wbuf);
> + return err;
> + }
> +
> + wbuf->fd = -1;
> + imsg_close(ibuf, wbuf);
> +
> + if (entries) {
> + err = send_tree_entries(ibuf, entries, nentries);
> + if (err)
> + return err;
> + }
> +
> + return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf)
> +{
> + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST,
> + 0, 0, -1, NULL, 0) == -1)
> + return got_error_from_errno("imsg_compose "
> + "OBJECT_ENUMERATION_REQUEST");
> +
> + return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf)
> +{
> + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE,
> + 0, 0, -1, NULL, 0) == -1)
> + return got_error_from_errno("imsg_compose "
> + "OBJECT_ENUMERATION_DONE");
> +
> + return flush_imsg(ibuf);
> +}
> +
> +const struct got_error *
> +got_privsep_send_enumerated_commit(struct imsgbuf *ibuf,
> + struct got_object_id *id, time_t mtime)
> +{
> + struct ibuf *wbuf;
> +
> + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0,
> + sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH);
> + if (wbuf == NULL)
> + return got_error_from_errno("imsg_create ENUMERATED_COMMIT");
> +
> + /* Keep in sync with struct got_imsg_enumerated_commit! */
> + if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1)
> + return got_error_from_errno("imsg_add ENUMERATED_COMMIT");
> + if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1)
> + return got_error_from_errno("imsg_add ENUMERATED_COMMIT");
> +
> + wbuf->fd = -1;
> + imsg_close(ibuf, wbuf);
> + /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. */
> + return NULL;
> +}
> +
> +const struct got_error *
> +got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf,
> + got_object_enumerate_commit_cb cb_commit,
> + got_object_enumerate_tree_cb cb_tree, void *cb_arg,
> + struct got_repository *repo)
> +{
> + const struct got_error *err = NULL;
> + struct imsg imsg;
> + struct got_imsg_enumerated_commit *icommit = NULL;
> + struct got_object_id commit_id;
> + int have_commit = 0;
> + time_t mtime = 0;
> + struct got_tree_object tree;
> + struct got_imsg_enumerated_tree *itree;
> + struct got_object_id tree_id;
> + char *path = NULL, *canon_path = NULL;
> + size_t datalen, path_len;
> + int nentries = -1;
> + int done = 0;
> +
> + memset(&tree, 0, sizeof(tree));
> +
> + while (!done) {
> + err = got_privsep_recv_imsg(&imsg, ibuf, 0);
> + if (err)
> + break;
> +
> + datalen = imsg.hdr.len - IMSG_HEADER_SIZE;
> + switch (imsg.hdr.type) {
> + case GOT_IMSG_ENUMERATED_COMMIT:
> + if (have_commit && nentries != -1) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + if (datalen != sizeof(*icommit)) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> + icommit = (struct got_imsg_enumerated_commit *)imsg.data;
> + memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH);
> + mtime = icommit->mtime;
> + err = cb_commit(cb_arg, mtime, &commit_id, repo);
> + if (err)
> + break;
> + have_commit = 1;
> + break;
> + case GOT_IMSG_ENUMERATED_TREE:
> + /* Should be preceeded by GOT_IMSG_ENUMERATED_COMMIT. */
> + if (!have_commit) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + if (datalen < sizeof(*itree)) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> + itree = imsg.data;
> + path_len = datalen - sizeof(*itree);
> + if (path_len == 0) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> + memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1));
> + free(path);
> + path = malloc(path_len + 1);
> + if (path == NULL) {
> + err = got_error_from_errno("malloc");
> + break;
> + }
> + free(canon_path);
> + canon_path = malloc(path_len + 1);
> + if (canon_path == NULL) {
> + err = got_error_from_errno("malloc");
> + break;
> + }
> + memcpy(path, (uint8_t *)imsg.data + sizeof(*itree),
> + path_len);
> + path[path_len] = '\0';
> + if (!got_path_is_absolute(path)) {
> + err = got_error(GOT_ERR_BAD_PATH);
> + break;
> + }
> + if (got_path_is_root_dir(path)) {
> + /* XXX check what got_canonpath() does wrong */
> + canon_path[0] = '/';
> + canon_path[1] = '\0';
> + } else {
> + err = got_canonpath(path, canon_path,
> + path_len + 1);
> + if (err)
> + break;
> + }
> + if (strcmp(path, canon_path) != 0) {
> + err = got_error(GOT_ERR_BAD_PATH);
> + break;
> + }
> + if (nentries != -1) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + if (itree->nentries < -1) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + if (itree->nentries == -1) {
> + /* Tree was not found in pack file. */
> + err = cb_tree(cb_arg, NULL, mtime, &tree_id,
> + path, repo);
> + break;
> + }
> + if (itree->nentries > INT_MAX) {
> + err = got_error(GOT_ERR_PRIVSEP_LEN);
> + break;
> + }
> + tree.entries = calloc(itree->nentries,
> + sizeof(struct got_tree_entry));
> + if (tree.entries == NULL) {
> + err = got_error_from_errno("calloc");
> + break;
> + }
> + if (itree->nentries == 0) {
> + err = cb_tree(cb_arg, &tree, mtime, &tree_id,
> + path, repo);
> + if (err)
> + break;
> +
> + /* Prepare for next tree. */
> + free(tree.entries);
> + memset(&tree, 0, sizeof(tree));
> + nentries = -1;
> + } else {
> + tree.nentries = itree->nentries;
> + nentries = 0;
> + }
> + break;
> + case GOT_IMSG_TREE_ENTRIES:
> + /* Should be preceeded by GOT_IMSG_ENUMERATED_TREE. */
> + if (nentries <= -1) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + err = recv_tree_entries(imsg.data, datalen,
> + &tree, &nentries);
> + if (err)
> + break;
> + if (tree.nentries == nentries) {
> + err = cb_tree(cb_arg, &tree, mtime, &tree_id,
> + path, repo);
> + if (err)
> + break;
> +
> + /* Prepare for next tree. */
> + free(tree.entries);
> + memset(&tree, 0, sizeof(tree));
> + nentries = -1;
> + }
> + break;
> + case GOT_IMSG_TREE_ENUMERATION_DONE:
> + /* All trees have been found and traversed. */
> + if (path == NULL || nentries != -1) {
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> + have_commit = 0;
> + break;
> + case GOT_IMSG_OBJECT_ENUMERATION_DONE:
> + done = 1;
> + break;
> + default:
> + err = got_error(GOT_ERR_PRIVSEP_MSG);
> + break;
> + }
> +
> + imsg_free(&imsg);
> + if (err)
> + break;
> + }
> +
> + free(path);
> + free(canon_path);
> + free(tree.entries);
> + return err;
> +}
> +
> +const struct got_error *
> got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx,
> struct got_object_id *id)
> {
> blob - ea9a7e564cb840af0d6a61c8c8e0cf5ed3d93147
> blob + 41edfaa4585e69c5e5bdf199212200f20b694c25
> --- libexec/got-read-pack/got-read-pack.c
> +++ libexec/got-read-pack/got-read-pack.c
> @@ -14,6 +14,7 @@
> * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> */
>
> +#include <sys/stat.h>
> #include <sys/types.h>
> #include <sys/queue.h>
> #include <sys/uio.h>
> @@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf)
> return got_privsep_flush_imsg(ibuf);
> }
>
> -
> static const struct got_error *
> commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf,
> struct got_pack *pack, struct got_packidx *packidx,
> @@ -1022,6 +1022,31 @@ recv_object_ids(struct got_object_idset *idset, struct
> }
>
> static const struct got_error *
> +recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf)
> +{
> + const struct got_error *err = NULL;
> + int done = 0;
> + struct got_object_qid *qid;
> + struct got_object_id *ids;
> + size_t nids, i;
> +
> + for (;;) {
> + err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf);
> + if (err || done)
> + break;
> + for (i = 0; i < nids; i++) {
> + err = got_object_qid_alloc_partial(&qid);
> + if (err)
> + return err;
> + memcpy(&qid->id, &ids[i], sizeof(qid->id));
> + STAILQ_INSERT_TAIL(queue, qid, entry);
> + }
> + }
> +
> + return err;
> +}
> +
> +static const struct got_error *
> delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf,
> FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx)
> {
> @@ -1132,6 +1157,281 @@ done:
> }
>
> static const struct got_error *
> +send_tree_enumeration_done(struct imsgbuf *ibuf)
> +{
> + if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1,
> + NULL, 0) == -1)
> + return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE");
> +
> + return got_privsep_flush_imsg(ibuf);
> +}
> +
> +static const struct got_error *
> +enumerate_tree(struct imsgbuf *ibuf, size_t *totlen,
> + struct got_object_id *tree_id,
> + const char *path, struct got_pack *pack, struct got_packidx *packidx,
> + struct got_object_cache *objcache, struct got_object_idset *idset)
> +{
> + const struct got_error *err = NULL;
> + struct got_object_id_queue ids;
> + struct got_object_qid *qid;
> + uint8_t *buf = NULL;
> + struct got_parsed_tree_entry *entries = NULL;
> +
> + STAILQ_INIT(&ids);
> +
> + err = got_object_qid_alloc_partial(&qid);
> + if (err)
> + return err;
> + memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH);
> + qid->data = strdup(path);
> + if (qid->data == NULL) {
> + err = got_error_from_errno("strdup");
> + goto done;
> + }
> + STAILQ_INSERT_TAIL(&ids, qid, entry);
> + qid = NULL;
> +
> + do {
> + const char *path;
> + int idx, nentries, i;
> +
> + if (sigint_received) {
> + err = got_error(GOT_ERR_CANCELLED);
> + goto done;
> + }
> +
> + qid = STAILQ_FIRST(&ids);
> + STAILQ_REMOVE_HEAD(&ids, entry);
> + path = qid->data;
> +
> + idx = got_packidx_get_object_idx(packidx, &qid->id);
> + if (idx == -1) {
> + err = got_privsep_send_enumerated_tree(totlen, ibuf,
> + &qid->id, path, NULL, -1);
> + break;
> + }
> +
> + err = open_tree(&buf, &entries, &nentries,
> + pack, packidx, idx, &qid->id, objcache);
> + if (err) {
> + if (err->code != GOT_ERR_NO_OBJ)
> + goto done;
> + }
> +
> + err = got_privsep_send_enumerated_tree(totlen,
> + ibuf, &qid->id, path, entries, nentries);
> + if (err)
> + goto done;
> +
> + err = got_object_idset_add(idset, &qid->id, NULL);
> + if (err)
> + goto done;
> +
> + for (i = 0; i < nentries; i++) {
> + struct got_object_qid *eqid = NULL;
> + struct got_parsed_tree_entry *pte = &entries[i];
> + char *p;
> +
> + if (!S_ISDIR(pte->mode))
> + continue;
> +
> + err = got_object_qid_alloc_partial(&eqid);
> + if (err)
> + goto done;
> + memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1));
> +
> + if (got_object_idset_contains(idset, &eqid->id)) {
> + got_object_qid_free(eqid);
> + continue;
> + }
> +
> + if (asprintf(&p, "%s%s%s", path,
> + got_path_is_root_dir(path) ? "" : "/",
> + pte->name) == -1) {
> + err = got_error_from_errno("asprintf");
> + got_object_qid_free(eqid);
> + goto done;
> + }
> + eqid->data = p;
> + STAILQ_INSERT_TAIL(&ids, eqid, entry);
> + idx = got_packidx_get_object_idx(packidx, &eqid->id);
> + if (idx == -1)
> + break;
> + }
> +
> + free(qid->data);
> + got_object_qid_free(qid);
> + qid = NULL;
> +
> + free(entries);
> + entries = NULL;
> + free(buf);
> + buf = NULL;
> + } while (!STAILQ_EMPTY(&ids));
> +
> + err = send_tree_enumeration_done(ibuf);
> +done:
> + free(buf);
> + if (qid)
> + free(qid->data);
> + got_object_qid_free(qid);
> + got_object_id_queue_free(&ids);
> + free(entries);
> + if (err) {
> + if (err->code == GOT_ERR_PRIVSEP_PIPE)
> + err = NULL;
> + else
> + got_privsep_send_error(ibuf, err);
> + }
> +
> + return err;
> +}
> +
> +static const struct got_error *
> +enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf,
> + struct got_pack *pack, struct got_packidx *packidx,
> + struct got_object_cache *objcache)
> +{
> + const struct got_error *err = NULL;
> + struct got_object_id_queue commit_ids;
> + const struct got_object_id_queue *parents = NULL;
> + struct got_object_qid *qid = NULL;
> + struct got_object *obj = NULL;
> + struct got_commit_object *commit = NULL;
> + struct got_object_id *tree_id = NULL;
> + size_t totlen = 0;
> + struct got_object_idset *idset;
> + int idx;
> +
> + STAILQ_INIT(&commit_ids);
> +
> + idset = got_object_idset_alloc();
> + if (idset == NULL)
> + return got_error_from_errno("got_object_idset_alloc");
> +
> + err = recv_object_id_queue(&commit_ids, ibuf);
> + if (err)
> + goto done;
> +
> + while (!STAILQ_EMPTY(&commit_ids)) {
> + if (sigint_received) {
> + err = got_error(GOT_ERR_CANCELLED);
> + goto done;
> + }
> +
> + qid = STAILQ_FIRST(&commit_ids);
> + STAILQ_REMOVE_HEAD(&commit_ids, entry);
> +
> + if (got_object_idset_contains(idset, &qid->id)) {
> + got_object_qid_free(qid);
> + qid = NULL;
> + continue;
> + }
> +
> + idx = got_packidx_get_object_idx(packidx, &qid->id);
> + if (idx == -1)
> + break;
> +
> + err = open_object(&obj, pack, packidx, idx, &qid->id,
> + objcache);
> + if (err)
> + goto done;
> + if (obj->type == GOT_OBJ_TYPE_TAG) {
> + struct got_tag_object *tag;
> + uint8_t *buf;
> + size_t len;
> + err = got_packfile_extract_object_to_mem(&buf,
> + &len, obj, pack);
> + if (err)
> + goto done;
> + obj->size = len;
> + err = got_object_parse_tag(&tag, buf, len);
> + if (err) {
> + free(buf);
> + goto done;
> + }
> + err = open_commit(&commit, pack, packidx, idx,
> + &tag->id, objcache);
> + got_object_tag_close(tag);
> + free(buf);
> + if (err)
> + goto done;
> + } else if (obj->type == GOT_OBJ_TYPE_COMMIT) {
> + err = open_commit(&commit, pack, packidx, idx,
> + &qid->id, objcache);
> + if (err)
> + goto done;
> + } else {
> + err = got_error(GOT_ERR_OBJ_TYPE);
> + goto done;
> + }
> + got_object_close(obj);
> + obj = NULL;
> +
> + err = got_privsep_send_enumerated_commit(ibuf, &qid->id,
> + got_object_commit_get_committer_time(commit));
> + if (err)
> + goto done;
> +
> + tree_id = got_object_commit_get_tree_id(commit);
> + idx = got_packidx_get_object_idx(packidx, tree_id);
> + if (idx == -1) {
> + err = got_privsep_send_enumerated_tree(&totlen, ibuf,
> + tree_id, "", NULL, -1);
> + if (err)
> + goto done;
> + break;
> + }
> +
> + if (got_object_idset_contains(idset, tree_id)) {
> + got_object_qid_free(qid);
> + qid = NULL;
> + continue;
> + }
> +
> + err = enumerate_tree(ibuf, &totlen, tree_id, "/",
> + pack, packidx, objcache, idset);
> + if (err)
> + goto done;
> +
> + got_object_qid_free(qid);
> + qid = NULL;
> +
> + parents = got_object_commit_get_parent_ids(commit);
> + if (parents) {
> + struct got_object_qid *pid;
> + STAILQ_FOREACH(pid, parents, entry) {
> + if (got_object_idset_contains(idset, &pid->id))
> + continue;
> + err = got_object_qid_alloc_partial(&qid);
> + if (err)
> + goto done;
> + memcpy(&qid->id, &pid->id, sizeof(qid->id));
> + STAILQ_INSERT_TAIL(&commit_ids, qid, entry);
> + qid = NULL;
> + }
> + }
> +
> + got_object_commit_close(commit);
> + commit = NULL;
> + }
> +
> + err = got_privsep_send_object_enumeration_done(ibuf);
> + if (err)
> + goto done;
> +done:
> + if (obj)
> + got_object_close(obj);
> + if (commit)
> + got_object_commit_close(commit);
> + got_object_qid_free(qid);
> + got_object_id_queue_free(&commit_ids);
> + got_object_idset_free(idset);
> + return err;
> +}
> +
> +static const struct got_error *
> receive_pack(struct got_pack **packp, struct imsgbuf *ibuf)
> {
> const struct got_error *err = NULL;
> @@ -1344,6 +1644,10 @@ main(int argc, char *argv[])
> err = commit_traversal_request(&imsg, &ibuf, pack,
> packidx, &objcache);
> break;
> + case GOT_IMSG_OBJECT_ENUMERATION_REQUEST:
> + err = enumeration_request(&imsg, &ibuf, pack,
> + packidx, &objcache);
> + break;
> default:
> err = got_error(GOT_ERR_PRIVSEP_MSG);
> break;
diff d6a28ffe187127e3247254d7e242bb52d66eb26b /home/op/w/got
blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4
file + lib/got_lib_object.h
--- lib/got_lib_object.h
+++ lib/got_lib_object.h
@@ -133,3 +133,13 @@ const struct got_error *got_object_tree_entry_dup(stru
const struct got_error *got_traverse_packed_commits(
struct got_object_id_queue *, struct got_object_id *, const char *,
struct got_repository *);
+
+typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *,
+ time_t, struct got_object_id *, struct got_repository *);
+typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *,
+ struct got_tree_object *, time_t, struct got_object_id *, const char *,
+ struct got_repository *);
+
+const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb,
+ got_object_enumerate_tree_cb, void *, struct got_object_id **, int,
+ struct got_packidx *, struct got_repository *);
blob - e719a95bde6bbe971668bb22ea3a72e2990ad927
file + lib/got_lib_privsep.h
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -145,6 +145,11 @@ enum got_imsg_type {
GOT_IMSG_COMMIT_TRAVERSAL_REQUEST,
GOT_IMSG_TRAVERSED_COMMITS,
GOT_IMSG_COMMIT_TRAVERSAL_DONE,
+ GOT_IMSG_OBJECT_ENUMERATION_REQUEST,
+ GOT_IMSG_ENUMERATED_COMMIT,
+ GOT_IMSG_ENUMERATED_TREE,
+ GOT_IMSG_TREE_ENUMERATION_DONE,
+ GOT_IMSG_OBJECT_ENUMERATION_DONE,
/* Message sending file descriptor to a temporary file. */
GOT_IMSG_TMPFD,
@@ -556,6 +561,22 @@ struct got_imsg_traversed_commits {
/* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */
} __attribute__((__packed__));
+/* Structure for GOT_IMSG_ENUMERATED_COMMIT */
+struct got_imsg_enumerated_commit {
+ uint8_t id[SHA1_DIGEST_LENGTH];
+ time_t mtime;
+} __attribute__((__packed__));
+
+/* Structure for GOT_IMSG_ENUMERATED_TREE */
+struct got_imsg_enumerated_tree {
+ uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */
+ int nentries; /* number of tree entries */
+
+ /* Followed by tree's path in remaining data of imsg buffer. */
+
+ /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */
+} __attribute__((__packed__));
+
/*
* Structure for GOT_IMSG_GOTCONFIG_REMOTE and
* GOT_IMSG_GOTCONFIG_REMOTE data.
@@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver
const struct got_error *got_privsep_recv_traversed_commits(
struct got_commit_object **, struct got_object_id **,
struct got_object_id_queue *, struct imsgbuf *);
+const struct got_error *got_privsep_send_enumerated_tree(size_t *,
+ struct imsgbuf *, struct got_object_id *, const char *,
+ struct got_parsed_tree_entry *, int);
+const struct got_error *got_privsep_send_object_enumeration_request(
+ struct imsgbuf *);
+const struct got_error *got_privsep_send_object_enumeration_done(
+ struct imsgbuf *);
+const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *,
+ struct got_object_id *, time_t);
+const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *,
+ got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *,
+ struct got_repository *);
const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int,
struct got_object_id *);
blob - 2d612890612d7d8a8e30549c38659cd083a2e41e
file + lib/object.c
--- lib/object.c
+++ lib/object.c
@@ -60,6 +60,10 @@
#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
#endif
+#ifndef nitems
+#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
struct got_object_id *
got_object_get_id(struct got_object *obj)
{
@@ -2390,3 +2394,70 @@ done:
free(changed_commit_id);
return err;
}
+
+const struct got_error *
+got_object_enumerate(got_object_enumerate_commit_cb cb_commit,
+ got_object_enumerate_tree_cb cb_tree, void *cb_arg,
+ struct got_object_id **commit_ids, int ncommits,
+ struct got_packidx *packidx, struct got_repository *repo)
+{
+ const struct got_error *err = NULL;
+ struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS];
+ struct got_pack *pack;
+ char *path_packfile = NULL;
+ int i, j = 0;
+
+ err = got_packidx_get_packfile_path(&path_packfile,
+ packidx->path_packidx);
+ if (err)
+ return err;
+
+ pack = got_repo_get_cached_pack(repo, path_packfile);
+ if (pack == NULL) {
+ err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
+ if (err)
+ goto done;
+ }
+
+ if (pack->privsep_child == NULL) {
+ err = start_pack_privsep_child(pack, packidx);
+ if (err)
+ goto done;
+ }
+
+ err = got_privsep_send_object_enumeration_request(
+ pack->privsep_child->ibuf);
+ if (err)
+ goto done;
+
+ /*
+ * XXX This is stupid. Consider adding a function which
+ * does the chunking internally?
+ */
+ for (i = 0; i < ncommits; i++) {
+ j = i % nitems(ids);
+ ids[j] = commit_ids[i];
+ if (j >= nitems(ids) - 1) {
+ err = got_privsep_send_object_idlist(
+ pack->privsep_child->ibuf, ids, j + 1);
+ if (err)
+ goto done;
+ }
+ }
+ if (j > 0) {
+ err = got_privsep_send_object_idlist(
+ pack->privsep_child->ibuf, ids, j + 1);
+ if (err)
+ goto done;
+ }
+
+ err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf);
+ if (err)
+ goto done;
+
+ err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf,
+ cb_commit, cb_tree, cb_arg, repo);
+done:
+ free(path_packfile);
+ return err;
+}
blob - bb8a404c064277aaedb6f0a5bd168e7765442df7
file + lib/pack_create.c
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -903,21 +903,16 @@ add_object(int want_meta, struct got_object_idset *ids
static const struct got_error *
load_tree_entries(struct got_object_id_queue *ids, int want_meta,
struct got_object_idset *idset, struct got_object_idset *idset_exclude,
- struct got_object_id *tree_id,
+ struct got_tree_object *tree,
const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo,
int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
got_pack_progress_cb progress_cb, void *progress_arg,
struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
{
const struct got_error *err;
- struct got_tree_object *tree;
char *p = NULL;
int i;
- err = got_object_open_as_tree(&tree, repo, tree_id);
- if (err)
- return err;
-
(*ntrees)++;
err = report_progress(progress_cb, progress_arg, rl,
*ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
@@ -939,8 +934,16 @@ load_tree_entries(struct got_object_id_queue *ids, int
got_object_idset_contains(idset, id) ||
got_object_idset_contains(idset_exclude, id))
continue;
-
- if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
+
+ /*
+ * If got-read-pack is crawling trees for us then
+ * we are only here to collect blob IDs.
+ */
+ if (ids == NULL && S_ISDIR(mode))
+ continue;
+
+ if (asprintf(&p, "%s%s%s", dpath,
+ got_path_is_root_dir(dpath) ? "" : "/",
got_tree_entry_get_name(e)) == -1) {
err = got_error_from_errno("asprintf");
break;
@@ -970,7 +973,6 @@ load_tree_entries(struct got_object_id_queue *ids, int
}
}
- got_object_tree_close(tree);
free(p);
return err;
}
@@ -987,6 +989,7 @@ load_tree(int want_meta, struct got_object_idset *idse
const struct got_error *err = NULL;
struct got_object_id_queue tree_ids;
struct got_object_qid *qid;
+ struct got_tree_object *tree = NULL;
if (got_object_idset_contains(idset, tree_id) ||
got_object_idset_contains(idset_exclude, tree_id))
@@ -1034,20 +1037,31 @@ load_tree(int want_meta, struct got_object_idset *idse
break;
}
+ err = got_object_open_as_tree(&tree, repo, &qid->id);
+ if (err) {
+ free(qid->data);
+ got_object_qid_free(qid);
+ break;
+ }
+
err = load_tree_entries(&tree_ids, want_meta, idset,
- idset_exclude, &qid->id,
- path, mtime, seed, repo, loose_obj_only, ncolored, nfound,
- ntrees, progress_cb, progress_arg, rl,
- cancel_cb, cancel_arg);
+ idset_exclude, tree, path, mtime, seed, repo,
+ loose_obj_only, ncolored, nfound, ntrees, progress_cb,
+ progress_arg, rl, cancel_cb, cancel_arg);
free(qid->data);
got_object_qid_free(qid);
if (err)
break;
+
+ got_object_tree_close(tree);
+ tree = NULL;
}
STAILQ_FOREACH(qid, &tree_ids, entry)
free(qid->data);
got_object_id_queue_free(&tree_ids);
+ if (tree)
+ got_object_tree_close(tree);
return err;
}
@@ -1451,7 +1465,197 @@ done:
return err;
}
+struct load_packed_obj_arg {
+ /* output parameters: */
+ struct got_object_id *id;
+ char *dpath;
+ time_t mtime;
+
+ /* input parameters: */
+ int want_meta;
+ struct got_object_idset *idset;
+ struct got_object_idset *idset_exclude;
+ uint32_t seed;
+ int loose_obj_only;
+ int *ncolored;
+ int *nfound;
+ int *ntrees;
+ got_pack_progress_cb progress_cb;
+ void *progress_arg;
+ struct got_ratelimit *rl;
+ got_cancel_cb cancel_cb;
+ void *cancel_arg;
+};
+
static const struct got_error *
+load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id,
+ struct got_repository *repo)
+{
+ struct load_packed_obj_arg *a = arg;
+
+ if (got_object_idset_contains(a->idset, id) ||
+ got_object_idset_contains(a->idset_exclude, id))
+ return NULL;
+
+ return add_object(a->want_meta,
+ a->want_meta ? a->idset : a->idset_exclude,
+ id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->seed, a->loose_obj_only,
+ repo, a->ncolored, a->nfound, a->ntrees,
+ a->progress_cb, a->progress_arg, a->rl);
+}
+
+static const struct got_error *
+load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime,
+ struct got_object_id *id, const char *dpath, struct got_repository *repo)
+{
+ const struct got_error *err;
+ struct load_packed_obj_arg *a = arg;
+ const char *relpath;
+
+ /*
+ * When we receive a tree's ID and path but not the tree itself,
+ * this tree object was not found in the pack file. This is the
+ * last time we are being called for this optimized traversal.
+ * Return from here and switch to loading objects the slow way.
+ */
+ if (tree == NULL) {
+ free(a->id);
+ a->id = got_object_id_dup(id);
+ if (a->id == NULL)
+ return got_error_from_errno("got_object_id_dup");
+
+ free(a->dpath);
+ a->dpath = strdup(dpath);
+ if (a->dpath == NULL)
+ return got_error_from_errno("strdup");
+
+ a->mtime = mtime;
+ return NULL;
+ }
+
+ if (got_object_idset_contains(a->idset, id) ||
+ got_object_idset_contains(a->idset_exclude, id))
+ return NULL;
+
+ relpath = dpath;
+ while (relpath[0] == '/')
+ relpath++;
+
+ err = add_object(a->want_meta,
+ a->want_meta ? a->idset : a->idset_exclude,
+ id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->seed, a->loose_obj_only,
+ repo, a->ncolored, a->nfound, a->ntrees,
+ a->progress_cb, a->progress_arg, a->rl);
+ if (err)
+ return err;
+
+ return load_tree_entries(NULL, a->want_meta, a->idset,
+ a->idset_exclude, tree, dpath, mtime, a->seed, repo,
+ a->loose_obj_only, a->ncolored, a->nfound, a->ntrees,
+ a->progress_cb, a->progress_arg, a->rl,
+ a->cancel_cb, a->cancel_arg);
+}
+
+static const struct got_error *
+load_packed_object_ids(struct got_object_id **commits, int ncommits,
+ int want_meta, struct got_object_idset *idset,
+ struct got_object_idset *idset_exclude, int loose_obj_only,
+ uint32_t seed, struct got_repository *repo, struct got_packidx *packidx,
+ int *ncolored, int *nfound, int *ntrees,
+ got_pack_progress_cb progress_cb, void *progress_arg,
+ struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
+{
+ const struct got_error *err = NULL;
+ struct load_packed_obj_arg lpa;
+
+ memset(&lpa, 0, sizeof(lpa));
+ lpa.want_meta = want_meta;
+ lpa.idset = idset;
+ lpa.idset_exclude = idset_exclude;
+ lpa.seed = seed;
+ lpa.loose_obj_only = loose_obj_only;
+ lpa.ncolored = ncolored;
+ lpa.nfound = nfound;
+ lpa.ntrees = ntrees;
+ lpa.progress_cb = progress_cb;
+ lpa.progress_arg = progress_arg;
+ lpa.rl = rl;
+ lpa.cancel_cb = cancel_cb;
+ lpa.cancel_arg = cancel_arg;
+
+ /* Attempt to load objects via got-read-pack, as far as possible. */
+ err = got_object_enumerate(load_packed_commit_id,
+ load_packed_tree_ids, &lpa, commits, ncommits, packidx, repo);
+ if (err)
+ return err;
+
+ if (lpa.id == NULL)
+ return NULL;
+
+ /*
+ * An incomplete tree hierarchy was present in the pack file
+ * and caused loading to be aborted midway through a commit.
+ * Continue loading trees the slow way.
+ */
+ err = load_tree(want_meta, idset, idset_exclude,
+ lpa.id, lpa.dpath, lpa.mtime, seed, repo, loose_obj_only,
+ ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
+ cancel_cb, cancel_arg);
+ free(lpa.id);
+ free(lpa.dpath);
+ return err;
+}
+
+static const struct got_error *
+find_pack_for_enumeration(struct got_packidx **best_packidx,
+ struct got_object_id **ids, int nids, struct got_repository *repo)
+{
+ const struct got_error *err = NULL;
+ struct got_pathlist_entry *pe;
+ const char *best_packidx_path = NULL;
+ int nobj_max = 0;
+ int ncommits_max = 0;
+
+ *best_packidx = NULL;
+
+ /*
+ * Find the largest pack which contains at least some of the
+ * commits and tags we are interested in.
+ */
+ TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
+ const char *path_packidx = pe->path;
+ struct got_packidx *packidx;
+ int nobj, i, idx, ncommits = 0;
+
+ err = got_repo_get_packidx(&packidx, path_packidx, repo);
+ if (err)
+ break;
+
+ nobj = be32toh(packidx->hdr.fanout_table[0xff]);
+ if (nobj <= nobj_max)
+ continue;
+
+ for (i = 0; i < nids; i++) {
+ idx = got_packidx_get_object_idx(packidx, ids[i]);
+ if (idx != -1)
+ ncommits++;
+ }
+ if (ncommits > ncommits_max) {
+ best_packidx_path = path_packidx;
+ nobj_max = nobj;
+ ncommits_max = ncommits;
+ }
+ }
+
+ if (best_packidx_path) {
+ err = got_repo_get_packidx(best_packidx, best_packidx_path,
+ repo);
+ }
+
+ return err;
+}
+
+static const struct got_error *
load_object_ids(int *ncolored, int *nfound, int *ntrees,
struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
struct got_object_id **ours, int nours, struct got_repository *repo,
@@ -1461,6 +1665,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
{
const struct got_error *err = NULL;
struct got_object_id **ids = NULL;
+ struct got_packidx *packidx = NULL;
int i, nobj = 0, obj_type;
struct got_object_idset *idset_exclude;
@@ -1477,6 +1682,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
if (err)
goto done;
+ err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo);
+ if (err)
+ goto done;
+ if (packidx) {
+ err = load_packed_object_ids(theirs, ntheirs, 0,
+ idset, idset_exclude, loose_obj_only, seed, repo, packidx,
+ ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
+ cancel_cb, cancel_arg);
+ if (err)
+ goto done;
+ }
+
for (i = 0; i < ntheirs; i++) {
struct got_object_id *id = theirs[i];
if (id == NULL)
@@ -1501,6 +1718,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
}
}
+ err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
+ if (err)
+ goto done;
+ if (packidx) {
+ err = load_packed_object_ids(ids, nobj, 1,
+ idset, idset_exclude, loose_obj_only, seed, repo, packidx,
+ ncolored, nfound, ntrees,
+ progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
+ if (err)
+ goto done;
+ }
+
for (i = 0; i < nobj; i++) {
err = load_commit(1, idset, idset_exclude, ids[i], repo,
seed, loose_obj_only, ncolored, nfound, ntrees,
blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b
file + lib/privsep.c
--- lib/privsep.c
+++ lib/privsep.c
@@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com
}
static const struct got_error *
-send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
- int idx0, int idxN, size_t len)
+send_tree_entries_batch(struct imsgbuf *ibuf,
+ struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len)
{
struct ibuf *wbuf;
struct got_imsg_tree_entries ientries;
@@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par
return NULL;
}
-const struct got_error *
-got_privsep_send_tree(struct imsgbuf *ibuf,
- struct got_parsed_tree_entry *entries, int nentries)
+static const struct got_error *
+send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries,
+ int nentries)
{
const struct got_error *err = NULL;
- struct got_imsg_tree_object itree;
- size_t entries_len;
int i, j;
+ size_t entries_len = sizeof(struct got_imsg_tree_entries);
- itree.nentries = nentries;
- if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
- == -1)
- return got_error_from_errno("imsg_compose TREE");
-
- entries_len = sizeof(struct got_imsg_tree_entries);
i = 0;
for (j = 0; j < nentries; j++) {
struct got_parsed_tree_entry *pte = &entries[j];
@@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf,
if (j > 0 &&
entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
- err = send_tree_entries(ibuf, entries,
+ err = send_tree_entries_batch(ibuf, entries,
i, j - 1, entries_len);
if (err)
return err;
@@ -1513,14 +1506,98 @@ got_privsep_send_tree(struct imsgbuf *ibuf,
}
if (j > 0) {
- err = send_tree_entries(ibuf, entries, i, j - 1, entries_len);
+ err = send_tree_entries_batch(ibuf, entries, i, j - 1,
+ entries_len);
if (err)
return err;
}
+ return NULL;
+}
+
+const struct got_error *
+got_privsep_send_tree(struct imsgbuf *ibuf,
+ struct got_parsed_tree_entry *entries, int nentries)
+{
+ const struct got_error *err = NULL;
+ struct got_imsg_tree_object itree;
+
+ itree.nentries = nentries;
+ if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree))
+ == -1)
+ return got_error_from_errno("imsg_compose TREE");
+
+ err = send_tree_entries(ibuf, entries, nentries);
+ if (err)
+ return err;
+
return flush_imsg(ibuf);
}
+
+static const struct got_error *
+recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree,
+ int *nentries)
+{
+ const struct got_error *err = NULL;
+ struct got_imsg_tree_entries *ientries;
+ struct got_tree_entry *te;
+ size_t te_offset;
+ size_t i;
+
+ if (datalen <= sizeof(*ientries) ||
+ datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE)
+ return got_error(GOT_ERR_PRIVSEP_LEN);
+
+ ientries = (struct got_imsg_tree_entries *)data;
+ if (ientries->nentries > INT_MAX) {
+ return got_error_msg(GOT_ERR_NO_SPACE,
+ "too many tree entries");
+ }
+
+ te_offset = sizeof(*ientries);
+ for (i = 0; i < ientries->nentries; i++) {
+ struct got_imsg_tree_entry ite;
+ const char *te_name;
+ uint8_t *buf = (uint8_t *)data + te_offset;
+
+ if (te_offset >= datalen) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+
+ /* Might not be aligned, size is ~32 bytes. */
+ memcpy(&ite, buf, sizeof(ite));
+
+ if (ite.namelen >= sizeof(te->name)) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ if (te_offset + sizeof(ite) + ite.namelen >
+ datalen) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+
+ if (*nentries >= tree->nentries) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ te = &tree->entries[*nentries];
+ te_name = buf + sizeof(ite);
+ memcpy(te->name, te_name, ite.namelen);
+ te->name[ite.namelen] = '\0';
+ memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
+ te->mode = ite.mode;
+ te->idx = *nentries;
+ (*nentries)++;
+
+ te_offset += sizeof(ite) + ite.namelen;
+ }
+
+ return err;
+}
+
const struct got_error *
got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf)
{
@@ -1529,7 +1606,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
MIN(sizeof(struct got_imsg_error),
sizeof(struct got_imsg_tree_object));
struct got_imsg_tree_object *itree;
- size_t i;
int nentries = 0;
*tree = NULL;
@@ -1542,9 +1618,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
struct imsg imsg;
size_t n;
size_t datalen;
- struct got_imsg_tree_entries *ientries;
- struct got_tree_entry *te = NULL;
- size_t te_offset;
n = imsg_get(ibuf, &imsg);
if (n == 0) {
@@ -1611,56 +1684,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s
err = got_error(GOT_ERR_PRIVSEP_MSG);
break;
}
- if (datalen <= sizeof(*ientries) ||
- datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) {
- err = got_error(GOT_ERR_PRIVSEP_LEN);
- break;
- }
-
- ientries = imsg.data;
- if (ientries->nentries > INT_MAX) {
- err = got_error_msg(GOT_ERR_NO_SPACE,
- "too many tree entries");
- break;
- }
- te_offset = sizeof(*ientries);
- for (i = 0; i < ientries->nentries; i++) {
- struct got_imsg_tree_entry ite;
- const char *te_name;
- uint8_t *buf = imsg.data + te_offset;
-
- if (te_offset >= datalen) {
- err = got_error(GOT_ERR_PRIVSEP_LEN);
- break;
- }
-
- /* Might not be aligned, size is ~32 bytes. */
- memcpy(&ite, buf, sizeof(ite));
-
- if (ite.namelen >= sizeof(te->name)) {
- err = got_error(GOT_ERR_PRIVSEP_LEN);
- break;
- }
- if (te_offset + sizeof(ite) + ite.namelen >
- datalen) {
- err = got_error(GOT_ERR_PRIVSEP_LEN);
- break;
- }
- if (nentries >= (*tree)->nentries) {
- err = got_error(GOT_ERR_PRIVSEP_LEN);
- break;
- }
- te = &(*tree)->entries[nentries];
- te_name = buf + sizeof(ite);
- memcpy(te->name, te_name, ite.namelen);
- te->name[ite.namelen] = '\0';
- memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH);
- te->mode = ite.mode;
- te->idx = nentries;
- nentries++;
-
- te_offset += sizeof(ite) + ite.namelen;
- }
+ err = recv_tree_entries(imsg.data, datalen,
+ *tree, &nentries);
break;
default:
err = got_error(GOT_ERR_PRIVSEP_MSG);
@@ -2731,6 +2756,278 @@ got_privsep_recv_traversed_commits(struct got_commit_o
}
const struct got_error *
+got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf,
+ struct got_object_id *tree_id, const char *path,
+ struct got_parsed_tree_entry *entries, int nentries)
+{
+ const struct got_error *err = NULL;
+ struct ibuf *wbuf;
+ size_t path_len = strlen(path);
+ size_t msglen;
+
+ msglen = sizeof(struct got_imsg_enumerated_tree) + path_len;
+ wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen);
+ if (wbuf == NULL)
+ return got_error_from_errno("imsg_create ENUMERATED_TREE");
+
+ if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) {
+ err = got_error_from_errno("imsg_add ENUMERATED_TREE");
+ ibuf_free(wbuf);
+ return err;
+ }
+ if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) {
+ err = got_error_from_errno("imsg_add ENUMERATED_TREE");
+ ibuf_free(wbuf);
+ return err;
+ }
+ if (imsg_add(wbuf, path, path_len) == -1) {
+ err = got_error_from_errno("imsg_add ENUMERATED_TREE");
+ ibuf_free(wbuf);
+ return err;
+ }
+
+ wbuf->fd = -1;
+ imsg_close(ibuf, wbuf);
+
+ if (entries) {
+ err = send_tree_entries(ibuf, entries, nentries);
+ if (err)
+ return err;
+ }
+
+ return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf)
+{
+ if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST,
+ 0, 0, -1, NULL, 0) == -1)
+ return got_error_from_errno("imsg_compose "
+ "OBJECT_ENUMERATION_REQUEST");
+
+ return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf)
+{
+ if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE,
+ 0, 0, -1, NULL, 0) == -1)
+ return got_error_from_errno("imsg_compose "
+ "OBJECT_ENUMERATION_DONE");
+
+ return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_enumerated_commit(struct imsgbuf *ibuf,
+ struct got_object_id *id, time_t mtime)
+{
+ struct ibuf *wbuf;
+
+ wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0,
+ sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH);
+ if (wbuf == NULL)
+ return got_error_from_errno("imsg_create ENUMERATED_COMMIT");
+
+ /* Keep in sync with struct got_imsg_enumerated_commit! */
+ if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1)
+ return got_error_from_errno("imsg_add ENUMERATED_COMMIT");
+ if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1)
+ return got_error_from_errno("imsg_add ENUMERATED_COMMIT");
+
+ wbuf->fd = -1;
+ imsg_close(ibuf, wbuf);
+ /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. */
+ return NULL;
+}
+
+const struct got_error *
+got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf,
+ got_object_enumerate_commit_cb cb_commit,
+ got_object_enumerate_tree_cb cb_tree, void *cb_arg,
+ struct got_repository *repo)
+{
+ const struct got_error *err = NULL;
+ struct imsg imsg;
+ struct got_imsg_enumerated_commit *icommit = NULL;
+ struct got_object_id commit_id;
+ int have_commit = 0;
+ time_t mtime = 0;
+ struct got_tree_object tree;
+ struct got_imsg_enumerated_tree *itree;
+ struct got_object_id tree_id;
+ char *path = NULL, *canon_path = NULL;
+ size_t datalen, path_len;
+ int nentries = -1;
+ int done = 0;
+
+ memset(&tree, 0, sizeof(tree));
+
+ while (!done) {
+ err = got_privsep_recv_imsg(&imsg, ibuf, 0);
+ if (err)
+ break;
+
+ datalen = imsg.hdr.len - IMSG_HEADER_SIZE;
+ switch (imsg.hdr.type) {
+ case GOT_IMSG_ENUMERATED_COMMIT:
+ if (have_commit && nentries != -1) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ if (datalen != sizeof(*icommit)) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ icommit = (struct got_imsg_enumerated_commit *)imsg.data;
+ memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH);
+ mtime = icommit->mtime;
+ err = cb_commit(cb_arg, mtime, &commit_id, repo);
+ if (err)
+ break;
+ have_commit = 1;
+ break;
+ case GOT_IMSG_ENUMERATED_TREE:
+ /* Should be preceded by GOT_IMSG_ENUMERATED_COMMIT. */
+ if (!have_commit) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ if (datalen < sizeof(*itree)) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ itree = imsg.data;
+ path_len = datalen - sizeof(*itree);
+ if (path_len == 0) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1));
+ free(path);
+ path = malloc(path_len + 1);
+ if (path == NULL) {
+ err = got_error_from_errno("malloc");
+ break;
+ }
+ free(canon_path);
+ canon_path = malloc(path_len + 1);
+ if (canon_path == NULL) {
+ err = got_error_from_errno("malloc");
+ break;
+ }
+ memcpy(path, (uint8_t *)imsg.data + sizeof(*itree),
+ path_len);
+ path[path_len] = '\0';
+ if (!got_path_is_absolute(path)) {
+ err = got_error(GOT_ERR_BAD_PATH);
+ break;
+ }
+ if (got_path_is_root_dir(path)) {
+ /* XXX check what got_canonpath() does wrong */
+ canon_path[0] = '/';
+ canon_path[1] = '\0';
+ } else {
+ err = got_canonpath(path, canon_path,
+ path_len + 1);
+ if (err)
+ break;
+ }
+ if (strcmp(path, canon_path) != 0) {
+ err = got_error(GOT_ERR_BAD_PATH);
+ break;
+ }
+ if (nentries != -1) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ if (itree->nentries < -1) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ if (itree->nentries == -1) {
+ /* Tree was not found in pack file. */
+ err = cb_tree(cb_arg, NULL, mtime, &tree_id,
+ path, repo);
+ break;
+ }
+ if (itree->nentries > INT_MAX) {
+ err = got_error(GOT_ERR_PRIVSEP_LEN);
+ break;
+ }
+ tree.entries = calloc(itree->nentries,
+ sizeof(struct got_tree_entry));
+ if (tree.entries == NULL) {
+ err = got_error_from_errno("calloc");
+ break;
+ }
+ if (itree->nentries == 0) {
+ err = cb_tree(cb_arg, &tree, mtime, &tree_id,
+ path, repo);
+ if (err)
+ break;
+
+ /* Prepare for next tree. */
+ free(tree.entries);
+ memset(&tree, 0, sizeof(tree));
+ nentries = -1;
+ } else {
+ tree.nentries = itree->nentries;
+ nentries = 0;
+ }
+ break;
+ case GOT_IMSG_TREE_ENTRIES:
+ /* Should be preceded by GOT_IMSG_ENUMERATED_TREE. */
+ if (nentries <= -1) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ err = recv_tree_entries(imsg.data, datalen,
+ &tree, &nentries);
+ if (err)
+ break;
+ if (tree.nentries == nentries) {
+ err = cb_tree(cb_arg, &tree, mtime, &tree_id,
+ path, repo);
+ if (err)
+ break;
+
+ /* Prepare for next tree. */
+ free(tree.entries);
+ memset(&tree, 0, sizeof(tree));
+ nentries = -1;
+ }
+ break;
+ case GOT_IMSG_TREE_ENUMERATION_DONE:
+ /* All trees have been found and traversed. */
+ if (path == NULL || nentries != -1) {
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+ have_commit = 0;
+ break;
+ case GOT_IMSG_OBJECT_ENUMERATION_DONE:
+ done = 1;
+ break;
+ default:
+ err = got_error(GOT_ERR_PRIVSEP_MSG);
+ break;
+ }
+
+ imsg_free(&imsg);
+ if (err)
+ break;
+ }
+
+ free(path);
+ free(canon_path);
+ free(tree.entries);
+ return err;
+}
+
+const struct got_error *
got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx,
struct got_object_id *id)
{
blob - ea9a7e564cb840af0d6a61c8c8e0cf5ed3d93147
file + libexec/got-read-pack/got-read-pack.c
--- libexec/got-read-pack/got-read-pack.c
+++ libexec/got-read-pack/got-read-pack.c
@@ -14,6 +14,7 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include <sys/stat.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/uio.h>
@@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf)
return got_privsep_flush_imsg(ibuf);
}
-
static const struct got_error *
commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf,
struct got_pack *pack, struct got_packidx *packidx,
@@ -1022,6 +1022,31 @@ recv_object_ids(struct got_object_idset *idset, struct
}
static const struct got_error *
+recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf)
+{
+ const struct got_error *err = NULL;
+ int done = 0;
+ struct got_object_qid *qid;
+ struct got_object_id *ids;
+ size_t nids, i;
+
+ for (;;) {
+ err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf);
+ if (err || done)
+ break;
+ for (i = 0; i < nids; i++) {
+ err = got_object_qid_alloc_partial(&qid);
+ if (err)
+ return err;
+ memcpy(&qid->id, &ids[i], sizeof(qid->id));
+ STAILQ_INSERT_TAIL(queue, qid, entry);
+ }
+ }
+
+ return err;
+}
+
+static const struct got_error *
delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf,
FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx)
{
@@ -1132,6 +1157,281 @@ done:
}
static const struct got_error *
+send_tree_enumeration_done(struct imsgbuf *ibuf)
+{
+ if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1,
+ NULL, 0) == -1)
+ return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE");
+
+ return got_privsep_flush_imsg(ibuf);
+}
+
+static const struct got_error *
+enumerate_tree(struct imsgbuf *ibuf, size_t *totlen,
+ struct got_object_id *tree_id,
+ const char *path, struct got_pack *pack, struct got_packidx *packidx,
+ struct got_object_cache *objcache, struct got_object_idset *idset)
+{
+ const struct got_error *err = NULL;
+ struct got_object_id_queue ids;
+ struct got_object_qid *qid;
+ uint8_t *buf = NULL;
+ struct got_parsed_tree_entry *entries = NULL;
+
+ STAILQ_INIT(&ids);
+
+ err = got_object_qid_alloc_partial(&qid);
+ if (err)
+ return err;
+ memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH);
+ qid->data = strdup(path);
+ if (qid->data == NULL) {
+ err = got_error_from_errno("strdup");
+ goto done;
+ }
+ STAILQ_INSERT_TAIL(&ids, qid, entry);
+ qid = NULL;
+
+ do {
+ const char *path;
+ int idx, nentries, i;
+
+ if (sigint_received) {
+ err = got_error(GOT_ERR_CANCELLED);
+ goto done;
+ }
+
+ qid = STAILQ_FIRST(&ids);
+ STAILQ_REMOVE_HEAD(&ids, entry);
+ path = qid->data;
+
+ idx = got_packidx_get_object_idx(packidx, &qid->id);
+ if (idx == -1) {
+ err = got_privsep_send_enumerated_tree(totlen, ibuf,
+ &qid->id, path, NULL, -1);
+ break;
+ }
+
+ err = open_tree(&buf, &entries, &nentries,
+ pack, packidx, idx, &qid->id, objcache);
+ if (err) {
+ if (err->code != GOT_ERR_NO_OBJ)
+ goto done;
+ }
+
+ err = got_privsep_send_enumerated_tree(totlen,
+ ibuf, &qid->id, path, entries, nentries);
+ if (err)
+ goto done;
+
+ err = got_object_idset_add(idset, &qid->id, NULL);
+ if (err)
+ goto done;
+
+ for (i = 0; i < nentries; i++) {
+ struct got_object_qid *eqid = NULL;
+ struct got_parsed_tree_entry *pte = &entries[i];
+ char *p;
+
+ if (!S_ISDIR(pte->mode))
+ continue;
+
+ err = got_object_qid_alloc_partial(&eqid);
+ if (err)
+ goto done;
+ memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1));
+
+ if (got_object_idset_contains(idset, &eqid->id)) {
+ got_object_qid_free(eqid);
+ continue;
+ }
+
+ if (asprintf(&p, "%s%s%s", path,
+ got_path_is_root_dir(path) ? "" : "/",
+ pte->name) == -1) {
+ err = got_error_from_errno("asprintf");
+ got_object_qid_free(eqid);
+ goto done;
+ }
+ eqid->data = p;
+ STAILQ_INSERT_TAIL(&ids, eqid, entry);
+ idx = got_packidx_get_object_idx(packidx, &eqid->id);
+ if (idx == -1)
+ break;
+ }
+
+ free(qid->data);
+ got_object_qid_free(qid);
+ qid = NULL;
+
+ free(entries);
+ entries = NULL;
+ free(buf);
+ buf = NULL;
+ } while (!STAILQ_EMPTY(&ids));
+
+ err = send_tree_enumeration_done(ibuf);
+done:
+ free(buf);
+ if (qid)
+ free(qid->data);
+ got_object_qid_free(qid);
+ got_object_id_queue_free(&ids);
+ free(entries);
+ if (err) {
+ if (err->code == GOT_ERR_PRIVSEP_PIPE)
+ err = NULL;
+ else
+ got_privsep_send_error(ibuf, err);
+ }
+
+ return err;
+}
+
+static const struct got_error *
+enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf,
+ struct got_pack *pack, struct got_packidx *packidx,
+ struct got_object_cache *objcache)
+{
+ const struct got_error *err = NULL;
+ struct got_object_id_queue commit_ids;
+ const struct got_object_id_queue *parents = NULL;
+ struct got_object_qid *qid = NULL;
+ struct got_object *obj = NULL;
+ struct got_commit_object *commit = NULL;
+ struct got_object_id *tree_id = NULL;
+ size_t totlen = 0;
+ struct got_object_idset *idset;
+ int idx;
+
+ STAILQ_INIT(&commit_ids);
+
+ idset = got_object_idset_alloc();
+ if (idset == NULL)
+ return got_error_from_errno("got_object_idset_alloc");
+
+ err = recv_object_id_queue(&commit_ids, ibuf);
+ if (err)
+ goto done;
+
+ while (!STAILQ_EMPTY(&commit_ids)) {
+ if (sigint_received) {
+ err = got_error(GOT_ERR_CANCELLED);
+ goto done;
+ }
+
+ qid = STAILQ_FIRST(&commit_ids);
+ STAILQ_REMOVE_HEAD(&commit_ids, entry);
+
+ if (got_object_idset_contains(idset, &qid->id)) {
+ got_object_qid_free(qid);
+ qid = NULL;
+ continue;
+ }
+
+ idx = got_packidx_get_object_idx(packidx, &qid->id);
+ if (idx == -1)
+ break;
+
+ err = open_object(&obj, pack, packidx, idx, &qid->id,
+ objcache);
+ if (err)
+ goto done;
+ if (obj->type == GOT_OBJ_TYPE_TAG) {
+ struct got_tag_object *tag;
+ uint8_t *buf;
+ size_t len;
+ err = got_packfile_extract_object_to_mem(&buf,
+ &len, obj, pack);
+ if (err)
+ goto done;
+ obj->size = len;
+ err = got_object_parse_tag(&tag, buf, len);
+ if (err) {
+ free(buf);
+ goto done;
+ }
+ err = open_commit(&commit, pack, packidx, idx,
+ &tag->id, objcache);
+ got_object_tag_close(tag);
+ free(buf);
+ if (err)
+ goto done;
+ } else if (obj->type == GOT_OBJ_TYPE_COMMIT) {
+ err = open_commit(&commit, pack, packidx, idx,
+ &qid->id, objcache);
+ if (err)
+ goto done;
+ } else {
+ err = got_error(GOT_ERR_OBJ_TYPE);
+ goto done;
+ }
+ got_object_close(obj);
+ obj = NULL;
+
+ err = got_privsep_send_enumerated_commit(ibuf, &qid->id,
+ got_object_commit_get_committer_time(commit));
+ if (err)
+ goto done;
+
+ tree_id = got_object_commit_get_tree_id(commit);
+ idx = got_packidx_get_object_idx(packidx, tree_id);
+ if (idx == -1) {
+ err = got_privsep_send_enumerated_tree(&totlen, ibuf,
+ tree_id, "", NULL, -1);
+ if (err)
+ goto done;
+ break;
+ }
+
+ if (got_object_idset_contains(idset, tree_id)) {
+ got_object_qid_free(qid);
+ qid = NULL;
+ continue;
+ }
+
+ err = enumerate_tree(ibuf, &totlen, tree_id, "/",
+ pack, packidx, objcache, idset);
+ if (err)
+ goto done;
+
+ got_object_qid_free(qid);
+ qid = NULL;
+
+ parents = got_object_commit_get_parent_ids(commit);
+ if (parents) {
+ struct got_object_qid *pid;
+ STAILQ_FOREACH(pid, parents, entry) {
+ if (got_object_idset_contains(idset, &pid->id))
+ continue;
+ err = got_object_qid_alloc_partial(&qid);
+ if (err)
+ goto done;
+ memcpy(&qid->id, &pid->id, sizeof(qid->id));
+ STAILQ_INSERT_TAIL(&commit_ids, qid, entry);
+ qid = NULL;
+ }
+ }
+
+ got_object_commit_close(commit);
+ commit = NULL;
+ }
+
+ err = got_privsep_send_object_enumeration_done(ibuf);
+ if (err)
+ goto done;
+done:
+ if (obj)
+ got_object_close(obj);
+ if (commit)
+ got_object_commit_close(commit);
+ got_object_qid_free(qid);
+ got_object_id_queue_free(&commit_ids);
+ got_object_idset_free(idset);
+ return err;
+}
+
+static const struct got_error *
receive_pack(struct got_pack **packp, struct imsgbuf *ibuf)
{
const struct got_error *err = NULL;
@@ -1344,6 +1644,10 @@ main(int argc, char *argv[])
err = commit_traversal_request(&imsg, &ibuf, pack,
packidx, &objcache);
break;
+ case GOT_IMSG_OBJECT_ENUMERATION_REQUEST:
+ err = enumeration_request(&imsg, &ibuf, pack,
+ packidx, &objcache);
+ break;
default:
err = got_error(GOT_ERR_PRIVSEP_MSG);
break;
object enumeration in got-read-pack