Download raw body.
object enumeration in got-read-pack
Stefan Sperling <stsp@stsp.name> wrote: > On Thu, May 19, 2022 at 06:14:07PM +0200, Stefan Sperling wrote: > > This patch adds object enumeration support to got-read-pack. > > > > The initial step of packing becomes faster if a large pack file > > is available which contains most of the commits and trees which > > need to be loaded in order to enumerate the set of objects to be > > packed. > > We gain speed by cutting imsg traffic between the main process and > > got-read-pack during this phase. The main process no longer needs to > > send requests for individual objects as long as got-read-pack is > > able to enumerate them. > > > > This seems solid in my testing. More testing would be very welcome. > > > > ok? ok with the ibuf_free dropped. > New diff rebased on top of latest 'main', which now has the deltification > path fixes which were extracted from the previous version of this diff. Unfortunately I broke this again with the arc4random diff... apologies. I'm attaching a rebased diff (I just had to propagate the `seed' into some more functions via the new load_packed_obj_arg struct.) The diff doesn't address _any_ of my comments, it's just your diff rebased hoping it saves you some time :) some comments/questions inline. 
> diff 93edda4de370be75a04c393842d95f45a00089ab e7ba67af0e5a394d8046b8b0cbba18d379f9246b > blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4 > blob + 60f97103b1abf937d5f4d34671be29fb3599df48 > --- lib/got_lib_object.h > +++ lib/got_lib_object.h > @@ -133,3 +133,13 @@ const struct got_error *got_object_tree_entry_dup(stru > const struct got_error *got_traverse_packed_commits( > struct got_object_id_queue *, struct got_object_id *, const char *, > struct got_repository *); > + > +typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *, > + time_t, struct got_object_id *, struct got_repository *); > +typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *, > + struct got_tree_object *, time_t, struct got_object_id *, const char *, > + struct got_repository *); > + > +const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb, > + got_object_enumerate_tree_cb, void *, struct got_object_id **, int, > + struct got_packidx *, struct got_repository *); > blob - e719a95bde6bbe971668bb22ea3a72e2990ad927 > blob + c6c29a099c6a9b6dc6883641ef3ef96ce7f47156 > --- lib/got_lib_privsep.h > +++ lib/got_lib_privsep.h > @@ -145,6 +145,11 @@ enum got_imsg_type { > GOT_IMSG_COMMIT_TRAVERSAL_REQUEST, > GOT_IMSG_TRAVERSED_COMMITS, > GOT_IMSG_COMMIT_TRAVERSAL_DONE, > + GOT_IMSG_OBJECT_ENUMERATION_REQUEST, > + GOT_IMSG_ENUMERATED_COMMIT, > + GOT_IMSG_ENUMERATED_TREE, > + GOT_IMSG_TREE_ENUMERATION_DONE, > + GOT_IMSG_OBJECT_ENUMERATION_DONE, > > /* Message sending file descriptor to a temporary file. 
*/ > GOT_IMSG_TMPFD, > @@ -556,6 +561,22 @@ struct got_imsg_traversed_commits { > /* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */ > } __attribute__((__packed__)); > > +/* Structure for GOT_IMSG_ENUMERATED_COMMIT */ > +struct got_imsg_enumerated_commit { > + uint8_t id[SHA1_DIGEST_LENGTH]; > + time_t mtime; > +} __attribute__((__packed__)); > + > +/* Structure for GOT_IMSG_ENUMERATED_TREE */ > +struct got_imsg_enumerated_tree { > + uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */ > + int nentries; /* number of tree entries */ > + > + /* Followed by tree's path in remaining data of imsg buffer. */ > + > + /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */ > +} __attribute__((__packed__)); > + > /* > * Structure for GOT_IMSG_GOTCONFIG_REMOTE and > * GOT_IMSG_GOTCONFIG_REMOTE data. > @@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver > const struct got_error *got_privsep_recv_traversed_commits( > struct got_commit_object **, struct got_object_id **, > struct got_object_id_queue *, struct imsgbuf *); > +const struct got_error *got_privsep_send_enumerated_tree(size_t *, > + struct imsgbuf *, struct got_object_id *, const char *, > + struct got_parsed_tree_entry *, int); > +const struct got_error *got_privsep_send_object_enumeration_request( > + struct imsgbuf *); > +const struct got_error *got_privsep_send_object_enumeration_done( > + struct imsgbuf *); > +const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *, > + struct got_object_id *, time_t); > +const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *, > + got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *, > + struct got_repository *); > > const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int, > struct got_object_id *); > blob - 2d612890612d7d8a8e30549c38659cd083a2e41e > blob + 1b025984a415471435ce11fea6762fde7d0051ce > --- lib/object.c > +++ lib/object.c > @@ -60,6 +60,10 @@ > #define MIN(_a,_b) 
((_a) < (_b) ? (_a) : (_b)) > #endif > > +#ifndef nitems > +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) > +#endif > + > struct got_object_id * > got_object_get_id(struct got_object *obj) > { > @@ -2390,3 +2394,70 @@ done: > free(changed_commit_id); > return err; > } > + > +const struct got_error * > +got_object_enumerate(got_object_enumerate_commit_cb cb_commit, > + got_object_enumerate_tree_cb cb_tree, void *cb_arg, > + struct got_object_id **commit_ids, int ncommits, > + struct got_packidx *packidx, struct got_repository *repo) > +{ > + const struct got_error *err = NULL; > + struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS]; > + struct got_pack *pack; > + char *path_packfile = NULL; > + int i, j = 0; > + > + err = got_packidx_get_packfile_path(&path_packfile, > + packidx->path_packidx); > + if (err) > + return err; > + > + pack = got_repo_get_cached_pack(repo, path_packfile); > + if (pack == NULL) { > + err = got_repo_cache_pack(&pack, repo, path_packfile, packidx); > + if (err) > + goto done; > + } > + > + if (pack->privsep_child == NULL) { > + err = start_pack_privsep_child(pack, packidx); > + if (err) > + goto done; > + } > + > + err = got_privsep_send_object_enumeration_request( > + pack->privsep_child->ibuf); > + if (err) > + goto done; > + > + /* > + * XXX This is stupid. Consider adding a function which > + * does the chunking internally? 
> + */ > + for (i = 0; i < ncommits; i++) { > + j = i % nitems(ids); > + ids[j] = commit_ids[i]; > + if (j >= nitems(ids) - 1) { > + err = got_privsep_send_object_idlist( > + pack->privsep_child->ibuf, ids, j + 1); > + if (err) > + goto done; > + } > + } > + if (j > 0) { > + err = got_privsep_send_object_idlist( > + pack->privsep_child->ibuf, ids, j + 1); > + if (err) > + goto done; > + } > + > + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); > + if (err) > + goto done; > + > + err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf, > + cb_commit, cb_tree, cb_arg, repo); > +done: > + free(path_packfile); > + return err; > +} > blob - 203398831c0a6d3ffc31d9eb8e34611330f3cfb4 > blob + d7765ff55c98f991367c2f24425c9a32c2877fb0 > --- lib/pack_create.c > +++ lib/pack_create.c > @@ -900,21 +900,16 @@ add_object(int want_meta, struct got_object_idset *ids > static const struct got_error * > load_tree_entries(struct got_object_id_queue *ids, int want_meta, > struct got_object_idset *idset, struct got_object_idset *idset_exclude, > - struct got_object_id *tree_id, > + struct got_tree_object *tree, > const char *dpath, time_t mtime, struct got_repository *repo, > int loose_obj_only, int *ncolored, int *nfound, int *ntrees, > got_pack_progress_cb progress_cb, void *progress_arg, > struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) > { > const struct got_error *err; > - struct got_tree_object *tree; > char *p = NULL; > int i; > > - err = got_object_open_as_tree(&tree, repo, tree_id); > - if (err) > - return err; > - > (*ntrees)++; > err = report_progress(progress_cb, progress_arg, rl, > *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0); > @@ -936,8 +931,16 @@ load_tree_entries(struct got_object_id_queue *ids, int > got_object_idset_contains(idset, id) || > got_object_idset_contains(idset_exclude, id)) > continue; > - > - if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? 
"/" : "", > + > + /* > + * If got-read-pack is crawling trees for us then > + * we are only here to collect blob IDs. > + */ > + if (ids == NULL && S_ISDIR(mode)) > + continue; > + > + if (asprintf(&p, "%s%s%s", dpath, > + got_path_is_root_dir(dpath) ? "" : "/", > got_tree_entry_get_name(e)) == -1) { > err = got_error_from_errno("asprintf"); > break; > @@ -967,7 +970,6 @@ load_tree_entries(struct got_object_id_queue *ids, int > } > } > > - got_object_tree_close(tree); > free(p); > return err; > } > @@ -984,6 +986,7 @@ load_tree(int want_meta, struct got_object_idset *idse > const struct got_error *err = NULL; > struct got_object_id_queue tree_ids; > struct got_object_qid *qid; > + struct got_tree_object *tree = NULL; > > if (got_object_idset_contains(idset, tree_id) || > got_object_idset_contains(idset_exclude, tree_id)) > @@ -1031,20 +1034,31 @@ load_tree(int want_meta, struct got_object_idset *idse > break; > } > > + err = got_object_open_as_tree(&tree, repo, &qid->id); > + if (err) { > + free(qid->data); > + got_object_qid_free(qid); > + break; > + } > + > err = load_tree_entries(&tree_ids, want_meta, idset, > - idset_exclude, &qid->id, > - path, mtime, repo, loose_obj_only, ncolored, nfound, > - ntrees, progress_cb, progress_arg, rl, > + idset_exclude, tree, path, mtime, repo, loose_obj_only, > + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, > cancel_cb, cancel_arg); > free(qid->data); > got_object_qid_free(qid); > if (err) > break; > + > + got_object_tree_close(tree); > + tree = NULL; > } > > STAILQ_FOREACH(qid, &tree_ids, entry) > free(qid->data); > got_object_id_queue_free(&tree_ids); > + if (tree) > + got_object_tree_close(tree); > return err; > } > > @@ -1448,7 +1462,195 @@ done: > return err; > } > > +struct load_packed_obj_arg { > + /* output parameters: */ > + struct got_object_id *id; > + char *dpath; > + time_t mtime; > + > + /* input parameters: */ > + int want_meta; > + struct got_object_idset *idset; > + struct got_object_idset 
*idset_exclude; > + int loose_obj_only; > + int *ncolored; > + int *nfound; > + int *ntrees; > + got_pack_progress_cb progress_cb; > + void *progress_arg; > + struct got_ratelimit *rl; > + got_cancel_cb cancel_cb; > + void *cancel_arg; > +}; > + > static const struct got_error * > +load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id, > + struct got_repository *repo) > +{ > + struct load_packed_obj_arg *a = arg; > + > + if (got_object_idset_contains(a->idset, id) || > + got_object_idset_contains(a->idset_exclude, id)) > + return NULL; > + > + return add_object(a->want_meta, > + a->want_meta ? a->idset : a->idset_exclude, > + id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->loose_obj_only, repo, > + a->ncolored, a->nfound, a->ntrees, > + a->progress_cb, a->progress_arg, a->rl); > +} > + > +static const struct got_error * > +load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime, > + struct got_object_id *id, const char *dpath, struct got_repository *repo) > +{ > + const struct got_error *err; > + struct load_packed_obj_arg *a = arg; > + const char *relpath; > + > + /* > + * When we receive a tree's ID and path but not the tree itself, > + * this tree object was not found in the pack file. This is the > + * last time we are being called for this optimized traversal. > + * Return from here and switch to loading objects the slow way. > + */ > + if (tree == NULL) { If got_object_id_dup fails in this if body, aren't we leaking those dup'ed ids? If I'm reading correctly, we're coming here from load_packed_object_ids, which allocates load_packed_obj_arg on the stack and bails out on error. 
> + free(a->id); > + a->id = got_object_id_dup(id); > + if (a->id == NULL) > + return got_error_from_errno("got_object_id_dup"); > + > + free(a->dpath); > + a->dpath = strdup(dpath); > + if (a->dpath == NULL) > + return got_error_from_errno("strdup"); > + > + a->mtime = mtime; > + return NULL; > + } > + > + if (got_object_idset_contains(a->idset, id) || > + got_object_idset_contains(a->idset_exclude, id)) > + return NULL; > + > + relpath = dpath; > + while (relpath[0] == '/') > + relpath++; > + > + err = add_object(a->want_meta, > + a->want_meta ? a->idset : a->idset_exclude, > + id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->loose_obj_only, > + repo, a->ncolored, a->nfound, a->ntrees, > + a->progress_cb, a->progress_arg, a->rl); > + if (err) > + return err; > + > + return load_tree_entries(NULL, a->want_meta, a->idset, > + a->idset_exclude, tree, dpath, mtime, repo, > + a->loose_obj_only, a->ncolored, a->nfound, a->ntrees, > + a->progress_cb, a->progress_arg, a->rl, > + a->cancel_cb, a->cancel_arg); > +} > + > +static const struct got_error * > +load_packed_object_ids(struct got_object_id **commits, int ncommits, > + int want_meta, struct got_object_idset *idset, > + struct got_object_idset *idset_exclude, int loose_obj_only, > + struct got_repository *repo, struct got_packidx *packidx, > + int *ncolored, int *nfound, int *ntrees, > + got_pack_progress_cb progress_cb, void *progress_arg, > + struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) > +{ > + const struct got_error *err = NULL; > + struct load_packed_obj_arg lpa; > + > + memset(&lpa, 0, sizeof(lpa)); > + lpa.want_meta = want_meta; > + lpa.idset = idset; > + lpa.idset_exclude = idset_exclude; > + lpa.loose_obj_only = loose_obj_only; > + lpa.ncolored = ncolored; > + lpa.nfound = nfound; > + lpa.ntrees = ntrees; > + lpa.progress_cb = progress_cb; > + lpa.progress_arg = progress_arg; > + lpa.rl = rl; > + lpa.cancel_cb = cancel_cb; > + lpa.cancel_arg = cancel_arg; > + > + /* Attempt to load objects 
via got-read-pack, as far as possible. */ > + err = got_object_enumerate(load_packed_commit_id, > + load_packed_tree_ids, &lpa, commits, ncommits, packidx, repo); > + if (err) > + return err; > + > + if (lpa.id == NULL) > + return NULL; > + > + /* > + * An incomplete tree hierarchy was present in the pack file > + * and caused loading to be aborted midway through a commit. > + * Continue loading trees the slow way. > + */ > + err = load_tree(want_meta, idset, idset_exclude, > + lpa.id, lpa.dpath, lpa.mtime, repo, loose_obj_only, > + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, > + cancel_cb, cancel_arg); > + free(lpa.id); > + free(lpa.dpath); > + return err; > +} > + > +static const struct got_error * > +find_pack_for_enumeration(struct got_packidx **best_packidx, > + struct got_object_id **ids, int nids, struct got_repository *repo) > +{ > + const struct got_error *err = NULL; > + struct got_pathlist_entry *pe; > + const char *best_packidx_path = NULL; > + int nobj_max = 0; > + int ncommits_max = 0; > + > + *best_packidx = NULL; > + > + /* > + * Find the largest pack which contains at least some of the > + * commits and tags we are interested in. > + */ > + TAILQ_FOREACH(pe, &repo->packidx_paths, entry) { > + const char *path_packidx = pe->path; > + struct got_packidx *packidx; > + int nobj, i, idx, ncommits = 0; > + > + err = got_repo_get_packidx(&packidx, path_packidx, repo); > + if (err) > + break; > + > + nobj = be32toh(packidx->hdr.fanout_table[0xff]); > + if (nobj <= nobj_max) > + continue; > + > + for (i = 0; i < nids; i++) { > + idx = got_packidx_get_object_idx(packidx, ids[i]); > + if (idx != -1) > + ncommits++; > + } > + if (ncommits > ncommits_max) { > + best_packidx_path = path_packidx; > + nobj_max = nobj; > + ncommits_max = ncommits; > + } > + } > + nit: maybe drop braces here? 
> + if (best_packidx_path) { > + err = got_repo_get_packidx(best_packidx, best_packidx_path, > + repo); > + } > + > + return err; > +} > + > +static const struct got_error * > load_object_ids(int *ncolored, int *nfound, int *ntrees, > struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs, > struct got_object_id **ours, int nours, struct got_repository *repo, > @@ -1457,6 +1659,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree > { > const struct got_error *err = NULL; > struct got_object_id **ids = NULL; > + struct got_packidx *packidx = NULL; > int i, nobj = 0, obj_type; > struct got_object_idset *idset_exclude; > > @@ -1473,6 +1676,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree > if (err) > goto done; > > + err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo); > + if (err) > + goto done; > + if (packidx) { > + err = load_packed_object_ids(theirs, ntheirs, 0, > + idset, idset_exclude, loose_obj_only, repo, packidx, > + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, > + cancel_cb, cancel_arg); > + if (err) > + goto done; > + } > + > for (i = 0; i < ntheirs; i++) { > struct got_object_id *id = theirs[i]; > if (id == NULL) > @@ -1497,6 +1712,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree > } > } > > + err = find_pack_for_enumeration(&packidx, ids, nobj, repo); > + if (err) > + goto done; > + if (packidx) { > + err = load_packed_object_ids(ids, nobj, 1, > + idset, idset_exclude, loose_obj_only, repo, packidx, > + ncolored, nfound, ntrees, > + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); > + if (err) > + goto done; > + } > + > for (i = 0; i < nobj; i++) { > err = load_commit(1, idset, idset_exclude, > ids[i], repo, loose_obj_only, ncolored, nfound, ntrees, > blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b > blob + d5f10725785b576a723c85e2b6322e71e49f192a > --- lib/privsep.c > +++ lib/privsep.c > @@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com > } > > 
static const struct got_error * > -send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, > - int idx0, int idxN, size_t len) > +send_tree_entries_batch(struct imsgbuf *ibuf, > + struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len) > { > struct ibuf *wbuf; > struct got_imsg_tree_entries ientries; > @@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par > return NULL; > } > > -const struct got_error * > -got_privsep_send_tree(struct imsgbuf *ibuf, > - struct got_parsed_tree_entry *entries, int nentries) > +static const struct got_error * > +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, > + int nentries) > { > const struct got_error *err = NULL; > - struct got_imsg_tree_object itree; > - size_t entries_len; > int i, j; > + size_t entries_len = sizeof(struct got_imsg_tree_entries); > > - itree.nentries = nentries; > - if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) > - == -1) > - return got_error_from_errno("imsg_compose TREE"); > - > - entries_len = sizeof(struct got_imsg_tree_entries); > i = 0; > for (j = 0; j < nentries; j++) { > struct got_parsed_tree_entry *pte = &entries[j]; > @@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf, > > if (j > 0 && > entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { > - err = send_tree_entries(ibuf, entries, > + err = send_tree_entries_batch(ibuf, entries, > i, j - 1, entries_len); > if (err) > return err; > @@ -1513,14 +1506,98 @@ got_privsep_send_tree(struct imsgbuf *ibuf, > } > > if (j > 0) { > - err = send_tree_entries(ibuf, entries, i, j - 1, entries_len); > + err = send_tree_entries_batch(ibuf, entries, i, j - 1, > + entries_len); > if (err) > return err; > } > > + return NULL; > +} > + > +const struct got_error * > +got_privsep_send_tree(struct imsgbuf *ibuf, > + struct got_parsed_tree_entry *entries, int nentries) > +{ > + const struct got_error *err = NULL; > + struct 
got_imsg_tree_object itree; > + > + itree.nentries = nentries; > + if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) > + == -1) > + return got_error_from_errno("imsg_compose TREE"); > + > + err = send_tree_entries(ibuf, entries, nentries); > + if (err) > + return err; > + > return flush_imsg(ibuf); > } > > + > +static const struct got_error * > +recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree, > + int *nentries) > +{ > + const struct got_error *err = NULL; > + struct got_imsg_tree_entries *ientries; > + struct got_tree_entry *te; > + size_t te_offset; > + size_t i; > + > + if (datalen <= sizeof(*ientries) || > + datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) > + return got_error(GOT_ERR_PRIVSEP_LEN); > + > + ientries = (struct got_imsg_tree_entries *)data; nit: and maybe here too? > + if (ientries->nentries > INT_MAX) { > + return got_error_msg(GOT_ERR_NO_SPACE, > + "too many tree entries"); > + } > + > + te_offset = sizeof(*ientries); > + for (i = 0; i < ientries->nentries; i++) { > + struct got_imsg_tree_entry ite; > + const char *te_name; > + uint8_t *buf = (uint8_t *)data + te_offset; > + > + if (te_offset >= datalen) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + > + /* Might not be aligned, size is ~32 bytes. */ > + memcpy(&ite, buf, sizeof(ite)); > + > + if (ite.namelen >= sizeof(te->name)) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } nit: I think you can leave this in a single line (it would be 70 characters long) now that it's not nested anymore. FWIW i sometimes find easier to mentally parse blocks like these if they're on a single line. 
> + if (te_offset + sizeof(ite) + ite.namelen > > + datalen) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + > + if (*nentries >= tree->nentries) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + te = &tree->entries[*nentries]; > + te_name = buf + sizeof(ite); > + memcpy(te->name, te_name, ite.namelen); > + te->name[ite.namelen] = '\0'; > + memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); > + te->mode = ite.mode; > + te->idx = *nentries; > + (*nentries)++; > + > + te_offset += sizeof(ite) + ite.namelen; > + } > + > + return err; > +} > + > const struct got_error * > got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf) > { > @@ -1529,7 +1606,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s > MIN(sizeof(struct got_imsg_error), > sizeof(struct got_imsg_tree_object)); > struct got_imsg_tree_object *itree; > - size_t i; > int nentries = 0; > > *tree = NULL; > @@ -1542,9 +1618,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s > struct imsg imsg; > size_t n; > size_t datalen; > - struct got_imsg_tree_entries *ientries; > - struct got_tree_entry *te = NULL; > - size_t te_offset; > > n = imsg_get(ibuf, &imsg); > if (n == 0) { > @@ -1611,56 +1684,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s > err = got_error(GOT_ERR_PRIVSEP_MSG); > break; > } > - if (datalen <= sizeof(*ientries) || > - datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { > - err = got_error(GOT_ERR_PRIVSEP_LEN); > - break; > - } > - > - ientries = imsg.data; > - if (ientries->nentries > INT_MAX) { > - err = got_error_msg(GOT_ERR_NO_SPACE, > - "too many tree entries"); > - break; > - } > - te_offset = sizeof(*ientries); > - for (i = 0; i < ientries->nentries; i++) { > - struct got_imsg_tree_entry ite; > - const char *te_name; > - uint8_t *buf = imsg.data + te_offset; > - > - if (te_offset >= datalen) { > - err = got_error(GOT_ERR_PRIVSEP_LEN); > - break; > - } > - > - /* Might not be aligned, size is ~32 bytes. 
*/ > - memcpy(&ite, buf, sizeof(ite)); > - > - if (ite.namelen >= sizeof(te->name)) { > - err = got_error(GOT_ERR_PRIVSEP_LEN); > - break; > - } > - if (te_offset + sizeof(ite) + ite.namelen > > - datalen) { > - err = got_error(GOT_ERR_PRIVSEP_LEN); > - break; > - } > - if (nentries >= (*tree)->nentries) { > - err = got_error(GOT_ERR_PRIVSEP_LEN); > - break; > - } > - te = &(*tree)->entries[nentries]; > - te_name = buf + sizeof(ite); > - memcpy(te->name, te_name, ite.namelen); > - te->name[ite.namelen] = '\0'; > - memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); > - te->mode = ite.mode; > - te->idx = nentries; > - nentries++; > - > - te_offset += sizeof(ite) + ite.namelen; > - } > + err = recv_tree_entries(imsg.data, datalen, > + *tree, &nentries); > break; > default: > err = got_error(GOT_ERR_PRIVSEP_MSG); > @@ -2731,6 +2756,278 @@ got_privsep_recv_traversed_commits(struct got_commit_o > } > > const struct got_error * > +got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf, > + struct got_object_id *tree_id, const char *path, > + struct got_parsed_tree_entry *entries, int nentries) > +{ > + const struct got_error *err = NULL; > + struct ibuf *wbuf; > + size_t path_len = strlen(path); > + size_t msglen; > + > + msglen = sizeof(struct got_imsg_enumerated_tree) + path_len; > + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen); > + if (wbuf == NULL) > + return got_error_from_errno("imsg_create ENUMERATED_TREE"); > + > + if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) { > + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); we shouldn't call ibuf_free in the imsg_add error case, right? 
(the other imsg_add calls are fine, these one here are probably a leftover) > + ibuf_free(wbuf); > + return err; > + } > + if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) { > + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); > + ibuf_free(wbuf); > + return err; > + } > + if (imsg_add(wbuf, path, path_len) == -1) { > + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); > + ibuf_free(wbuf); > + return err; > + } > + > + wbuf->fd = -1; > + imsg_close(ibuf, wbuf); > + > + if (entries) { > + err = send_tree_entries(ibuf, entries, nentries); > + if (err) > + return err; > + } > + > + return flush_imsg(ibuf); > +} > + > +const struct got_error * > +got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf) > +{ > + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST, > + 0, 0, -1, NULL, 0) == -1) > + return got_error_from_errno("imsg_compose " > + "OBJECT_ENUMERATION_REQUEST"); > + > + return flush_imsg(ibuf); > +} > + > +const struct got_error * > +got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf) > +{ > + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE, > + 0, 0, -1, NULL, 0) == -1) > + return got_error_from_errno("imsg_compose " > + "OBJECT_ENUMERATION_DONE"); > + > + return flush_imsg(ibuf); > +} > + > +const struct got_error * > +got_privsep_send_enumerated_commit(struct imsgbuf *ibuf, > + struct got_object_id *id, time_t mtime) > +{ > + struct ibuf *wbuf; > + > + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0, > + sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH); > + if (wbuf == NULL) > + return got_error_from_errno("imsg_create ENUMERATED_COMMIT"); > + > + /* Keep in sync with struct got_imsg_enumerated_commit! 
*/ > + if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1) > + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); > + if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1) > + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); > + > + wbuf->fd = -1; > + imsg_close(ibuf, wbuf); > + /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. */ > + return NULL; > +} > + > +const struct got_error * > +got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf, > + got_object_enumerate_commit_cb cb_commit, > + got_object_enumerate_tree_cb cb_tree, void *cb_arg, > + struct got_repository *repo) > +{ > + const struct got_error *err = NULL; > + struct imsg imsg; > + struct got_imsg_enumerated_commit *icommit = NULL; > + struct got_object_id commit_id; > + int have_commit = 0; > + time_t mtime = 0; > + struct got_tree_object tree; > + struct got_imsg_enumerated_tree *itree; > + struct got_object_id tree_id; > + char *path = NULL, *canon_path = NULL; > + size_t datalen, path_len; > + int nentries = -1; > + int done = 0; > + > + memset(&tree, 0, sizeof(tree)); > + > + while (!done) { > + err = got_privsep_recv_imsg(&imsg, ibuf, 0); > + if (err) > + break; > + > + datalen = imsg.hdr.len - IMSG_HEADER_SIZE; > + switch (imsg.hdr.type) { > + case GOT_IMSG_ENUMERATED_COMMIT: > + if (have_commit && nentries != -1) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + if (datalen != sizeof(*icommit)) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + icommit = (struct got_imsg_enumerated_commit *)imsg.data; > + memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH); > + mtime = icommit->mtime; > + err = cb_commit(cb_arg, mtime, &commit_id, repo); > + if (err) > + break; > + have_commit = 1; > + break; > + case GOT_IMSG_ENUMERATED_TREE: > + /* Should be preceeded by GOT_IMSG_ENUMERATED_COMMIT. 
*/ > + if (!have_commit) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + if (datalen < sizeof(*itree)) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + itree = imsg.data; > + path_len = datalen - sizeof(*itree); > + if (path_len == 0) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1)); > + free(path); > + path = malloc(path_len + 1); > + if (path == NULL) { > + err = got_error_from_errno("malloc"); > + break; > + } > + free(canon_path); > + canon_path = malloc(path_len + 1); > + if (canon_path == NULL) { > + err = got_error_from_errno("malloc"); > + break; > + } > + memcpy(path, (uint8_t *)imsg.data + sizeof(*itree), > + path_len); > + path[path_len] = '\0'; > + if (!got_path_is_absolute(path)) { > + err = got_error(GOT_ERR_BAD_PATH); > + break; > + } > + if (got_path_is_root_dir(path)) { > + /* XXX check what got_canonpath() does wrong */ > + canon_path[0] = '/'; > + canon_path[1] = '\0'; > + } else { > + err = got_canonpath(path, canon_path, > + path_len + 1); > + if (err) > + break; > + } > + if (strcmp(path, canon_path) != 0) { > + err = got_error(GOT_ERR_BAD_PATH); > + break; > + } > + if (nentries != -1) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + if (itree->nentries < -1) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + if (itree->nentries == -1) { > + /* Tree was not found in pack file. */ > + err = cb_tree(cb_arg, NULL, mtime, &tree_id, > + path, repo); > + break; > + } > + if (itree->nentries > INT_MAX) { > + err = got_error(GOT_ERR_PRIVSEP_LEN); > + break; > + } > + tree.entries = calloc(itree->nentries, > + sizeof(struct got_tree_entry)); > + if (tree.entries == NULL) { > + err = got_error_from_errno("calloc"); > + break; > + } > + if (itree->nentries == 0) { > + err = cb_tree(cb_arg, &tree, mtime, &tree_id, > + path, repo); > + if (err) > + break; > + > + /* Prepare for next tree. 
*/ > + free(tree.entries); > + memset(&tree, 0, sizeof(tree)); > + nentries = -1; > + } else { > + tree.nentries = itree->nentries; > + nentries = 0; > + } > + break; > + case GOT_IMSG_TREE_ENTRIES: > + /* Should be preceeded by GOT_IMSG_ENUMERATED_TREE. */ > + if (nentries <= -1) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + err = recv_tree_entries(imsg.data, datalen, > + &tree, &nentries); > + if (err) > + break; > + if (tree.nentries == nentries) { > + err = cb_tree(cb_arg, &tree, mtime, &tree_id, > + path, repo); > + if (err) > + break; > + > + /* Prepare for next tree. */ > + free(tree.entries); > + memset(&tree, 0, sizeof(tree)); > + nentries = -1; > + } > + break; > + case GOT_IMSG_TREE_ENUMERATION_DONE: > + /* All trees have been found and traversed. */ > + if (path == NULL || nentries != -1) { > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + have_commit = 0; > + break; > + case GOT_IMSG_OBJECT_ENUMERATION_DONE: > + done = 1; > + break; > + default: > + err = got_error(GOT_ERR_PRIVSEP_MSG); > + break; > + } > + > + imsg_free(&imsg); > + if (err) > + break; > + } > + > + free(path); > + free(canon_path); > + free(tree.entries); > + return err; > +} > + > +const struct got_error * > got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx, > struct got_object_id *id) > { > blob - ea9a7e564cb840af0d6a61c8c8e0cf5ed3d93147 > blob + 41edfaa4585e69c5e5bdf199212200f20b694c25 > --- libexec/got-read-pack/got-read-pack.c > +++ libexec/got-read-pack/got-read-pack.c > @@ -14,6 +14,7 @@ > * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
> */ > > +#include <sys/stat.h> > #include <sys/types.h> > #include <sys/queue.h> > #include <sys/uio.h> > @@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf) > return got_privsep_flush_imsg(ibuf); > } > > - > static const struct got_error * > commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf, > struct got_pack *pack, struct got_packidx *packidx, > @@ -1022,6 +1022,31 @@ recv_object_ids(struct got_object_idset *idset, struct > } > > static const struct got_error * > +recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf) > +{ > + const struct got_error *err = NULL; > + int done = 0; > + struct got_object_qid *qid; > + struct got_object_id *ids; > + size_t nids, i; > + > + for (;;) { > + err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf); > + if (err || done) > + break; > + for (i = 0; i < nids; i++) { > + err = got_object_qid_alloc_partial(&qid); > + if (err) > + return err; > + memcpy(&qid->id, &ids[i], sizeof(qid->id)); > + STAILQ_INSERT_TAIL(queue, qid, entry); > + } > + } > + > + return err; > +} > + > +static const struct got_error * > delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf, > FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx) > { > @@ -1132,6 +1157,281 @@ done: > } > > static const struct got_error * > +send_tree_enumeration_done(struct imsgbuf *ibuf) > +{ > + if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1, > + NULL, 0) == -1) > + return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE"); > + > + return got_privsep_flush_imsg(ibuf); > +} > + > +static const struct got_error * > +enumerate_tree(struct imsgbuf *ibuf, size_t *totlen, > + struct got_object_id *tree_id, > + const char *path, struct got_pack *pack, struct got_packidx *packidx, > + struct got_object_cache *objcache, struct got_object_idset *idset) > +{ > + const struct got_error *err = NULL; > + struct got_object_id_queue ids; > + struct got_object_qid *qid; > + 
uint8_t *buf = NULL; > + struct got_parsed_tree_entry *entries = NULL; > + > + STAILQ_INIT(&ids); > + > + err = got_object_qid_alloc_partial(&qid); > + if (err) > + return err; > + memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH); > + qid->data = strdup(path); > + if (qid->data == NULL) { > + err = got_error_from_errno("strdup"); > + goto done; > + } > + STAILQ_INSERT_TAIL(&ids, qid, entry); > + qid = NULL; > + > + do { > + const char *path; > + int idx, nentries, i; > + > + if (sigint_received) { > + err = got_error(GOT_ERR_CANCELLED); > + goto done; > + } > + > + qid = STAILQ_FIRST(&ids); > + STAILQ_REMOVE_HEAD(&ids, entry); > + path = qid->data; > + > + idx = got_packidx_get_object_idx(packidx, &qid->id); > + if (idx == -1) { > + err = got_privsep_send_enumerated_tree(totlen, ibuf, > + &qid->id, path, NULL, -1); > + break; > + } > + > + err = open_tree(&buf, &entries, &nentries, > + pack, packidx, idx, &qid->id, objcache); > + if (err) { > + if (err->code != GOT_ERR_NO_OBJ) > + goto done; > + } > + > + err = got_privsep_send_enumerated_tree(totlen, > + ibuf, &qid->id, path, entries, nentries); > + if (err) > + goto done; > + > + err = got_object_idset_add(idset, &qid->id, NULL); > + if (err) > + goto done; > + > + for (i = 0; i < nentries; i++) { > + struct got_object_qid *eqid = NULL; > + struct got_parsed_tree_entry *pte = &entries[i]; > + char *p; > + > + if (!S_ISDIR(pte->mode)) > + continue; > + > + err = got_object_qid_alloc_partial(&eqid); > + if (err) > + goto done; > + memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1)); > + > + if (got_object_idset_contains(idset, &eqid->id)) { > + got_object_qid_free(eqid); > + continue; > + } > + > + if (asprintf(&p, "%s%s%s", path, > + got_path_is_root_dir(path) ? 
"" : "/", > + pte->name) == -1) { > + err = got_error_from_errno("asprintf"); > + got_object_qid_free(eqid); > + goto done; > + } > + eqid->data = p; > + STAILQ_INSERT_TAIL(&ids, eqid, entry); > + idx = got_packidx_get_object_idx(packidx, &eqid->id); > + if (idx == -1) > + break; > + } > + > + free(qid->data); > + got_object_qid_free(qid); > + qid = NULL; > + > + free(entries); > + entries = NULL; > + free(buf); > + buf = NULL; > + } while (!STAILQ_EMPTY(&ids)); > + > + err = send_tree_enumeration_done(ibuf); > +done: > + free(buf); > + if (qid) > + free(qid->data); > + got_object_qid_free(qid); > + got_object_id_queue_free(&ids); > + free(entries); > + if (err) { > + if (err->code == GOT_ERR_PRIVSEP_PIPE) > + err = NULL; > + else > + got_privsep_send_error(ibuf, err); > + } > + > + return err; > +} > + > +static const struct got_error * > +enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf, > + struct got_pack *pack, struct got_packidx *packidx, > + struct got_object_cache *objcache) > +{ > + const struct got_error *err = NULL; > + struct got_object_id_queue commit_ids; > + const struct got_object_id_queue *parents = NULL; > + struct got_object_qid *qid = NULL; > + struct got_object *obj = NULL; > + struct got_commit_object *commit = NULL; > + struct got_object_id *tree_id = NULL; > + size_t totlen = 0; > + struct got_object_idset *idset; > + int idx; > + > + STAILQ_INIT(&commit_ids); > + > + idset = got_object_idset_alloc(); > + if (idset == NULL) > + return got_error_from_errno("got_object_idset_alloc"); > + > + err = recv_object_id_queue(&commit_ids, ibuf); > + if (err) > + goto done; > + > + while (!STAILQ_EMPTY(&commit_ids)) { > + if (sigint_received) { > + err = got_error(GOT_ERR_CANCELLED); > + goto done; > + } > + > + qid = STAILQ_FIRST(&commit_ids); > + STAILQ_REMOVE_HEAD(&commit_ids, entry); > + > + if (got_object_idset_contains(idset, &qid->id)) { > + got_object_qid_free(qid); > + qid = NULL; > + continue; > + } > + > + idx = 
got_packidx_get_object_idx(packidx, &qid->id); > + if (idx == -1) > + break; > + > + err = open_object(&obj, pack, packidx, idx, &qid->id, > + objcache); > + if (err) > + goto done; > + if (obj->type == GOT_OBJ_TYPE_TAG) { > + struct got_tag_object *tag; > + uint8_t *buf; > + size_t len; > + err = got_packfile_extract_object_to_mem(&buf, > + &len, obj, pack); > + if (err) > + goto done; > + obj->size = len; > + err = got_object_parse_tag(&tag, buf, len); > + if (err) { > + free(buf); > + goto done; > + } > + err = open_commit(&commit, pack, packidx, idx, > + &tag->id, objcache); > + got_object_tag_close(tag); > + free(buf); > + if (err) > + goto done; > + } else if (obj->type == GOT_OBJ_TYPE_COMMIT) { > + err = open_commit(&commit, pack, packidx, idx, > + &qid->id, objcache); > + if (err) > + goto done; > + } else { > + err = got_error(GOT_ERR_OBJ_TYPE); > + goto done; > + } > + got_object_close(obj); > + obj = NULL; > + > + err = got_privsep_send_enumerated_commit(ibuf, &qid->id, > + got_object_commit_get_committer_time(commit)); > + if (err) > + goto done; > + > + tree_id = got_object_commit_get_tree_id(commit); > + idx = got_packidx_get_object_idx(packidx, tree_id); > + if (idx == -1) { > + err = got_privsep_send_enumerated_tree(&totlen, ibuf, > + tree_id, "", NULL, -1); > + if (err) > + goto done; > + break; > + } > + > + if (got_object_idset_contains(idset, tree_id)) { > + got_object_qid_free(qid); > + qid = NULL; > + continue; > + } > + > + err = enumerate_tree(ibuf, &totlen, tree_id, "/", > + pack, packidx, objcache, idset); > + if (err) > + goto done; > + > + got_object_qid_free(qid); > + qid = NULL; > + > + parents = got_object_commit_get_parent_ids(commit); > + if (parents) { > + struct got_object_qid *pid; > + STAILQ_FOREACH(pid, parents, entry) { > + if (got_object_idset_contains(idset, &pid->id)) > + continue; > + err = got_object_qid_alloc_partial(&qid); > + if (err) > + goto done; > + memcpy(&qid->id, &pid->id, sizeof(qid->id)); > + 
STAILQ_INSERT_TAIL(&commit_ids, qid, entry); > + qid = NULL; > + } > + } > + > + got_object_commit_close(commit); > + commit = NULL; > + } > + > + err = got_privsep_send_object_enumeration_done(ibuf); > + if (err) > + goto done; > +done: > + if (obj) > + got_object_close(obj); > + if (commit) > + got_object_commit_close(commit); > + got_object_qid_free(qid); > + got_object_id_queue_free(&commit_ids); > + got_object_idset_free(idset); > + return err; > +} > + > +static const struct got_error * > receive_pack(struct got_pack **packp, struct imsgbuf *ibuf) > { > const struct got_error *err = NULL; > @@ -1344,6 +1644,10 @@ main(int argc, char *argv[]) > err = commit_traversal_request(&imsg, &ibuf, pack, > packidx, &objcache); > break; > + case GOT_IMSG_OBJECT_ENUMERATION_REQUEST: > + err = enumeration_request(&imsg, &ibuf, pack, > + packidx, &objcache); > + break; > default: > err = got_error(GOT_ERR_PRIVSEP_MSG); > break; diff d6a28ffe187127e3247254d7e242bb52d66eb26b /home/op/w/got blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4 file + lib/got_lib_object.h --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -133,3 +133,13 @@ const struct got_error *got_object_tree_entry_dup(stru const struct got_error *got_traverse_packed_commits( struct got_object_id_queue *, struct got_object_id *, const char *, struct got_repository *); + +typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *, + time_t, struct got_object_id *, struct got_repository *); +typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *, + struct got_tree_object *, time_t, struct got_object_id *, const char *, + struct got_repository *); + +const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb, + got_object_enumerate_tree_cb, void *, struct got_object_id **, int, + struct got_packidx *, struct got_repository *); blob - e719a95bde6bbe971668bb22ea3a72e2990ad927 file + lib/got_lib_privsep.h --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -145,6 
+145,11 @@ enum got_imsg_type { GOT_IMSG_COMMIT_TRAVERSAL_REQUEST, GOT_IMSG_TRAVERSED_COMMITS, GOT_IMSG_COMMIT_TRAVERSAL_DONE, + GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + GOT_IMSG_ENUMERATED_COMMIT, + GOT_IMSG_ENUMERATED_TREE, + GOT_IMSG_TREE_ENUMERATION_DONE, + GOT_IMSG_OBJECT_ENUMERATION_DONE, /* Message sending file descriptor to a temporary file. */ GOT_IMSG_TMPFD, @@ -556,6 +561,22 @@ struct got_imsg_traversed_commits { /* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */ } __attribute__((__packed__)); +/* Structure for GOT_IMSG_ENUMERATED_COMMIT */ +struct got_imsg_enumerated_commit { + uint8_t id[SHA1_DIGEST_LENGTH]; + time_t mtime; +} __attribute__((__packed__)); + +/* Structure for GOT_IMSG_ENUMERATED_TREE */ +struct got_imsg_enumerated_tree { + uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */ + int nentries; /* number of tree entries */ + + /* Followed by tree's path in remaining data of imsg buffer. */ + + /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */ +} __attribute__((__packed__)); + /* * Structure for GOT_IMSG_GOTCONFIG_REMOTE and * GOT_IMSG_GOTCONFIG_REMOTE data. 
@@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver const struct got_error *got_privsep_recv_traversed_commits( struct got_commit_object **, struct got_object_id **, struct got_object_id_queue *, struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_tree(size_t *, + struct imsgbuf *, struct got_object_id *, const char *, + struct got_parsed_tree_entry *, int); +const struct got_error *got_privsep_send_object_enumeration_request( + struct imsgbuf *); +const struct got_error *got_privsep_send_object_enumeration_done( + struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *, + struct got_object_id *, time_t); +const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *, + got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *, + struct got_repository *); const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int, struct got_object_id *); blob - 2d612890612d7d8a8e30549c38659cd083a2e41e file + lib/object.c --- lib/object.c +++ lib/object.c @@ -60,6 +60,10 @@ #define MIN(_a,_b) ((_a) < (_b) ? 
(_a) : (_b)) #endif +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + struct got_object_id * got_object_get_id(struct got_object *obj) { @@ -2390,3 +2394,70 @@ done: free(changed_commit_id); return err; } + +const struct got_error * +got_object_enumerate(got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_object_id **commit_ids, int ncommits, + struct got_packidx *packidx, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS]; + struct got_pack *pack; + char *path_packfile = NULL; + int i, j = 0; + + err = got_packidx_get_packfile_path(&path_packfile, + packidx->path_packidx); + if (err) + return err; + + pack = got_repo_get_cached_pack(repo, path_packfile); + if (pack == NULL) { + err = got_repo_cache_pack(&pack, repo, path_packfile, packidx); + if (err) + goto done; + } + + if (pack->privsep_child == NULL) { + err = start_pack_privsep_child(pack, packidx); + if (err) + goto done; + } + + err = got_privsep_send_object_enumeration_request( + pack->privsep_child->ibuf); + if (err) + goto done; + + /* + * XXX This is stupid. Consider adding a function which + * does the chunking internally? 
+ */ + for (i = 0; i < ncommits; i++) { + j = i % nitems(ids); + ids[j] = commit_ids[i]; + if (j >= nitems(ids) - 1) { + err = got_privsep_send_object_idlist( + pack->privsep_child->ibuf, ids, j + 1); + if (err) + goto done; + } + } + if (j > 0) { + err = got_privsep_send_object_idlist( + pack->privsep_child->ibuf, ids, j + 1); + if (err) + goto done; + } + + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); + if (err) + goto done; + + err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf, + cb_commit, cb_tree, cb_arg, repo); +done: + free(path_packfile); + return err; +} blob - bb8a404c064277aaedb6f0a5bd168e7765442df7 file + lib/pack_create.c --- lib/pack_create.c +++ lib/pack_create.c @@ -903,21 +903,16 @@ add_object(int want_meta, struct got_object_idset *ids static const struct got_error * load_tree_entries(struct got_object_id_queue *ids, int want_meta, struct got_object_idset *idset, struct got_object_idset *idset_exclude, - struct got_object_id *tree_id, + struct got_tree_object *tree, const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo, int loose_obj_only, int *ncolored, int *nfound, int *ntrees, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; - struct got_tree_object *tree; char *p = NULL; int i; - err = got_object_open_as_tree(&tree, repo, tree_id); - if (err) - return err; - (*ntrees)++; err = report_progress(progress_cb, progress_arg, rl, *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0); @@ -939,8 +934,16 @@ load_tree_entries(struct got_object_id_queue *ids, int got_object_idset_contains(idset, id) || got_object_idset_contains(idset_exclude, id)) continue; - - if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "", + + /* + * If got-read-pack is crawling trees for us then + * we are only here to collect blob IDs. 
+ */ + if (ids == NULL && S_ISDIR(mode)) + continue; + + if (asprintf(&p, "%s%s%s", dpath, + got_path_is_root_dir(dpath) ? "" : "/", got_tree_entry_get_name(e)) == -1) { err = got_error_from_errno("asprintf"); break; @@ -970,7 +973,6 @@ load_tree_entries(struct got_object_id_queue *ids, int } } - got_object_tree_close(tree); free(p); return err; } @@ -987,6 +989,7 @@ load_tree(int want_meta, struct got_object_idset *idse const struct got_error *err = NULL; struct got_object_id_queue tree_ids; struct got_object_qid *qid; + struct got_tree_object *tree = NULL; if (got_object_idset_contains(idset, tree_id) || got_object_idset_contains(idset_exclude, tree_id)) @@ -1034,20 +1037,31 @@ load_tree(int want_meta, struct got_object_idset *idse break; } + err = got_object_open_as_tree(&tree, repo, &qid->id); + if (err) { + free(qid->data); + got_object_qid_free(qid); + break; + } + err = load_tree_entries(&tree_ids, want_meta, idset, - idset_exclude, &qid->id, - path, mtime, seed, repo, loose_obj_only, ncolored, nfound, - ntrees, progress_cb, progress_arg, rl, - cancel_cb, cancel_arg); + idset_exclude, tree, path, mtime, seed, repo, + loose_obj_only, ncolored, nfound, ntrees, progress_cb, + progress_arg, rl, cancel_cb, cancel_arg); free(qid->data); got_object_qid_free(qid); if (err) break; + + got_object_tree_close(tree); + tree = NULL; } STAILQ_FOREACH(qid, &tree_ids, entry) free(qid->data); got_object_id_queue_free(&tree_ids); + if (tree) + got_object_tree_close(tree); return err; } @@ -1451,7 +1465,197 @@ done: return err; } +struct load_packed_obj_arg { + /* output parameters: */ + struct got_object_id *id; + char *dpath; + time_t mtime; + + /* input parameters: */ + int want_meta; + struct got_object_idset *idset; + struct got_object_idset *idset_exclude; + uint32_t seed; + int loose_obj_only; + int *ncolored; + int *nfound; + int *ntrees; + got_pack_progress_cb progress_cb; + void *progress_arg; + struct got_ratelimit *rl; + got_cancel_cb cancel_cb; + void *cancel_arg; 
+}; + static const struct got_error * +load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id, + struct got_repository *repo) +{ + struct load_packed_obj_arg *a = arg; + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + return add_object(a->want_meta, + a->want_meta ? a->idset : a->idset_exclude, + id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->seed, a->loose_obj_only, + repo, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); +} + +static const struct got_error * +load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime, + struct got_object_id *id, const char *dpath, struct got_repository *repo) +{ + const struct got_error *err; + struct load_packed_obj_arg *a = arg; + const char *relpath; + + /* + * When we receive a tree's ID and path but not the tree itself, + * this tree object was not found in the pack file. This is the + * last time we are being called for this optimized traversal. + * Return from here and switch to loading objects the slow way. + */ + if (tree == NULL) { + free(a->id); + a->id = got_object_id_dup(id); + if (a->id == NULL) + return got_error_from_errno("got_object_id_dup"); + + free(a->dpath); + a->dpath = strdup(dpath); + if (a->dpath == NULL) + return got_error_from_errno("strdup"); + + a->mtime = mtime; + return NULL; + } + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + relpath = dpath; + while (relpath[0] == '/') + relpath++; + + err = add_object(a->want_meta, + a->want_meta ? 
a->idset : a->idset_exclude, + id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->seed, a->loose_obj_only, + repo, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); + if (err) + return err; + + return load_tree_entries(NULL, a->want_meta, a->idset, + a->idset_exclude, tree, dpath, mtime, a->seed, repo, + a->loose_obj_only, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl, + a->cancel_cb, a->cancel_arg); +} + +static const struct got_error * +load_packed_object_ids(struct got_object_id **commits, int ncommits, + int want_meta, struct got_object_idset *idset, + struct got_object_idset *idset_exclude, int loose_obj_only, + uint32_t seed, struct got_repository *repo, struct got_packidx *packidx, + int *ncolored, int *nfound, int *ntrees, + got_pack_progress_cb progress_cb, void *progress_arg, + struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct load_packed_obj_arg lpa; + + memset(&lpa, 0, sizeof(lpa)); + lpa.want_meta = want_meta; + lpa.idset = idset; + lpa.idset_exclude = idset_exclude; + lpa.seed = seed; + lpa.loose_obj_only = loose_obj_only; + lpa.ncolored = ncolored; + lpa.nfound = nfound; + lpa.ntrees = ntrees; + lpa.progress_cb = progress_cb; + lpa.progress_arg = progress_arg; + lpa.rl = rl; + lpa.cancel_cb = cancel_cb; + lpa.cancel_arg = cancel_arg; + + /* Attempt to load objects via got-read-pack, as far as possible. */ + err = got_object_enumerate(load_packed_commit_id, + load_packed_tree_ids, &lpa, commits, ncommits, packidx, repo); + if (err) + return err; + + if (lpa.id == NULL) + return NULL; + + /* + * An incomplete tree hierarchy was present in the pack file + * and caused loading to be aborted midway through a commit. + * Continue loading trees the slow way. 
+ */ + err = load_tree(want_meta, idset, idset_exclude, + lpa.id, lpa.dpath, lpa.mtime, seed, repo, loose_obj_only, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + free(lpa.id); + free(lpa.dpath); + return err; +} + +static const struct got_error * +find_pack_for_enumeration(struct got_packidx **best_packidx, + struct got_object_id **ids, int nids, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_pathlist_entry *pe; + const char *best_packidx_path = NULL; + int nobj_max = 0; + int ncommits_max = 0; + + *best_packidx = NULL; + + /* + * Find the largest pack which contains at least some of the + * commits and tags we are interested in. + */ + TAILQ_FOREACH(pe, &repo->packidx_paths, entry) { + const char *path_packidx = pe->path; + struct got_packidx *packidx; + int nobj, i, idx, ncommits = 0; + + err = got_repo_get_packidx(&packidx, path_packidx, repo); + if (err) + break; + + nobj = be32toh(packidx->hdr.fanout_table[0xff]); + if (nobj <= nobj_max) + continue; + + for (i = 0; i < nids; i++) { + idx = got_packidx_get_object_idx(packidx, ids[i]); + if (idx != -1) + ncommits++; + } + if (ncommits > ncommits_max) { + best_packidx_path = path_packidx; + nobj_max = nobj; + ncommits_max = ncommits; + } + } + + if (best_packidx_path) { + err = got_repo_get_packidx(best_packidx, best_packidx_path, + repo); + } + + return err; +} + +static const struct got_error * load_object_ids(int *ncolored, int *nfound, int *ntrees, struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs, struct got_object_id **ours, int nours, struct got_repository *repo, @@ -1461,6 +1665,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree { const struct got_error *err = NULL; struct got_object_id **ids = NULL; + struct got_packidx *packidx = NULL; int i, nobj = 0, obj_type; struct got_object_idset *idset_exclude; @@ -1477,6 +1682,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree if (err) goto 
done; + err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(theirs, ntheirs, 0, + idset, idset_exclude, loose_obj_only, seed, repo, packidx, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < ntheirs; i++) { struct got_object_id *id = theirs[i]; if (id == NULL) @@ -1501,6 +1718,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree } } + err = find_pack_for_enumeration(&packidx, ids, nobj, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(ids, nobj, 1, + idset, idset_exclude, loose_obj_only, seed, repo, packidx, + ncolored, nfound, ntrees, + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < nobj; i++) { err = load_commit(1, idset, idset_exclude, ids[i], repo, seed, loose_obj_only, ncolored, nfound, ntrees, blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b file + lib/privsep.c --- lib/privsep.c +++ lib/privsep.c @@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com } static const struct got_error * -send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, - int idx0, int idxN, size_t len) +send_tree_entries_batch(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len) { struct ibuf *wbuf; struct got_imsg_tree_entries ientries; @@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par return NULL; } -const struct got_error * -got_privsep_send_tree(struct imsgbuf *ibuf, - struct got_parsed_tree_entry *entries, int nentries) +static const struct got_error * +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, + int nentries) { const struct got_error *err = NULL; - struct got_imsg_tree_object itree; - size_t entries_len; int i, j; + size_t entries_len = sizeof(struct got_imsg_tree_entries); - 
itree.nentries = nentries; - if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) - == -1) - return got_error_from_errno("imsg_compose TREE"); - - entries_len = sizeof(struct got_imsg_tree_entries); i = 0; for (j = 0; j < nentries; j++) { struct got_parsed_tree_entry *pte = &entries[j]; @@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf, if (j > 0 && entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = send_tree_entries(ibuf, entries, + err = send_tree_entries_batch(ibuf, entries, i, j - 1, entries_len); if (err) return err; @@ -1513,14 +1506,98 @@ got_privsep_send_tree(struct imsgbuf *ibuf, } if (j > 0) { - err = send_tree_entries(ibuf, entries, i, j - 1, entries_len); + err = send_tree_entries_batch(ibuf, entries, i, j - 1, + entries_len); if (err) return err; } + return NULL; +} + +const struct got_error * +got_privsep_send_tree(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_object itree; + + itree.nentries = nentries; + if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) + == -1) + return got_error_from_errno("imsg_compose TREE"); + + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + return flush_imsg(ibuf); } + +static const struct got_error * +recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree, + int *nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_entries *ientries; + struct got_tree_entry *te; + size_t te_offset; + size_t i; + + if (datalen <= sizeof(*ientries) || + datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) + return got_error(GOT_ERR_PRIVSEP_LEN); + + ientries = (struct got_imsg_tree_entries *)data; + if (ientries->nentries > INT_MAX) { + return got_error_msg(GOT_ERR_NO_SPACE, + "too many tree entries"); + } + + te_offset = sizeof(*ientries); + for (i = 0; i < ientries->nentries; i++) { + struct got_imsg_tree_entry ite; + const 
char *te_name; + uint8_t *buf = (uint8_t *)data + te_offset; + + if (te_offset >= datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + /* Might not be aligned, size is ~32 bytes. */ + memcpy(&ite, buf, sizeof(ite)); + + if (ite.namelen >= sizeof(te->name)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + if (te_offset + sizeof(ite) + ite.namelen > + datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + if (*nentries >= tree->nentries) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + te = &tree->entries[*nentries]; + te_name = buf + sizeof(ite); + memcpy(te->name, te_name, ite.namelen); + te->name[ite.namelen] = '\0'; + memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); + te->mode = ite.mode; + te->idx = *nentries; + (*nentries)++; + + te_offset += sizeof(ite) + ite.namelen; + } + + return err; +} + const struct got_error * got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf) { @@ -1529,7 +1606,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s MIN(sizeof(struct got_imsg_error), sizeof(struct got_imsg_tree_object)); struct got_imsg_tree_object *itree; - size_t i; int nentries = 0; *tree = NULL; @@ -1542,9 +1618,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s struct imsg imsg; size_t n; size_t datalen; - struct got_imsg_tree_entries *ientries; - struct got_tree_entry *te = NULL; - size_t te_offset; n = imsg_get(ibuf, &imsg); if (n == 0) { @@ -1611,56 +1684,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s err = got_error(GOT_ERR_PRIVSEP_MSG); break; } - if (datalen <= sizeof(*ientries) || - datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - ientries = imsg.data; - if (ientries->nentries > INT_MAX) { - err = got_error_msg(GOT_ERR_NO_SPACE, - "too many tree entries"); - break; - } - te_offset = sizeof(*ientries); - for (i = 0; i < ientries->nentries; i++) { - struct got_imsg_tree_entry ite; - const char *te_name; - uint8_t 
*buf = imsg.data + te_offset; - - if (te_offset >= datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - /* Might not be aligned, size is ~32 bytes. */ - memcpy(&ite, buf, sizeof(ite)); - - if (ite.namelen >= sizeof(te->name)) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (te_offset + sizeof(ite) + ite.namelen > - datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (nentries >= (*tree)->nentries) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - te = &(*tree)->entries[nentries]; - te_name = buf + sizeof(ite); - memcpy(te->name, te_name, ite.namelen); - te->name[ite.namelen] = '\0'; - memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); - te->mode = ite.mode; - te->idx = nentries; - nentries++; - - te_offset += sizeof(ite) + ite.namelen; - } + err = recv_tree_entries(imsg.data, datalen, + *tree, &nentries); break; default: err = got_error(GOT_ERR_PRIVSEP_MSG); @@ -2731,6 +2756,278 @@ got_privsep_recv_traversed_commits(struct got_commit_o } const struct got_error * +got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf, + struct got_object_id *tree_id, const char *path, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct ibuf *wbuf; + size_t path_len = strlen(path); + size_t msglen; + + msglen = sizeof(struct got_imsg_enumerated_tree) + path_len; + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_TREE"); + + if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + if (imsg_add(wbuf, path, path_len) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + + 
wbuf->fd = -1; + imsg_close(ibuf, wbuf); + + if (entries) { + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + } + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_REQUEST"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_DONE"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_enumerated_commit(struct imsgbuf *ibuf, + struct got_object_id *id, time_t mtime) +{ + struct ibuf *wbuf; + + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0, + sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_COMMIT"); + + /* Keep in sync with struct got_imsg_enumerated_commit! */ + if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + + wbuf->fd = -1; + imsg_close(ibuf, wbuf); + /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. 
*/ + return NULL; +} + +const struct got_error * +got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf, + got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct imsg imsg; + struct got_imsg_enumerated_commit *icommit = NULL; + struct got_object_id commit_id; + int have_commit = 0; + time_t mtime = 0; + struct got_tree_object tree; + struct got_imsg_enumerated_tree *itree; + struct got_object_id tree_id; + char *path = NULL, *canon_path = NULL; + size_t datalen, path_len; + int nentries = -1; + int done = 0; + + memset(&tree, 0, sizeof(tree)); + + while (!done) { + err = got_privsep_recv_imsg(&imsg, ibuf, 0); + if (err) + break; + + datalen = imsg.hdr.len - IMSG_HEADER_SIZE; + switch (imsg.hdr.type) { + case GOT_IMSG_ENUMERATED_COMMIT: + if (have_commit && nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen != sizeof(*icommit)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + icommit = (struct got_imsg_enumerated_commit *)imsg.data; + memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH); + mtime = icommit->mtime; + err = cb_commit(cb_arg, mtime, &commit_id, repo); + if (err) + break; + have_commit = 1; + break; + case GOT_IMSG_ENUMERATED_TREE: + /* Should be preceeded by GOT_IMSG_ENUMERATED_COMMIT. 
*/ + if (!have_commit) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen < sizeof(*itree)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + itree = imsg.data; + path_len = datalen - sizeof(*itree); + if (path_len == 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1)); + free(path); + path = malloc(path_len + 1); + if (path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + free(canon_path); + canon_path = malloc(path_len + 1); + if (canon_path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + memcpy(path, (uint8_t *)imsg.data + sizeof(*itree), + path_len); + path[path_len] = '\0'; + if (!got_path_is_absolute(path)) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (got_path_is_root_dir(path)) { + /* XXX check what got_canonpath() does wrong */ + canon_path[0] = '/'; + canon_path[1] = '\0'; + } else { + err = got_canonpath(path, canon_path, + path_len + 1); + if (err) + break; + } + if (strcmp(path, canon_path) != 0) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries < -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries == -1) { + /* Tree was not found in pack file. */ + err = cb_tree(cb_arg, NULL, mtime, &tree_id, + path, repo); + break; + } + if (itree->nentries > INT_MAX) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + tree.entries = calloc(itree->nentries, + sizeof(struct got_tree_entry)); + if (tree.entries == NULL) { + err = got_error_from_errno("calloc"); + break; + } + if (itree->nentries == 0) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. 
*/ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } else { + tree.nentries = itree->nentries; + nentries = 0; + } + break; + case GOT_IMSG_TREE_ENTRIES: + /* Should be preceeded by GOT_IMSG_ENUMERATED_TREE. */ + if (nentries <= -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + err = recv_tree_entries(imsg.data, datalen, + &tree, &nentries); + if (err) + break; + if (tree.nentries == nentries) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. */ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } + break; + case GOT_IMSG_TREE_ENUMERATION_DONE: + /* All trees have been found and traversed. */ + if (path == NULL || nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + have_commit = 0; + break; + case GOT_IMSG_OBJECT_ENUMERATION_DONE: + done = 1; + break; + default: + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + + imsg_free(&imsg); + if (err) + break; + } + + free(path); + free(canon_path); + free(tree.entries); + return err; +} + +const struct got_error * got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx, struct got_object_id *id) { blob - ea9a7e564cb840af0d6a61c8c8e0cf5ed3d93147 file + libexec/got-read-pack/got-read-pack.c --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -14,6 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include <sys/stat.h> #include <sys/types.h> #include <sys/queue.h> #include <sys/uio.h> @@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf) return got_privsep_flush_imsg(ibuf); } - static const struct got_error * commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf, struct got_pack *pack, struct got_packidx *packidx, @@ -1022,6 +1022,31 @@ recv_object_ids(struct got_object_idset *idset, struct } static const struct got_error * +recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf) +{ + const struct got_error *err = NULL; + int done = 0; + struct got_object_qid *qid; + struct got_object_id *ids; + size_t nids, i; + + for (;;) { + err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf); + if (err || done) + break; + for (i = 0; i < nids; i++) { + err = got_object_qid_alloc_partial(&qid); + if (err) + return err; + memcpy(&qid->id, &ids[i], sizeof(qid->id)); + STAILQ_INSERT_TAIL(queue, qid, entry); + } + } + + return err; +} + +static const struct got_error * delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf, FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx) { @@ -1132,6 +1157,281 @@ done: } static const struct got_error * +send_tree_enumeration_done(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1, + NULL, 0) == -1) + return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE"); + + return got_privsep_flush_imsg(ibuf); +} + +static const struct got_error * +enumerate_tree(struct imsgbuf *ibuf, size_t *totlen, + struct got_object_id *tree_id, + const char *path, struct got_pack *pack, struct got_packidx *packidx, + struct got_object_cache *objcache, struct got_object_idset *idset) +{ + const struct got_error *err = NULL; + struct got_object_id_queue ids; + struct got_object_qid *qid; + uint8_t *buf = NULL; + struct got_parsed_tree_entry *entries = NULL; + + STAILQ_INIT(&ids); + + err = got_object_qid_alloc_partial(&qid); + if 
(err) + return err; + memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH); + qid->data = strdup(path); + if (qid->data == NULL) { + err = got_error_from_errno("strdup"); + goto done; + } + STAILQ_INSERT_TAIL(&ids, qid, entry); + qid = NULL; + + do { + const char *path; + int idx, nentries, i; + + if (sigint_received) { + err = got_error(GOT_ERR_CANCELLED); + goto done; + } + + qid = STAILQ_FIRST(&ids); + STAILQ_REMOVE_HEAD(&ids, entry); + path = qid->data; + + idx = got_packidx_get_object_idx(packidx, &qid->id); + if (idx == -1) { + err = got_privsep_send_enumerated_tree(totlen, ibuf, + &qid->id, path, NULL, -1); + break; + } + + err = open_tree(&buf, &entries, &nentries, + pack, packidx, idx, &qid->id, objcache); + if (err) { + if (err->code != GOT_ERR_NO_OBJ) + goto done; + } + + err = got_privsep_send_enumerated_tree(totlen, + ibuf, &qid->id, path, entries, nentries); + if (err) + goto done; + + err = got_object_idset_add(idset, &qid->id, NULL); + if (err) + goto done; + + for (i = 0; i < nentries; i++) { + struct got_object_qid *eqid = NULL; + struct got_parsed_tree_entry *pte = &entries[i]; + char *p; + + if (!S_ISDIR(pte->mode)) + continue; + + err = got_object_qid_alloc_partial(&eqid); + if (err) + goto done; + memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1)); + + if (got_object_idset_contains(idset, &eqid->id)) { + got_object_qid_free(eqid); + continue; + } + + if (asprintf(&p, "%s%s%s", path, + got_path_is_root_dir(path) ? 
"" : "/", + pte->name) == -1) { + err = got_error_from_errno("asprintf"); + got_object_qid_free(eqid); + goto done; + } + eqid->data = p; + STAILQ_INSERT_TAIL(&ids, eqid, entry); + idx = got_packidx_get_object_idx(packidx, &eqid->id); + if (idx == -1) + break; + } + + free(qid->data); + got_object_qid_free(qid); + qid = NULL; + + free(entries); + entries = NULL; + free(buf); + buf = NULL; + } while (!STAILQ_EMPTY(&ids)); + + err = send_tree_enumeration_done(ibuf); +done: + free(buf); + if (qid) + free(qid->data); + got_object_qid_free(qid); + got_object_id_queue_free(&ids); + free(entries); + if (err) { + if (err->code == GOT_ERR_PRIVSEP_PIPE) + err = NULL; + else + got_privsep_send_error(ibuf, err); + } + + return err; +} + +static const struct got_error * +enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf, + struct got_pack *pack, struct got_packidx *packidx, + struct got_object_cache *objcache) +{ + const struct got_error *err = NULL; + struct got_object_id_queue commit_ids; + const struct got_object_id_queue *parents = NULL; + struct got_object_qid *qid = NULL; + struct got_object *obj = NULL; + struct got_commit_object *commit = NULL; + struct got_object_id *tree_id = NULL; + size_t totlen = 0; + struct got_object_idset *idset; + int idx; + + STAILQ_INIT(&commit_ids); + + idset = got_object_idset_alloc(); + if (idset == NULL) + return got_error_from_errno("got_object_idset_alloc"); + + err = recv_object_id_queue(&commit_ids, ibuf); + if (err) + goto done; + + while (!STAILQ_EMPTY(&commit_ids)) { + if (sigint_received) { + err = got_error(GOT_ERR_CANCELLED); + goto done; + } + + qid = STAILQ_FIRST(&commit_ids); + STAILQ_REMOVE_HEAD(&commit_ids, entry); + + if (got_object_idset_contains(idset, &qid->id)) { + got_object_qid_free(qid); + qid = NULL; + continue; + } + + idx = got_packidx_get_object_idx(packidx, &qid->id); + if (idx == -1) + break; + + err = open_object(&obj, pack, packidx, idx, &qid->id, + objcache); + if (err) + goto done; + if 
(obj->type == GOT_OBJ_TYPE_TAG) { + struct got_tag_object *tag; + uint8_t *buf; + size_t len; + err = got_packfile_extract_object_to_mem(&buf, + &len, obj, pack); + if (err) + goto done; + obj->size = len; + err = got_object_parse_tag(&tag, buf, len); + if (err) { + free(buf); + goto done; + } + err = open_commit(&commit, pack, packidx, idx, + &tag->id, objcache); + got_object_tag_close(tag); + free(buf); + if (err) + goto done; + } else if (obj->type == GOT_OBJ_TYPE_COMMIT) { + err = open_commit(&commit, pack, packidx, idx, + &qid->id, objcache); + if (err) + goto done; + } else { + err = got_error(GOT_ERR_OBJ_TYPE); + goto done; + } + got_object_close(obj); + obj = NULL; + + err = got_privsep_send_enumerated_commit(ibuf, &qid->id, + got_object_commit_get_committer_time(commit)); + if (err) + goto done; + + tree_id = got_object_commit_get_tree_id(commit); + idx = got_packidx_get_object_idx(packidx, tree_id); + if (idx == -1) { + err = got_privsep_send_enumerated_tree(&totlen, ibuf, + tree_id, "", NULL, -1); + if (err) + goto done; + break; + } + + if (got_object_idset_contains(idset, tree_id)) { + got_object_qid_free(qid); + qid = NULL; + continue; + } + + err = enumerate_tree(ibuf, &totlen, tree_id, "/", + pack, packidx, objcache, idset); + if (err) + goto done; + + got_object_qid_free(qid); + qid = NULL; + + parents = got_object_commit_get_parent_ids(commit); + if (parents) { + struct got_object_qid *pid; + STAILQ_FOREACH(pid, parents, entry) { + if (got_object_idset_contains(idset, &pid->id)) + continue; + err = got_object_qid_alloc_partial(&qid); + if (err) + goto done; + memcpy(&qid->id, &pid->id, sizeof(qid->id)); + STAILQ_INSERT_TAIL(&commit_ids, qid, entry); + qid = NULL; + } + } + + got_object_commit_close(commit); + commit = NULL; + } + + err = got_privsep_send_object_enumeration_done(ibuf); + if (err) + goto done; +done: + if (obj) + got_object_close(obj); + if (commit) + got_object_commit_close(commit); + got_object_qid_free(qid); + 
got_object_id_queue_free(&commit_ids); + got_object_idset_free(idset); + return err; +} + +static const struct got_error * receive_pack(struct got_pack **packp, struct imsgbuf *ibuf) { const struct got_error *err = NULL; @@ -1344,6 +1644,10 @@ main(int argc, char *argv[]) err = commit_traversal_request(&imsg, &ibuf, pack, packidx, &objcache); break; + case GOT_IMSG_OBJECT_ENUMERATION_REQUEST: + err = enumeration_request(&imsg, &ibuf, pack, + packidx, &objcache); + break; default: err = got_error(GOT_ERR_PRIVSEP_MSG); break;
object enumeration in got-read-pack