From: Stefan Sperling
Subject: Re: object enumeration in got-read-pack
To: Omar Polo, gameoftrees@openbsd.org
Date: Mon, 13 Jun 2022 14:28:54 +0200

On Tue, Jun 07, 2022 at 09:34:07PM +0200, Stefan Sperling wrote:
> On Tue, Jun 07, 2022 at 12:34:15PM +0200, Stefan Sperling wrote:
> > On Tue, May 24, 2022 at 10:06:28PM +0200, Stefan Sperling wrote:
> > > This patch still has some design-level problems and I don't consider it
> > > done yet. The current design works well for the case where we want to pack
> > > every object that is referenced, but not so well when we want to pack only
> > > a subset of objects; got-read-pack could end up doing a lot of unnecessary
> > > work in that case. It only knows commits to start enumerating from, but has
> > > no boundary commits that would tell it where to stop enumerating; it just
> > > keeps going until it runs out of parent commits to traverse.
> > > I still need to think about this some more.
> >
> > Updated version, where we can send got-read-pack a list of boundary commits.
> > Enumeration in the got-read-pack process will stop as soon as those commits
> > have been reached.
>
> This was committed but then backed out again in commit e44d9391
> because the logic I implemented here has a flaw. See that commit
> message for details if you are interested.

Next attempt. This fixes the bug op@ found and passes in the test setup
provided to me by op@. It is still unclear how to reproduce this problem
in the test suite. It should be possible, but requires some careful
engineering of pack files, such that a tree hierarchy to be sent is only
partially packed.

ok?
diff b78a74059a30e2c5f5a2884c3eec4c0305779157 84343c6e65af3bab2ad8041e9dcd8e52cc1dbff9 blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4 blob + 1f4430dcde3e6550f20a42cee4f1955e7ea675ba --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -133,3 +133,14 @@ const struct got_error *got_object_tree_entry_dup(stru const struct got_error *got_traverse_packed_commits( struct got_object_id_queue *, struct got_object_id *, const char *, struct got_repository *); + +typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *, + time_t, struct got_object_id *, struct got_repository *); +typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *, + struct got_tree_object *, time_t, struct got_object_id *, const char *, + struct got_repository *); + +const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb, + got_object_enumerate_tree_cb, void *, struct got_object_id **, int, + struct got_object_id **, int, struct got_packidx *, + struct got_repository *); blob - e719a95bde6bbe971668bb22ea3a72e2990ad927 blob + c6c29a099c6a9b6dc6883641ef3ef96ce7f47156 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -145,6 +145,11 @@ enum got_imsg_type { GOT_IMSG_COMMIT_TRAVERSAL_REQUEST, GOT_IMSG_TRAVERSED_COMMITS, GOT_IMSG_COMMIT_TRAVERSAL_DONE, + GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + GOT_IMSG_ENUMERATED_COMMIT, + GOT_IMSG_ENUMERATED_TREE, + GOT_IMSG_TREE_ENUMERATION_DONE, + GOT_IMSG_OBJECT_ENUMERATION_DONE, /* Message sending file descriptor to a temporary file. 
*/ GOT_IMSG_TMPFD, @@ -556,6 +561,22 @@ struct got_imsg_traversed_commits { /* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */ } __attribute__((__packed__)); +/* Structure for GOT_IMSG_ENUMERATED_COMMIT */ +struct got_imsg_enumerated_commit { + uint8_t id[SHA1_DIGEST_LENGTH]; + time_t mtime; +} __attribute__((__packed__)); + +/* Structure for GOT_IMSG_ENUMERATED_TREE */ +struct got_imsg_enumerated_tree { + uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */ + int nentries; /* number of tree entries */ + + /* Followed by tree's path in remaining data of imsg buffer. */ + + /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */ +} __attribute__((__packed__)); + /* * Structure for GOT_IMSG_GOTCONFIG_REMOTE and * GOT_IMSG_GOTCONFIG_REMOTE data. @@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver const struct got_error *got_privsep_recv_traversed_commits( struct got_commit_object **, struct got_object_id **, struct got_object_id_queue *, struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_tree(size_t *, + struct imsgbuf *, struct got_object_id *, const char *, + struct got_parsed_tree_entry *, int); +const struct got_error *got_privsep_send_object_enumeration_request( + struct imsgbuf *); +const struct got_error *got_privsep_send_object_enumeration_done( + struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *, + struct got_object_id *, time_t); +const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *, + got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *, + struct got_repository *); const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int, struct got_object_id *); blob - 47175236af75523bec4c9972e40679dcdd6d15f0 blob + eb06a1be95981661f8eed6fcf981bbe32a5b6119 --- lib/object.c +++ lib/object.c @@ -60,6 +60,10 @@ #define MIN(_a,_b) ((_a) < (_b) ? 
(_a) : (_b)) #endif +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + struct got_object_id * got_object_get_id(struct got_object *obj) { @@ -2396,3 +2400,60 @@ done: free(changed_commit_id); return err; } + +const struct got_error * +got_object_enumerate(got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_object_id **ours, int nours, + struct got_object_id **theirs, int ntheirs, + struct got_packidx *packidx, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_pack *pack; + char *path_packfile = NULL; + + err = got_packidx_get_packfile_path(&path_packfile, + packidx->path_packidx); + if (err) + return err; + + pack = got_repo_get_cached_pack(repo, path_packfile); + if (pack == NULL) { + err = got_repo_cache_pack(&pack, repo, path_packfile, packidx); + if (err) + goto done; + } + + if (pack->privsep_child == NULL) { + err = start_pack_privsep_child(pack, packidx); + if (err) + goto done; + } + + err = got_privsep_send_object_enumeration_request( + pack->privsep_child->ibuf); + if (err) + goto done; + + err = got_privsep_send_object_idlist(pack->privsep_child->ibuf, + ours, nours); + if (err) + goto done; + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); + if (err) + goto done; + + err = got_privsep_send_object_idlist(pack->privsep_child->ibuf, + theirs, ntheirs); + if (err) + goto done; + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); + if (err) + goto done; + + err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf, + cb_commit, cb_tree, cb_arg, repo); +done: + free(path_packfile); + return err; +} blob - 16709d206a932e5931f9b6441439d71eace4473d blob + fff58b083aa8eadb970ebd09ce593455d223fcc9 --- lib/pack_create.c +++ lib/pack_create.c @@ -903,21 +903,16 @@ add_object(int want_meta, struct got_object_idset *ids static const struct got_error * load_tree_entries(struct got_object_id_queue *ids, 
int want_meta, struct got_object_idset *idset, struct got_object_idset *idset_exclude, - struct got_object_id *tree_id, + struct got_tree_object *tree, const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo, int loose_obj_only, int *ncolored, int *nfound, int *ntrees, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; - struct got_tree_object *tree; char *p = NULL; int i; - err = got_object_open_as_tree(&tree, repo, tree_id); - if (err) - return err; - (*ntrees)++; err = report_progress(progress_cb, progress_arg, rl, *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0); @@ -939,8 +934,16 @@ load_tree_entries(struct got_object_id_queue *ids, int got_object_idset_contains(idset, id) || got_object_idset_contains(idset_exclude, id)) continue; - - if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "", + + /* + * If got-read-pack is crawling trees for us then + * we are only here to collect blob IDs. + */ + if (ids == NULL && S_ISDIR(mode)) + continue; + + if (asprintf(&p, "%s%s%s", dpath, + got_path_is_root_dir(dpath) ? 
"" : "/", got_tree_entry_get_name(e)) == -1) { err = got_error_from_errno("asprintf"); break; @@ -970,7 +973,6 @@ load_tree_entries(struct got_object_id_queue *ids, int } } - got_object_tree_close(tree); free(p); return err; } @@ -987,6 +989,7 @@ load_tree(int want_meta, struct got_object_idset *idse const struct got_error *err = NULL; struct got_object_id_queue tree_ids; struct got_object_qid *qid; + struct got_tree_object *tree = NULL; if (got_object_idset_contains(idset, tree_id) || got_object_idset_contains(idset_exclude, tree_id)) @@ -1034,20 +1037,32 @@ load_tree(int want_meta, struct got_object_idset *idse break; } + err = got_object_open_as_tree(&tree, repo, &qid->id); + if (err) { + free(qid->data); + got_object_qid_free(qid); + break; + } + err = load_tree_entries(&tree_ids, want_meta, idset, - idset_exclude, &qid->id, - path, mtime, seed, repo, loose_obj_only, ncolored, nfound, - ntrees, progress_cb, progress_arg, rl, + idset_exclude, tree, path, mtime, seed, repo, + loose_obj_only, ncolored, nfound, ntrees, + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); free(qid->data); got_object_qid_free(qid); if (err) break; + + got_object_tree_close(tree); + tree = NULL; } STAILQ_FOREACH(qid, &tree_ids, entry) free(qid->data); got_object_id_queue_free(&tree_ids); + if (tree) + got_object_tree_close(tree); return err; } @@ -1449,7 +1464,207 @@ done: return err; } +struct load_packed_obj_arg { + /* output parameters: */ + struct got_object_id *id; + char *dpath; + time_t mtime; + + /* input parameters: */ + uint32_t seed; + int want_meta; + struct got_object_idset *idset; + struct got_object_idset *idset_exclude; + int loose_obj_only; + int *ncolored; + int *nfound; + int *ntrees; + got_pack_progress_cb progress_cb; + void *progress_arg; + struct got_ratelimit *rl; + got_cancel_cb cancel_cb; + void *cancel_arg; +}; + static const struct got_error * +load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id, + struct got_repository *repo) +{ + 
struct load_packed_obj_arg *a = arg; + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + return add_object(a->want_meta, + a->want_meta ? a->idset : a->idset_exclude, + id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->seed, a->loose_obj_only, + repo, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); +} + +static const struct got_error * +load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime, + struct got_object_id *id, const char *dpath, struct got_repository *repo) +{ + const struct got_error *err; + struct load_packed_obj_arg *a = arg; + const char *relpath; + + /* + * When we receive a tree's ID and path but not the tree itself, + * this tree object was not found in the pack file. This is the + * last time we are being called for this optimized traversal. + * Return from here and switch to loading objects the slow way. + */ + if (tree == NULL) { + free(a->id); + a->id = got_object_id_dup(id); + if (a->id == NULL) { + err = got_error_from_errno("got_object_id_dup"); + free(a->dpath); + a->dpath = NULL; + return err; + } + + free(a->dpath); + a->dpath = strdup(dpath); + if (a->dpath == NULL) { + err = got_error_from_errno("strdup"); + free(a->id); + a->id = NULL; + return err; + } + + a->mtime = mtime; + return NULL; + } + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + relpath = dpath; + while (relpath[0] == '/') + relpath++; + + err = add_object(a->want_meta, + a->want_meta ? 
a->idset : a->idset_exclude, + id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->seed, + a->loose_obj_only, repo, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); + if (err) + return err; + + return load_tree_entries(NULL, a->want_meta, a->idset, + a->idset_exclude, tree, dpath, mtime, a->seed, repo, + a->loose_obj_only, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl, + a->cancel_cb, a->cancel_arg); +} + +static const struct got_error * +load_packed_object_ids(struct got_object_id **ours, int nours, + struct got_object_id **theirs, int ntheirs, + int want_meta, uint32_t seed, struct got_object_idset *idset, + struct got_object_idset *idset_exclude, int loose_obj_only, + struct got_repository *repo, struct got_packidx *packidx, + int *ncolored, int *nfound, int *ntrees, + got_pack_progress_cb progress_cb, void *progress_arg, + struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct load_packed_obj_arg lpa; + + memset(&lpa, 0, sizeof(lpa)); + lpa.seed = seed; + lpa.want_meta = want_meta; + lpa.idset = idset; + lpa.idset_exclude = idset_exclude; + lpa.loose_obj_only = loose_obj_only; + lpa.ncolored = ncolored; + lpa.nfound = nfound; + lpa.ntrees = ntrees; + lpa.progress_cb = progress_cb; + lpa.progress_arg = progress_arg; + lpa.rl = rl; + lpa.cancel_cb = cancel_cb; + lpa.cancel_arg = cancel_arg; + + /* Attempt to load objects via got-read-pack, as far as possible. */ + err = got_object_enumerate(load_packed_commit_id, + load_packed_tree_ids, &lpa, ours, nours, theirs, ntheirs, + packidx, repo); + if (err) + return err; + + if (lpa.id == NULL) + return NULL; + + /* + * An incomplete tree hierarchy was present in the pack file + * and caused loading to be aborted midway through a commit. + * Continue loading trees the slow way. 
+ */ + err = load_tree(want_meta, idset, idset_exclude, + lpa.id, lpa.dpath, lpa.mtime, seed, repo, loose_obj_only, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + free(lpa.id); + free(lpa.dpath); + return err; +} + +static const struct got_error * +find_pack_for_enumeration(struct got_packidx **best_packidx, + struct got_object_id **ids, int nids, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_pathlist_entry *pe; + const char *best_packidx_path = NULL; + int nobj_max = 0; + int ncommits_max = 0; + + *best_packidx = NULL; + + /* + * Find the largest pack which contains at least some of the + * commits and tags we are interested in. + */ + TAILQ_FOREACH(pe, &repo->packidx_paths, entry) { + const char *path_packidx = pe->path; + struct got_packidx *packidx; + int nobj, i, idx, ncommits = 0; + + err = got_repo_get_packidx(&packidx, path_packidx, repo); + if (err) + break; + + nobj = be32toh(packidx->hdr.fanout_table[0xff]); + if (nobj <= nobj_max) + continue; + + for (i = 0; i < nids; i++) { + idx = got_packidx_get_object_idx(packidx, ids[i]); + if (idx != -1) + ncommits++; + } + if (ncommits > ncommits_max) { + best_packidx_path = path_packidx; + nobj_max = nobj; + ncommits_max = ncommits; + } + } + + if (best_packidx_path) { + err = got_repo_get_packidx(best_packidx, best_packidx_path, + repo); + } + + return err; +} + +static const struct got_error * load_object_ids(int *ncolored, int *nfound, int *ntrees, struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs, struct got_object_id **ours, int nours, struct got_repository *repo, @@ -1459,6 +1674,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree { const struct got_error *err = NULL; struct got_object_id **ids = NULL; + struct got_packidx *packidx = NULL; int i, nobj = 0, obj_type; struct got_object_idset *idset_exclude; @@ -1475,6 +1691,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree if (err) goto 
done; + err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(theirs, ntheirs, NULL, 0, 0, + seed, idset, idset_exclude, loose_obj_only, repo, packidx, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < ntheirs; i++) { struct got_object_id *id = theirs[i]; if (id == NULL) @@ -1499,6 +1727,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree } } + err = find_pack_for_enumeration(&packidx, ids, nobj, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(ids, nobj, theirs, ntheirs, 1, + seed, idset, idset_exclude, loose_obj_only, repo, packidx, + ncolored, nfound, ntrees, + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < nobj; i++) { err = load_commit(1, idset, idset_exclude, ids[i], repo, seed, loose_obj_only, ncolored, nfound, ntrees, blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b blob + a63073982fd405c8f986e41f79764f7b0f74ee7c --- lib/privsep.c +++ lib/privsep.c @@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com } static const struct got_error * -send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, - int idx0, int idxN, size_t len) +send_tree_entries_batch(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len) { struct ibuf *wbuf; struct got_imsg_tree_entries ientries; @@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par return NULL; } -const struct got_error * -got_privsep_send_tree(struct imsgbuf *ibuf, - struct got_parsed_tree_entry *entries, int nentries) +static const struct got_error * +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, + int nentries) { const struct got_error *err = NULL; - struct got_imsg_tree_object itree; - size_t entries_len; int i, j; + size_t 
entries_len = sizeof(struct got_imsg_tree_entries); - itree.nentries = nentries; - if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) - == -1) - return got_error_from_errno("imsg_compose TREE"); - - entries_len = sizeof(struct got_imsg_tree_entries); i = 0; for (j = 0; j < nentries; j++) { struct got_parsed_tree_entry *pte = &entries[j]; @@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf, if (j > 0 && entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = send_tree_entries(ibuf, entries, + err = send_tree_entries_batch(ibuf, entries, i, j - 1, entries_len); if (err) return err; @@ -1513,14 +1506,97 @@ got_privsep_send_tree(struct imsgbuf *ibuf, } if (j > 0) { - err = send_tree_entries(ibuf, entries, i, j - 1, entries_len); + err = send_tree_entries_batch(ibuf, entries, i, j - 1, + entries_len); if (err) return err; } + return NULL; +} + +const struct got_error * +got_privsep_send_tree(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_object itree; + + itree.nentries = nentries; + if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) + == -1) + return got_error_from_errno("imsg_compose TREE"); + + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + return flush_imsg(ibuf); } + +static const struct got_error * +recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree, + int *nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_entries *ientries; + struct got_tree_entry *te; + size_t te_offset; + size_t i; + + if (datalen <= sizeof(*ientries) || + datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) + return got_error(GOT_ERR_PRIVSEP_LEN); + + ientries = (struct got_imsg_tree_entries *)data; + if (ientries->nentries > INT_MAX) { + return got_error_msg(GOT_ERR_NO_SPACE, + "too many tree entries"); + } + + te_offset = sizeof(*ientries); + for (i = 0; i < 
ientries->nentries; i++) { + struct got_imsg_tree_entry ite; + const char *te_name; + uint8_t *buf = (uint8_t *)data + te_offset; + + if (te_offset >= datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + /* Might not be aligned, size is ~32 bytes. */ + memcpy(&ite, buf, sizeof(ite)); + + if (ite.namelen >= sizeof(te->name)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + if (te_offset + sizeof(ite) + ite.namelen > datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + if (*nentries >= tree->nentries) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + te = &tree->entries[*nentries]; + te_name = buf + sizeof(ite); + memcpy(te->name, te_name, ite.namelen); + te->name[ite.namelen] = '\0'; + memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); + te->mode = ite.mode; + te->idx = *nentries; + (*nentries)++; + + te_offset += sizeof(ite) + ite.namelen; + } + + return err; +} + const struct got_error * got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf) { @@ -1529,7 +1605,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s MIN(sizeof(struct got_imsg_error), sizeof(struct got_imsg_tree_object)); struct got_imsg_tree_object *itree; - size_t i; int nentries = 0; *tree = NULL; @@ -1542,9 +1617,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s struct imsg imsg; size_t n; size_t datalen; - struct got_imsg_tree_entries *ientries; - struct got_tree_entry *te = NULL; - size_t te_offset; n = imsg_get(ibuf, &imsg); if (n == 0) { @@ -1611,56 +1683,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s err = got_error(GOT_ERR_PRIVSEP_MSG); break; } - if (datalen <= sizeof(*ientries) || - datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - ientries = imsg.data; - if (ientries->nentries > INT_MAX) { - err = got_error_msg(GOT_ERR_NO_SPACE, - "too many tree entries"); - break; - } - te_offset = sizeof(*ientries); - for (i = 0; i < ientries->nentries; i++) { - 
struct got_imsg_tree_entry ite; - const char *te_name; - uint8_t *buf = imsg.data + te_offset; - - if (te_offset >= datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - /* Might not be aligned, size is ~32 bytes. */ - memcpy(&ite, buf, sizeof(ite)); - - if (ite.namelen >= sizeof(te->name)) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (te_offset + sizeof(ite) + ite.namelen > - datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (nentries >= (*tree)->nentries) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - te = &(*tree)->entries[nentries]; - te_name = buf + sizeof(ite); - memcpy(te->name, te_name, ite.namelen); - te->name[ite.namelen] = '\0'; - memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); - te->mode = ite.mode; - te->idx = nentries; - nentries++; - - te_offset += sizeof(ite) + ite.namelen; - } + err = recv_tree_entries(imsg.data, datalen, + *tree, &nentries); break; default: err = got_error(GOT_ERR_PRIVSEP_MSG); @@ -2731,6 +2755,270 @@ got_privsep_recv_traversed_commits(struct got_commit_o } const struct got_error * +got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf, + struct got_object_id *tree_id, const char *path, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct ibuf *wbuf; + size_t path_len = strlen(path); + size_t msglen; + + msglen = sizeof(struct got_imsg_enumerated_tree) + path_len; + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_TREE"); + + if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) + return got_error_from_errno("imsg_add ENUMERATED_TREE"); + if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) + return got_error_from_errno("imsg_add ENUMERATED_TREE"); + if (imsg_add(wbuf, path, path_len) == -1) + return got_error_from_errno("imsg_add ENUMERATED_TREE"); + + wbuf->fd = -1; + imsg_close(ibuf, wbuf); + + if 
(entries) { + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + } + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_REQUEST"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_DONE"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_enumerated_commit(struct imsgbuf *ibuf, + struct got_object_id *id, time_t mtime) +{ + struct ibuf *wbuf; + + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0, + sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_COMMIT"); + + /* Keep in sync with struct got_imsg_enumerated_commit! */ + if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + + wbuf->fd = -1; + imsg_close(ibuf, wbuf); + /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. 
*/ + return NULL; +} + +const struct got_error * +got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf, + got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct imsg imsg; + struct got_imsg_enumerated_commit *icommit = NULL; + struct got_object_id commit_id; + int have_commit = 0; + time_t mtime = 0; + struct got_tree_object tree; + struct got_imsg_enumerated_tree *itree; + struct got_object_id tree_id; + char *path = NULL, *canon_path = NULL; + size_t datalen, path_len; + int nentries = -1; + int done = 0; + + memset(&tree, 0, sizeof(tree)); + + while (!done) { + err = got_privsep_recv_imsg(&imsg, ibuf, 0); + if (err) + break; + + datalen = imsg.hdr.len - IMSG_HEADER_SIZE; + switch (imsg.hdr.type) { + case GOT_IMSG_ENUMERATED_COMMIT: + if (have_commit && nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen != sizeof(*icommit)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + icommit = (struct got_imsg_enumerated_commit *)imsg.data; + memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH); + mtime = icommit->mtime; + have_commit = 1; + break; + case GOT_IMSG_ENUMERATED_TREE: + /* Should be preceeded by GOT_IMSG_ENUMERATED_COMMIT. 
*/ + if (!have_commit) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen < sizeof(*itree)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + itree = imsg.data; + path_len = datalen - sizeof(*itree); + if (path_len == 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1)); + free(path); + path = malloc(path_len + 1); + if (path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + free(canon_path); + canon_path = malloc(path_len + 1); + if (canon_path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + memcpy(path, (uint8_t *)imsg.data + sizeof(*itree), + path_len); + path[path_len] = '\0'; + if (!got_path_is_absolute(path)) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (got_path_is_root_dir(path)) { + /* XXX check what got_canonpath() does wrong */ + canon_path[0] = '/'; + canon_path[1] = '\0'; + } else { + err = got_canonpath(path, canon_path, + path_len + 1); + if (err) + break; + } + if (strcmp(path, canon_path) != 0) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries < -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries == -1) { + /* Tree was not found in pack file. */ + done = 1; + err = cb_tree(cb_arg, NULL, mtime, &tree_id, + path, repo); + break; + } + if (itree->nentries > INT_MAX) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + tree.entries = calloc(itree->nentries, + sizeof(struct got_tree_entry)); + if (tree.entries == NULL) { + err = got_error_from_errno("calloc"); + break; + } + if (itree->nentries == 0) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. 
*/ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } else { + tree.nentries = itree->nentries; + nentries = 0; + } + break; + case GOT_IMSG_TREE_ENTRIES: + /* Should be preceeded by GOT_IMSG_ENUMERATED_TREE. */ + if (nentries <= -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + err = recv_tree_entries(imsg.data, datalen, + &tree, &nentries); + if (err) + break; + if (tree.nentries == nentries) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. */ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } + break; + case GOT_IMSG_TREE_ENUMERATION_DONE: + /* All trees have been found and traversed. */ + if (!have_commit || path == NULL || nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + err = cb_commit(cb_arg, mtime, &commit_id, repo); + if (err) + break; + have_commit = 0; + break; + case GOT_IMSG_OBJECT_ENUMERATION_DONE: + done = 1; + break; + default: + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + + imsg_free(&imsg); + if (err) + break; + } + + free(path); + free(canon_path); + free(tree.entries); + return err; +} + +const struct got_error * got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx, struct got_object_id *id) { @@ -2834,9 +3122,8 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64 return err; } -const struct got_error * -got_privsep_send_object_idlist(struct imsgbuf *ibuf, - struct got_object_id **ids, size_t nids) +static const struct got_error * +send_idlist(struct imsgbuf *ibuf, struct got_object_id **ids, size_t nids) { const struct got_error *err = NULL; struct got_imsg_object_idlist idlist; @@ -2870,6 +3157,34 @@ got_privsep_send_object_idlist(struct imsgbuf *ibuf, } const struct got_error * +got_privsep_send_object_idlist(struct imsgbuf *ibuf, + struct got_object_id **ids, size_t nids) +{ + const struct got_error *err = NULL; + struct got_object_id 
*idlist[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS];
+	int i, j = 0;
+
+	for (i = 0; i < nids; i++) {
+		j = i % nitems(idlist);
+		idlist[j] = ids[i];
+		if (j >= nitems(idlist) - 1) {
+			err = send_idlist(ibuf, idlist, j + 1);
+			if (err)
+				return err;
+			j = 0;
+		}
+	}
+
+	if (j > 0) {
+		err = send_idlist(ibuf, idlist, j + 1);
+		if (err)
+			return err;
+	}
+
+	return NULL;
+}
+
+const struct got_error *
 got_privsep_send_object_idlist_done(struct imsgbuf *ibuf)
 {
 	if (imsg_compose(ibuf, GOT_IMSG_OBJ_ID_LIST_DONE, 0, 0, -1, NULL, 0)
blob - 63bd0d3fe2ed9ec49659d7fcd00aa9f1412aacab
blob + d83d6706651dd700631ee4ae71734baa34799fbb
--- libexec/got-read-pack/got-read-pack.c
+++ libexec/got-read-pack/got-read-pack.c
@@ -14,6 +14,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include
 #include
 #include
 #include
@@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf)
 	return got_privsep_flush_imsg(ibuf);
 }
 
-
 static const struct got_error *
 commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf,
     struct got_pack *pack, struct got_packidx *packidx,
@@ -1022,6 +1022,44 @@ recv_object_ids(struct got_object_idset *idset, struct
 }
 
+/*
+ * Receive object ID lists from ibuf until the sender marks the list as
+ * done, appending one newly allocated entry per received ID to the tail
+ * of 'queue'.
+ * Entries appended before an error occurred stay on 'queue'; the caller
+ * is expected to free the queue in either case.
+ */
 static const struct got_error *
+recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf)
+{
+	const struct got_error *err = NULL;
+	int done = 0;
+	struct got_object_qid *qid;
+	struct got_object_id *ids;
+	size_t nids, i;
+
+	for (;;) {
+		err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf);
+		if (err || done)
+			break;
+		for (i = 0; i < nids; i++) {
+			err = got_object_qid_alloc_partial(&qid);
+			if (err)
+				return err;
+			memcpy(&qid->id, &ids[i], sizeof(qid->id));
+			STAILQ_INSERT_TAIL(queue, qid, entry);
+		}
+		/*
+		 * NOTE(review): 'ids' is not released here. If
+		 * got_privsep_recv_object_idlist() returns an allocated
+		 * array, every batch is leaked; confirm ownership and
+		 * free it after this loop and on the error return above.
+		 */
+	}
+
+	return err;
+}
+
+static const struct got_error *
 delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf,
     FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx)
 {
@@ -1132,6 +1170,418 @@ done:
 }
 
+/* Queue GOT_IMSG_TREE_ENUMERATION_DONE on ibuf and flush the buffer. */
 static const struct got_error *
+send_tree_enumeration_done(struct imsgbuf *ibuf)
+{
+	if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1,
+	    NULL, 0) == -1)
+		return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE");
+
+	return got_privsep_flush_imsg(ibuf);
+}
+
+/*
+ * One tree object gathered by enumerate_tree(). The path, buf and
+ * entries members are released with free() when a slot is cleared.
+ */
+struct enumerated_tree {
+	struct got_object_id id;
+	char *path;	/* path of this tree within the hierarchy */
+	uint8_t *buf;	/* buffer filled in by open_tree() */
+	struct got_parsed_tree_entry *entries; /* filled in by open_tree() */
+	int nentries;	/* number of elements in entries */
+};
+
+/*
+ * Walk the tree hierarchy rooted at tree_id breadth-first, looking up
+ * every tree object in packidx, and add the ID of each visited tree
+ * to idset.
+ *
+ * If all trees were found in packidx, *have_all_entries is set to 1 and
+ * every gathered tree is sent over ibuf, followed by a tree enumeration
+ * done message. Otherwise *have_all_entries is set to 0 and only a
+ * placeholder for tree_id (empty path, entry count -1) is sent.
+ *
+ * *trees is a scratch array of *nalloc slots which grows on demand.
+ * *ntrees is reset on entry; slots filled during the walk are emptied
+ * again before returning.
+ *
+ * GOT_ERR_PRIVSEP_PIPE is turned into a NULL return. Other errors raised
+ * after the initial allocation are sent over ibuf and returned.
+ */
+static const struct got_error *
+enumerate_tree(int *have_all_entries, struct imsgbuf *ibuf, size_t *totlen,
+    struct got_object_id *tree_id,
+    const char *path, struct got_pack *pack, struct got_packidx *packidx,
+    struct got_object_cache *objcache, struct got_object_idset *idset,
+    struct enumerated_tree **trees, size_t *nalloc, size_t *ntrees)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id_queue ids;
+	struct got_object_qid *qid;
+	uint8_t *buf = NULL;
+	struct got_parsed_tree_entry *entries = NULL;
+	int nentries = 0, i;
+	struct enumerated_tree *tree;
+
+	*ntrees = 0;
+	*have_all_entries = 1;
+	STAILQ_INIT(&ids);
+
+	err = got_object_qid_alloc_partial(&qid);
+	if (err)
+		return err;
+	memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH);
+	qid->data = strdup(path);
+	if (qid->data == NULL) {
+		err = got_error_from_errno("strdup");
+		goto done;
+	}
+	STAILQ_INSERT_TAIL(&ids, qid, entry);
+	qid = NULL;
+
+	/* Traverse the tree hierarchy, gather tree object IDs and paths. */
+	do {
+		const char *path;
+		int idx, i;
+
+		if (sigint_received) {
+			err = got_error(GOT_ERR_CANCELLED);
+			goto done;
+		}
+
+		qid = STAILQ_FIRST(&ids);
+		STAILQ_REMOVE_HEAD(&ids, entry);
+		path = qid->data;
+
+		idx = got_packidx_get_object_idx(packidx, &qid->id);
+		if (idx == -1) {
+			*have_all_entries = 0;
+			break;
+		}
+
+		err = open_tree(&buf, &entries, &nentries,
+		    pack, packidx, idx, &qid->id, objcache);
+		if (err) {
+			if (err->code != GOT_ERR_NO_OBJ)
+				goto done;
+		}
+
+		err = got_object_idset_add(idset, &qid->id, NULL);
+		if (err)
+			goto done;
+
+		for (i = 0; i < nentries; i++) {
+			struct got_object_qid *eqid = NULL;
+			struct got_parsed_tree_entry *pte = &entries[i];
+			char *p;
+
+			if (!S_ISDIR(pte->mode))
+				continue;
+
+			err = got_object_qid_alloc_partial(&eqid);
+			if (err)
+				goto done;
+			memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1));
+
+			if (got_object_idset_contains(idset, &eqid->id)) {
+				got_object_qid_free(eqid);
+				continue;
+			}
+
+			if (asprintf(&p, "%s%s%s", path,
+			    got_path_is_root_dir(path) ? "" : "/",
+			    pte->name) == -1) {
+				err = got_error_from_errno("asprintf");
+				got_object_qid_free(eqid);
+				goto done;
+			}
+			eqid->data = p;
+			STAILQ_INSERT_TAIL(&ids, eqid, entry);
+		}
+
+		if (*ntrees >= *nalloc) {
+			struct enumerated_tree *new;
+			new = recallocarray(*trees, *nalloc, *nalloc + 16,
+			    sizeof(*new));
+			if (new == NULL) {
+				err = got_error_from_errno("recallocarray");
+				goto done;
+			}
+			*trees = new;
+			*nalloc += 16;
+		}
+		tree = &(*trees)[*ntrees];
+		(*ntrees)++;
+		memcpy(&tree->id, &qid->id, sizeof(tree->id));
+		tree->path = qid->data;
+		tree->buf = buf;
+		buf = NULL;
+		tree->entries = entries;
+		entries = NULL;
+		tree->nentries = nentries;
+		/* Don't reuse a stale count if open_tree() fails later. */
+		nentries = 0;
+
+		got_object_qid_free(qid);
+		qid = NULL;
+	} while (!STAILQ_EMPTY(&ids));
+
+	if (*have_all_entries) {
+		int i;
+		/*
+		 * We have managed to traverse all entries in the hierarchy.
+		 * Tell the main process what we have found.
+		 */
+		for (i = 0; i < *ntrees; i++) {
+			tree = &(*trees)[i];
+			err = got_privsep_send_enumerated_tree(totlen,
+			    ibuf, &tree->id, tree->path, tree->entries,
+			    tree->nentries);
+			if (err)
+				goto done;
+			free(tree->buf);
+			tree->buf = NULL;
+			free(tree->path);
+			tree->path = NULL;
+			free(tree->entries);
+			tree->entries = NULL;
+		}
+		*ntrees = 0; /* don't loop again below to free memory */
+
+		err = send_tree_enumeration_done(ibuf);
+	} else {
+		/*
+		 * We can only load fully packed tree hierarchies on
+		 * behalf of the main process, otherwise the main process
+		 * gets a wrong idea about which tree objects have
+		 * already been traversed.
+		 * Indicate a missing entry for the root of this tree.
+		 * The main process should continue by loading this
+		 * entire tree the slow way.
+		 */
+		err = got_privsep_send_enumerated_tree(totlen, ibuf,
+		    tree_id, "", NULL, -1);
+		if (err)
+			goto done;
+	}
+done:
+	free(buf);
+	free(entries);
+	for (i = 0; i < *ntrees; i++) {
+		tree = &(*trees)[i];
+		free(tree->buf);
+		tree->buf = NULL;
+		free(tree->path);
+		tree->path = NULL;
+		free(tree->entries);
+		tree->entries = NULL;
+	}
+	if (qid)
+		free(qid->data);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&ids);
+	if (err) {
+		if (err->code == GOT_ERR_PRIVSEP_PIPE)
+			err = NULL;
+		else
+			got_privsep_send_error(ibuf, err);
+	}
+
+	return err;
+}
+
+/*
+ * Serve an object enumeration request from the main process.
+ *
+ * Two object ID lists are read from ibuf: a queue of commits (or tags
+ * pointing at commits) to start from, and a set of IDs which must not
+ * be visited. Commits are then walked via their parent IDs. Each
+ * visited commit is sent over ibuf and its root tree, unless already
+ * in the ID set, is passed to enumerate_tree().
+ *
+ * The walk ends early if a queued object, a tag's target, or a root
+ * tree is absent from packidx, or if enumerate_tree() could not find
+ * a complete tree hierarchy. The object enumeration done message is
+ * sent unless enumerate_tree() reported an incomplete hierarchy.
+ */
+static const struct got_error *
+enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf,
+    struct got_pack *pack, struct got_packidx *packidx,
+    struct got_object_cache *objcache)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id_queue commit_ids;
+	const struct got_object_id_queue *parents = NULL;
+	struct got_object_qid *qid = NULL;
+	struct got_object *obj = NULL;
+	struct got_commit_object *commit = NULL;
+	struct got_object_id *tree_id = NULL;
+	size_t totlen = 0;
+	struct got_object_idset *idset;
+	int i, idx, have_all_entries = 1;
+	struct enumerated_tree *trees = NULL;
+	size_t ntrees = 0, nalloc = 16;
+
+	STAILQ_INIT(&commit_ids);
+
+	trees = calloc(nalloc, sizeof(*trees));
+	if (trees == NULL)
+		return got_error_from_errno("calloc");
+
+	idset = got_object_idset_alloc();
+	if (idset == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+
+	err = recv_object_id_queue(&commit_ids, ibuf);
+	if (err)
+		goto done;
+
+	err = recv_object_ids(idset, ibuf);
+	if (err)
+		goto done;
+
+	while (!STAILQ_EMPTY(&commit_ids)) {
+		if (sigint_received) {
+			err = got_error(GOT_ERR_CANCELLED);
+			goto done;
+		}
+
+		qid = STAILQ_FIRST(&commit_ids);
+		STAILQ_REMOVE_HEAD(&commit_ids, entry);
+
+		if (got_object_idset_contains(idset, &qid->id)) {
+			got_object_qid_free(qid);
+			qid = NULL;
+			continue;
+		}
+
+		idx = got_packidx_get_object_idx(packidx, &qid->id);
+		if (idx == -1)
+			break;
+
+		err = open_object(&obj, pack, packidx, idx, &qid->id,
+		    objcache);
+		if (err)
+			goto done;
+		if (obj->type == GOT_OBJ_TYPE_TAG) {
+			struct got_tag_object *tag;
+			uint8_t *buf;
+			size_t len;
+			err = got_packfile_extract_object_to_mem(&buf,
+			    &len, obj, pack);
+			if (err)
+				goto done;
+			obj->size = len;
+			err = got_object_parse_tag(&tag, buf, len);
+			if (err) {
+				free(buf);
+				goto done;
+			}
+			idx = got_packidx_get_object_idx(packidx, &tag->id);
+			if (idx == -1) {
+				got_object_tag_close(tag);
+				free(buf);
+				break;
+			}
+			err = open_commit(&commit, pack, packidx, idx,
+			    &tag->id, objcache);
+			got_object_tag_close(tag);
+			free(buf);
+			if (err)
+				goto done;
+		} else if (obj->type == GOT_OBJ_TYPE_COMMIT) {
+			err = open_commit(&commit, pack, packidx, idx,
+			    &qid->id, objcache);
+			if (err)
+				goto done;
+		} else {
+			err = got_error(GOT_ERR_OBJ_TYPE);
+			goto done;
+		}
+		got_object_close(obj);
+		obj = NULL;
+
+		err = got_privsep_send_enumerated_commit(ibuf, &qid->id,
+		    got_object_commit_get_committer_time(commit));
+		if (err)
+			goto done;
+
+		tree_id = got_object_commit_get_tree_id(commit);
+		idx = got_packidx_get_object_idx(packidx, tree_id);
+		if (idx == -1) {
+			err = got_privsep_send_enumerated_tree(&totlen, ibuf,
+			    tree_id, "", NULL, -1);
+			if (err)
+				goto done;
+			break;
+		}
+
+		if (got_object_idset_contains(idset, tree_id)) {
+			got_object_qid_free(qid);
+			qid = NULL;
+			got_object_commit_close(commit);
+			commit = NULL;
+			continue;
+		}
+
+		err = enumerate_tree(&have_all_entries, ibuf, &totlen, tree_id, "/",
+		    pack, packidx, objcache, idset, &trees, &nalloc, &ntrees);
+		if (err)
+			goto done;
+
+		if (!have_all_entries)
+			break;
+
+		got_object_qid_free(qid);
+		qid = NULL;
+
+		parents = got_object_commit_get_parent_ids(commit);
+		if (parents) {
+			struct got_object_qid *pid;
+			STAILQ_FOREACH(pid, parents, entry) {
+				if (got_object_idset_contains(idset, &pid->id))
+					continue;
+				err = got_object_qid_alloc_partial(&qid);
+				if (err)
+					goto done;
+				memcpy(&qid->id, &pid->id, sizeof(qid->id));
+				STAILQ_INSERT_TAIL(&commit_ids, qid, entry);
+				qid = NULL;
+			}
+		}
+
+		got_object_commit_close(commit);
+		commit = NULL;
+	}
+
+	if (have_all_entries) {
+		err = got_privsep_send_object_enumeration_done(ibuf);
+		if (err)
+			goto done;
+	}
+done:
+	if (obj)
+		got_object_close(obj);
+	if (commit)
+		got_object_commit_close(commit);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&commit_ids);
+	if (idset)
+		got_object_idset_free(idset);
+	for (i = 0; i < ntrees; i++) {
+		struct enumerated_tree *tree = &trees[i];
+		free(tree->buf);
+		free(tree->path);
+		free(tree->entries);
+	}
+	free(trees);
+	return err;
+}
+
+static const struct got_error *
 receive_pack(struct got_pack **packp, struct imsgbuf *ibuf)
 {
 	const struct got_error *err = NULL;
@@ -1341,6 +1791,10 @@ main(int argc, char *argv[])
 			err = commit_traversal_request(&imsg, &ibuf, pack,
 			    packidx, &objcache);
 			break;
+		case GOT_IMSG_OBJECT_ENUMERATION_REQUEST:
+			err = enumeration_request(&imsg, &ibuf, pack,
+			    packidx, &objcache);
+			break;
 		default:
 			err = got_error(GOT_ERR_PRIVSEP_MSG);
 			break;