From: Stefan Sperling Subject: object enumeration in got-read-pack To: gameoftrees@openbsd.org Date: Thu, 19 May 2022 18:14:07 +0200 This patch adds object enumeration support to got-read-pack. The initial step of packing becomes faster if a large pack file is available which contains most of the commits and trees which need to be loaded in order to enumerate the set of objects to be packed. We gain speed by cutting imsg traffic between the main process and got-read-pack during this phase. The main process no longer needs to send requests for individual objects as long as got-read-pack is able to enumerate them. This seems solid in my testing. More testing would be very welcome. ok? diff 3d5026743b4462677a70d5a09bf4e5b65caee742 e61dc599e38172053b4e92e7d5331749e2d5e458 blob - 709eec07d085ee013da4bb4ebb657e8f97fa16b4 blob + 60f97103b1abf937d5f4d34671be29fb3599df48 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -133,3 +133,13 @@ const struct got_error *got_object_tree_entry_dup(stru const struct got_error *got_traverse_packed_commits( struct got_object_id_queue *, struct got_object_id *, const char *, struct got_repository *); + +typedef const struct got_error *(*got_object_enumerate_commit_cb)(void *, + time_t, struct got_object_id *, struct got_repository *); +typedef const struct got_error *(*got_object_enumerate_tree_cb)(void *, + struct got_tree_object *, time_t, struct got_object_id *, const char *, + struct got_repository *); + +const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb, + got_object_enumerate_tree_cb, void *, struct got_object_id **, int, + struct got_packidx *, struct got_repository *); blob - e719a95bde6bbe971668bb22ea3a72e2990ad927 blob + c6c29a099c6a9b6dc6883641ef3ef96ce7f47156 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -145,6 +145,11 @@ enum got_imsg_type { GOT_IMSG_COMMIT_TRAVERSAL_REQUEST, GOT_IMSG_TRAVERSED_COMMITS, GOT_IMSG_COMMIT_TRAVERSAL_DONE, + GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + 
GOT_IMSG_ENUMERATED_COMMIT, + GOT_IMSG_ENUMERATED_TREE, + GOT_IMSG_TREE_ENUMERATION_DONE, + GOT_IMSG_OBJECT_ENUMERATION_DONE, /* Message sending file descriptor to a temporary file. */ GOT_IMSG_TMPFD, @@ -556,6 +561,22 @@ struct got_imsg_traversed_commits { /* Followed by ncommit IDs of SHA1_DIGEST_LENGTH each */ } __attribute__((__packed__)); +/* Structure for GOT_IMSG_ENUMERATED_COMMIT */ +struct got_imsg_enumerated_commit { + uint8_t id[SHA1_DIGEST_LENGTH]; + time_t mtime; +} __attribute__((__packed__)); + +/* Structure for GOT_IMSG_ENUMERATED_TREE */ +struct got_imsg_enumerated_tree { + uint8_t id[SHA1_DIGEST_LENGTH]; /* tree ID */ + int nentries; /* number of tree entries */ + + /* Followed by tree's path in remaining data of imsg buffer. */ + + /* Followed by nentries * GOT_IMSG_TREE_ENTRY messages. */ +} __attribute__((__packed__)); + /* * Structure for GOT_IMSG_GOTCONFIG_REMOTE and * GOT_IMSG_GOTCONFIG_REMOTE data. @@ -721,6 +742,18 @@ const struct got_error *got_privsep_send_commit_traver const struct got_error *got_privsep_recv_traversed_commits( struct got_commit_object **, struct got_object_id **, struct got_object_id_queue *, struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_tree(size_t *, + struct imsgbuf *, struct got_object_id *, const char *, + struct got_parsed_tree_entry *, int); +const struct got_error *got_privsep_send_object_enumeration_request( + struct imsgbuf *); +const struct got_error *got_privsep_send_object_enumeration_done( + struct imsgbuf *); +const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *, + struct got_object_id *, time_t); +const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *, + got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *, + struct got_repository *); const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int, struct got_object_id *); blob - 2d612890612d7d8a8e30549c38659cd083a2e41e blob + 
1b025984a415471435ce11fea6762fde7d0051ce --- lib/object.c +++ lib/object.c @@ -60,6 +60,10 @@ #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) #endif +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + struct got_object_id * got_object_get_id(struct got_object *obj) { @@ -2390,3 +2394,70 @@ done: free(changed_commit_id); return err; } + +const struct got_error * +got_object_enumerate(got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_object_id **commit_ids, int ncommits, + struct got_packidx *packidx, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS]; + struct got_pack *pack; + char *path_packfile = NULL; + int i, j = 0; + + err = got_packidx_get_packfile_path(&path_packfile, + packidx->path_packidx); + if (err) + return err; + + pack = got_repo_get_cached_pack(repo, path_packfile); + if (pack == NULL) { + err = got_repo_cache_pack(&pack, repo, path_packfile, packidx); + if (err) + goto done; + } + + if (pack->privsep_child == NULL) { + err = start_pack_privsep_child(pack, packidx); + if (err) + goto done; + } + + err = got_privsep_send_object_enumeration_request( + pack->privsep_child->ibuf); + if (err) + goto done; + + /* + * XXX This is stupid. Consider adding a function which + * does the chunking internally? 
+ */ + for (i = 0; i < ncommits; i++) { + j = i % nitems(ids); + ids[j] = commit_ids[i]; + if (j >= nitems(ids) - 1) { + err = got_privsep_send_object_idlist( + pack->privsep_child->ibuf, ids, j + 1); + if (err) + goto done; + } + } + if (j > 0) { + err = got_privsep_send_object_idlist( + pack->privsep_child->ibuf, ids, j + 1); + if (err) + goto done; + } + + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); + if (err) + goto done; + + err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf, + cb_commit, cb_tree, cb_arg, repo); +done: + free(path_packfile); + return err; +} blob - 5415448aa86fb16df21cd1e028be51fecc89f699 blob + 57ca13500de704bf62dcc5e79ac5a9c078b9f90e --- lib/pack_create.c +++ lib/pack_create.c @@ -906,21 +906,16 @@ add_object(int want_meta, struct got_object_idset *ids static const struct got_error * load_tree_entries(struct got_object_id_queue *ids, int want_meta, struct got_object_idset *idset, struct got_object_idset *idset_exclude, - struct got_object_id *tree_id, + struct got_tree_object *tree, const char *dpath, time_t mtime, struct got_repository *repo, int loose_obj_only, int *ncolored, int *nfound, int *ntrees, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; - struct got_tree_object *tree; char *p = NULL; int i; - err = got_object_open_as_tree(&tree, repo, tree_id); - if (err) - return err; - (*ntrees)++; err = report_progress(progress_cb, progress_arg, rl, *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0); @@ -942,8 +937,16 @@ load_tree_entries(struct got_object_id_queue *ids, int got_object_idset_contains(idset, id) || got_object_idset_contains(idset_exclude, id)) continue; - - if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "", + + /* + * If got-read-pack is crawling trees for us then + * we are only here to collect blob IDs. 
+ */ + if (ids == NULL && S_ISDIR(mode)) + continue; + + if (asprintf(&p, "%s%s%s", dpath, + got_path_is_root_dir(dpath) ? "" : "/", got_tree_entry_get_name(e)) == -1) { err = got_error_from_errno("asprintf"); break; @@ -954,6 +957,8 @@ load_tree_entries(struct got_object_id_queue *ids, int err = got_object_qid_alloc(&qid, id); if (err) break; + qid->data = p; + p = NULL; STAILQ_INSERT_TAIL(ids, qid, entry); } else if (S_ISREG(mode) || S_ISLNK(mode)) { err = add_object(want_meta, @@ -963,12 +968,14 @@ load_tree_entries(struct got_object_id_queue *ids, int progress_cb, progress_arg, rl); if (err) break; + free(p); + p = NULL; + } else { /* should not happen... */ + free(p); + p = NULL; } - free(p); - p = NULL; } - got_object_tree_close(tree); free(p); return err; } @@ -985,6 +992,7 @@ load_tree(int want_meta, struct got_object_idset *idse const struct got_error *err = NULL; struct got_object_id_queue tree_ids; struct got_object_qid *qid; + struct got_tree_object *tree = NULL; if (got_object_idset_contains(idset, tree_id) || got_object_idset_contains(idset_exclude, tree_id)) @@ -1022,16 +1030,24 @@ load_tree(int want_meta, struct got_object_idset *idse break; } + err = got_object_open_as_tree(&tree, repo, &qid->id); + if (err) + break; + err = load_tree_entries(&tree_ids, want_meta, idset, - idset_exclude, &qid->id, - dpath, mtime, repo, loose_obj_only, ncolored, nfound, - ntrees, progress_cb, progress_arg, rl, + idset_exclude, tree, dpath, mtime, repo, loose_obj_only, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, cancel_cb, cancel_arg); got_object_qid_free(qid); if (err) break; + + got_object_tree_close(tree); + tree = NULL; } + if (tree) + got_object_tree_close(tree); got_object_id_queue_free(&tree_ids); return err; } @@ -1436,7 +1452,195 @@ done: return err; } +struct load_packed_obj_arg { + /* output parameters: */ + struct got_object_id *id; + char *dpath; + time_t mtime; + + /* input parameters: */ + int want_meta; + struct got_object_idset *idset; 
+ struct got_object_idset *idset_exclude; + int loose_obj_only; + int *ncolored; + int *nfound; + int *ntrees; + got_pack_progress_cb progress_cb; + void *progress_arg; + struct got_ratelimit *rl; + got_cancel_cb cancel_cb; + void *cancel_arg; +}; + static const struct got_error * +load_packed_commit_id(void *arg, time_t mtime, struct got_object_id *id, + struct got_repository *repo) +{ + struct load_packed_obj_arg *a = arg; + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + return add_object(a->want_meta, + a->want_meta ? a->idset : a->idset_exclude, + id, "", GOT_OBJ_TYPE_COMMIT, mtime, a->loose_obj_only, repo, + a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); +} + +static const struct got_error * +load_packed_tree_ids(void *arg, struct got_tree_object *tree, time_t mtime, + struct got_object_id *id, const char *dpath, struct got_repository *repo) +{ + const struct got_error *err; + struct load_packed_obj_arg *a = arg; + const char *relpath; + + /* + * When we receive a tree's ID and path but not the tree itself, + * this tree object was not found in the pack file. This is the + * last time we are being called for this optimized traversal. + * Return from here and switch to loading objects the slow way. + */ + if (tree == NULL) { + free(a->id); + a->id = got_object_id_dup(id); + if (a->id == NULL) + return got_error_from_errno("got_object_id_dup"); + + free(a->dpath); + a->dpath = strdup(dpath); + if (a->dpath == NULL) + return got_error_from_errno("strdup"); + + a->mtime = mtime; + return NULL; + } + + if (got_object_idset_contains(a->idset, id) || + got_object_idset_contains(a->idset_exclude, id)) + return NULL; + + relpath = dpath; + while (relpath[0] == '/') + relpath++; + + err = add_object(a->want_meta, + a->want_meta ? 
a->idset : a->idset_exclude, + id, relpath, GOT_OBJ_TYPE_TREE, mtime, a->loose_obj_only, + repo, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl); + if (err) + return err; + + return load_tree_entries(NULL, a->want_meta, a->idset, + a->idset_exclude, tree, dpath, mtime, repo, + a->loose_obj_only, a->ncolored, a->nfound, a->ntrees, + a->progress_cb, a->progress_arg, a->rl, + a->cancel_cb, a->cancel_arg); +} + +static const struct got_error * +load_packed_object_ids(struct got_object_id **commits, int ncommits, + int want_meta, struct got_object_idset *idset, + struct got_object_idset *idset_exclude, int loose_obj_only, + struct got_repository *repo, struct got_packidx *packidx, + int *ncolored, int *nfound, int *ntrees, + got_pack_progress_cb progress_cb, void *progress_arg, + struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct load_packed_obj_arg lpa; + + memset(&lpa, 0, sizeof(lpa)); + lpa.want_meta = want_meta; + lpa.idset = idset; + lpa.idset_exclude = idset_exclude; + lpa.loose_obj_only = loose_obj_only; + lpa.ncolored = ncolored; + lpa.nfound = nfound; + lpa.ntrees = ntrees; + lpa.progress_cb = progress_cb; + lpa.progress_arg = progress_arg; + lpa.rl = rl; + lpa.cancel_cb = cancel_cb; + lpa.cancel_arg = cancel_arg; + + /* Attempt to load objects via got-read-pack, as far as possible. */ + err = got_object_enumerate(load_packed_commit_id, + load_packed_tree_ids, &lpa, commits, ncommits, packidx, repo); + if (err) + return err; + + if (lpa.id == NULL) + return NULL; + + /* + * An incomplete tree hierarchy was present in the pack file + * and caused loading to be aborted midway through a commit. + * Continue loading trees the slow way. 
+ */ + err = load_tree(want_meta, idset, idset_exclude, + lpa.id, lpa.dpath, lpa.mtime, repo, loose_obj_only, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + free(lpa.id); + free(lpa.dpath); + return err; +} + +static const struct got_error * +find_pack_for_enumeration(struct got_packidx **best_packidx, + struct got_object_id **ids, int nids, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_pathlist_entry *pe; + const char *best_packidx_path = NULL; + int nobj_max = 0; + int ncommits_max = 0; + + *best_packidx = NULL; + + /* + * Find the largest pack which contains at least some of the + * commits and tags we are interested in. + */ + TAILQ_FOREACH(pe, &repo->packidx_paths, entry) { + const char *path_packidx = pe->path; + struct got_packidx *packidx; + int nobj, i, idx, ncommits = 0; + + err = got_repo_get_packidx(&packidx, path_packidx, repo); + if (err) + break; + + nobj = be32toh(packidx->hdr.fanout_table[0xff]); + if (nobj <= nobj_max) + continue; + + for (i = 0; i < nids; i++) { + idx = got_packidx_get_object_idx(packidx, ids[i]); + if (idx != -1) + ncommits++; + } + if (ncommits > ncommits_max) { + best_packidx_path = path_packidx; + nobj_max = nobj; + ncommits_max = ncommits; + } + } + + if (best_packidx_path) { + err = got_repo_get_packidx(best_packidx, best_packidx_path, + repo); + } + + return err; +} + +static const struct got_error * load_object_ids(int *ncolored, int *nfound, int *ntrees, struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs, struct got_object_id **ours, int nours, struct got_repository *repo, @@ -1445,6 +1649,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree { const struct got_error *err = NULL; struct got_object_id **ids = NULL; + struct got_packidx *packidx = NULL; int i, nobj = 0, obj_type; struct got_object_idset *idset_exclude; @@ -1461,6 +1666,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree if (err) goto done; + 
err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(theirs, ntheirs, 0, + idset, idset_exclude, loose_obj_only, repo, packidx, + ncolored, nfound, ntrees, progress_cb, progress_arg, rl, + cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < ntheirs; i++) { struct got_object_id *id = theirs[i]; if (id == NULL) @@ -1485,6 +1702,18 @@ load_object_ids(int *ncolored, int *nfound, int *ntree } } + err = find_pack_for_enumeration(&packidx, ids, nobj, repo); + if (err) + goto done; + if (packidx) { + err = load_packed_object_ids(ids, nobj, 1, + idset, idset_exclude, loose_obj_only, repo, packidx, + ncolored, nfound, ntrees, + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); + if (err) + goto done; + } + for (i = 0; i < nobj; i++) { err = load_commit(1, idset, idset_exclude, ids[i], repo, loose_obj_only, ncolored, nfound, ntrees, blob - 782f94ad26528e54d59fa5855bd1a0a4f674588b blob + 88202f185de9b1d38214b3170fcca808e026fe29 --- lib/privsep.c +++ lib/privsep.c @@ -1443,8 +1443,8 @@ got_privsep_recv_commit(struct got_commit_object **com } static const struct got_error * -send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, - int idx0, int idxN, size_t len) +send_tree_entries_batch(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int idx0, int idxN, size_t len) { struct ibuf *wbuf; struct got_imsg_tree_entries ientries; @@ -1479,21 +1479,14 @@ send_tree_entries(struct imsgbuf *ibuf, struct got_par return NULL; } -const struct got_error * -got_privsep_send_tree(struct imsgbuf *ibuf, - struct got_parsed_tree_entry *entries, int nentries) +static const struct got_error * +send_tree_entries(struct imsgbuf *ibuf, struct got_parsed_tree_entry *entries, + int nentries) { const struct got_error *err = NULL; - struct got_imsg_tree_object itree; - size_t entries_len; int i, j; + size_t entries_len = sizeof(struct got_imsg_tree_entries); - 
itree.nentries = nentries; - if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) - == -1) - return got_error_from_errno("imsg_compose TREE"); - - entries_len = sizeof(struct got_imsg_tree_entries); i = 0; for (j = 0; j < nentries; j++) { struct got_parsed_tree_entry *pte = &entries[j]; @@ -1501,7 +1494,7 @@ got_privsep_send_tree(struct imsgbuf *ibuf, if (j > 0 && entries_len + len > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = send_tree_entries(ibuf, entries, + err = send_tree_entries_batch(ibuf, entries, i, j - 1, entries_len); if (err) return err; @@ -1513,14 +1506,98 @@ got_privsep_send_tree(struct imsgbuf *ibuf, } if (j > 0) { - err = send_tree_entries(ibuf, entries, i, j - 1, entries_len); + err = send_tree_entries_batch(ibuf, entries, i, j - 1, + entries_len); if (err) return err; } + return NULL; +} + +const struct got_error * +got_privsep_send_tree(struct imsgbuf *ibuf, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_object itree; + + itree.nentries = nentries; + if (imsg_compose(ibuf, GOT_IMSG_TREE, 0, 0, -1, &itree, sizeof(itree)) + == -1) + return got_error_from_errno("imsg_compose TREE"); + + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + return flush_imsg(ibuf); } + +static const struct got_error * +recv_tree_entries(void *data, size_t datalen, struct got_tree_object *tree, + int *nentries) +{ + const struct got_error *err = NULL; + struct got_imsg_tree_entries *ientries; + struct got_tree_entry *te; + size_t te_offset; + size_t i; + + if (datalen <= sizeof(*ientries) || + datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) + return got_error(GOT_ERR_PRIVSEP_LEN); + + ientries = (struct got_imsg_tree_entries *)data; + if (ientries->nentries > INT_MAX) { + return got_error_msg(GOT_ERR_NO_SPACE, + "too many tree entries"); + } + + te_offset = sizeof(*ientries); + for (i = 0; i < ientries->nentries; i++) { + struct got_imsg_tree_entry ite; + const 
char *te_name; + uint8_t *buf = (uint8_t *)data + te_offset; + + if (te_offset >= datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + /* Might not be aligned, size is ~32 bytes. */ + memcpy(&ite, buf, sizeof(ite)); + + if (ite.namelen >= sizeof(te->name)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + if (te_offset + sizeof(ite) + ite.namelen > + datalen) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + + if (*nentries >= tree->nentries) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + te = &tree->entries[*nentries]; + te_name = buf + sizeof(ite); + memcpy(te->name, te_name, ite.namelen); + te->name[ite.namelen] = '\0'; + memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); + te->mode = ite.mode; + te->idx = *nentries; + (*nentries)++; + + te_offset += sizeof(ite) + ite.namelen; + } + + return err; +} + const struct got_error * got_privsep_recv_tree(struct got_tree_object **tree, struct imsgbuf *ibuf) { @@ -1529,7 +1606,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s MIN(sizeof(struct got_imsg_error), sizeof(struct got_imsg_tree_object)); struct got_imsg_tree_object *itree; - size_t i; int nentries = 0; *tree = NULL; @@ -1542,9 +1618,6 @@ got_privsep_recv_tree(struct got_tree_object **tree, s struct imsg imsg; size_t n; size_t datalen; - struct got_imsg_tree_entries *ientries; - struct got_tree_entry *te = NULL; - size_t te_offset; n = imsg_get(ibuf, &imsg); if (n == 0) { @@ -1611,56 +1684,8 @@ got_privsep_recv_tree(struct got_tree_object **tree, s err = got_error(GOT_ERR_PRIVSEP_MSG); break; } - if (datalen <= sizeof(*ientries) || - datalen > MAX_IMSGSIZE - IMSG_HEADER_SIZE) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - ientries = imsg.data; - if (ientries->nentries > INT_MAX) { - err = got_error_msg(GOT_ERR_NO_SPACE, - "too many tree entries"); - break; - } - te_offset = sizeof(*ientries); - for (i = 0; i < ientries->nentries; i++) { - struct got_imsg_tree_entry ite; - const char *te_name; - uint8_t 
*buf = imsg.data + te_offset; - - if (te_offset >= datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - - /* Might not be aligned, size is ~32 bytes. */ - memcpy(&ite, buf, sizeof(ite)); - - if (ite.namelen >= sizeof(te->name)) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (te_offset + sizeof(ite) + ite.namelen > - datalen) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - if (nentries >= (*tree)->nentries) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - break; - } - te = &(*tree)->entries[nentries]; - te_name = buf + sizeof(ite); - memcpy(te->name, te_name, ite.namelen); - te->name[ite.namelen] = '\0'; - memcpy(te->id.sha1, ite.id, SHA1_DIGEST_LENGTH); - te->mode = ite.mode; - te->idx = nentries; - nentries++; - - te_offset += sizeof(ite) + ite.namelen; - } + err = recv_tree_entries(imsg.data, datalen, + *tree, &nentries); break; default: err = got_error(GOT_ERR_PRIVSEP_MSG); @@ -2731,6 +2756,275 @@ got_privsep_recv_traversed_commits(struct got_commit_o } const struct got_error * +got_privsep_send_enumerated_tree(size_t *totlen, struct imsgbuf *ibuf, + struct got_object_id *tree_id, const char *path, + struct got_parsed_tree_entry *entries, int nentries) +{ + const struct got_error *err = NULL; + struct ibuf *wbuf; + size_t path_len = strlen(path); + size_t msglen; + + msglen = sizeof(struct got_imsg_enumerated_tree) + path_len; + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_TREE, 0, 0, msglen); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_TREE"); + + if (imsg_add(wbuf, tree_id->sha1, SHA1_DIGEST_LENGTH) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + if (imsg_add(wbuf, &nentries, sizeof(nentries)) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + if (imsg_add(wbuf, path, path_len) == -1) { + err = got_error_from_errno("imsg_add ENUMERATED_TREE"); + ibuf_free(wbuf); + return err; + } + + 
wbuf->fd = -1; + imsg_close(ibuf, wbuf); + + if (entries) { + err = send_tree_entries(ibuf, entries, nentries); + if (err) + return err; + } + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_request(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_REQUEST, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_REQUEST"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_object_enumeration_done(struct imsgbuf *ibuf) +{ + if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_DONE, + 0, 0, -1, NULL, 0) == -1) + return got_error_from_errno("imsg_compose " + "OBJECT_ENUMERATION_DONE"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_enumerated_commit(struct imsgbuf *ibuf, + struct got_object_id *id, time_t mtime) +{ + struct ibuf *wbuf; + + wbuf = imsg_create(ibuf, GOT_IMSG_ENUMERATED_COMMIT, 0, 0, + sizeof(struct got_imsg_enumerated_commit) + SHA1_DIGEST_LENGTH); + if (wbuf == NULL) + return got_error_from_errno("imsg_create ENUMERATED_COMMIT"); + + /* Keep in sync with struct got_imsg_enumerated_commit! */ + if (imsg_add(wbuf, id, SHA1_DIGEST_LENGTH) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + if (imsg_add(wbuf, &mtime, sizeof(mtime)) == -1) + return got_error_from_errno("imsg_add ENUMERATED_COMMIT"); + + wbuf->fd = -1; + imsg_close(ibuf, wbuf); + /* Don't flush yet, tree entries or ENUMERATION_DONE will follow. 
*/ + return NULL; +} + +const struct got_error * +got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf, + got_object_enumerate_commit_cb cb_commit, + got_object_enumerate_tree_cb cb_tree, void *cb_arg, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct imsg imsg; + struct got_imsg_enumerated_commit *icommit = NULL; + struct got_object_id commit_id; + int have_commit = 0; + time_t mtime = 0; + struct got_tree_object tree; + struct got_imsg_enumerated_tree *itree; + struct got_object_id tree_id; + char *path = NULL, *canon_path = NULL; + size_t datalen, path_len; + int nentries = -1; + int done = 0; + + memset(&tree, 0, sizeof(tree)); + + while (!done) { + err = got_privsep_recv_imsg(&imsg, ibuf, 0); + if (err) + return err; + + datalen = imsg.hdr.len - IMSG_HEADER_SIZE; + switch (imsg.hdr.type) { + case GOT_IMSG_ENUMERATED_COMMIT: + if (have_commit && nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen != sizeof(*icommit)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + icommit = (struct got_imsg_enumerated_commit *)imsg.data; + memcpy(commit_id.sha1, icommit->id, SHA1_DIGEST_LENGTH); + mtime = icommit->mtime; + err = cb_commit(cb_arg, mtime, &commit_id, repo); + if (err) + break; + have_commit = 1; + break; + case GOT_IMSG_ENUMERATED_TREE: + /* Should be preceeded by GOT_IMSG_ENUMERATED_COMMIT. 
*/ + if (!have_commit) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (datalen < sizeof(*itree)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + itree = imsg.data; + path_len = datalen - sizeof(*itree); + if (path_len == 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + memcpy(tree_id.sha1, itree->id, sizeof(tree_id.sha1)); + free(path); + path = malloc(path_len + 1); + if (path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + free(canon_path); + canon_path = malloc(path_len + 1); + if (canon_path == NULL) { + err = got_error_from_errno("malloc"); + break; + } + memcpy(path, (uint8_t *)imsg.data + sizeof(*itree), + path_len); + path[path_len] = '\0'; + if (!got_path_is_absolute(path)) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (got_path_is_root_dir(path)) { + /* XXX check what got_canonpath() does wrong */ + canon_path[0] = '/'; + canon_path[1] = '\0'; + } else { + err = got_canonpath(path, canon_path, + path_len + 1); + if (err) + break; + } + if (strcmp(path, canon_path) != 0) { + err = got_error(GOT_ERR_BAD_PATH); + break; + } + if (nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries < -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + if (itree->nentries == -1) { + /* Tree was not found in pack file. */ + err = cb_tree(cb_arg, NULL, mtime, &tree_id, + path, repo); + break; + } + if (itree->nentries > INT_MAX) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + tree.entries = calloc(itree->nentries, + sizeof(struct got_tree_entry)); + if (tree.entries == NULL) + err = got_error_from_errno("calloc"); + if (itree->nentries == 0) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. 
*/ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } else { + tree.nentries = itree->nentries; + nentries = 0; + } + break; + case GOT_IMSG_TREE_ENTRIES: + /* Should be preceeded by GOT_IMSG_ENUMERATED_TREE. */ + if (nentries <= -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + err = recv_tree_entries(imsg.data, datalen, + &tree, &nentries); + if (err) + break; + if (tree.nentries == nentries) { + err = cb_tree(cb_arg, &tree, mtime, &tree_id, + path, repo); + if (err) + break; + + /* Prepare for next tree. */ + free(tree.entries); + memset(&tree, 0, sizeof(tree)); + nentries = -1; + } + break; + case GOT_IMSG_TREE_ENUMERATION_DONE: + /* All trees have been found and traversed. */ + if (path == NULL || nentries != -1) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + have_commit = 0; + break; + case GOT_IMSG_OBJECT_ENUMERATION_DONE: + done = 1; + break; + default: + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + + imsg_free(&imsg); + if (err) + break; + } + + free(path); + free(canon_path); + return err; +} + +const struct got_error * got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx, struct got_object_id *id) { blob - ea9a7e564cb840af0d6a61c8c8e0cf5ed3d93147 blob + 41edfaa4585e69c5e5bdf199212200f20b694c25 --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -14,6 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/
 
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/uio.h>
@@ -587,7 +588,6 @@ send_commit_traversal_done(struct imsgbuf *ibuf)
 	return got_privsep_flush_imsg(ibuf);
 }
 
-
 static const struct got_error *
 commit_traversal_request(struct imsg *imsg, struct imsgbuf *ibuf,
     struct got_pack *pack, struct got_packidx *packidx,
@@ -1022,6 +1022,41 @@ recv_object_ids(struct got_object_idset *idset, struct
 }
 
+/*
+ * Receive batches of object IDs via got_privsep_recv_object_idlist() until
+ * it reports an error or completion, appending a newly allocated entry for
+ * each ID to the tail of queue. Entries queued before an error are left
+ * on queue.
+ */
 static const struct got_error *
+recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf)
+{
+	const struct got_error *err = NULL;
+	int done = 0;
+	struct got_object_qid *qid;
+	struct got_object_id *ids;
+	size_t nids, i;
+
+	for (;;) {
+		err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf);
+		if (err || done)
+			break;
+		for (i = 0; i < nids; i++) {
+			err = got_object_qid_alloc_partial(&qid);
+			if (err) {
+				free(ids);
+				return err;
+			}
+			memcpy(&qid->id, &ids[i], sizeof(qid->id));
+			STAILQ_INSERT_TAIL(queue, qid, entry);
+		}
+		/* The IDs were copied above; release the received array. */
+		free(ids);
+	}
+
+	return err;
+}
+
+static const struct got_error *
 delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf,
     FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx)
 {
@@ -1132,6 +1167,321 @@ done:
 }
 
+/*
+ * Compose an empty GOT_IMSG_TREE_ENUMERATION_DONE message on ibuf and
+ * flush it.
+ */
 static const struct got_error *
+send_tree_enumeration_done(struct imsgbuf *ibuf)
+{
+	if (imsg_compose(ibuf, GOT_IMSG_TREE_ENUMERATION_DONE, 0, 0, -1,
+	    NULL, 0) == -1)
+		return got_error_from_errno("imsg_compose TREE_ENUMERATION_DONE");
+
+	return got_privsep_flush_imsg(ibuf);
+}
+
+/*
+ * Enumerate the tree tree_id and the subtrees reachable from it, in FIFO
+ * order, sending each visited tree with its entries to ibuf via
+ * got_privsep_send_enumerated_tree(). Visited tree IDs are added to idset
+ * and subtrees already present in idset are not queued. A tree which is
+ * not found in packidx is sent with a NULL entry list and an entry count
+ * of -1, and the walk ends there. On success the walk is terminated with
+ * send_tree_enumeration_done().
+ *
+ * GOT_ERR_PRIVSEP_PIPE is converted to success; any other error is also
+ * reported to ibuf with got_privsep_send_error() before being returned.
+ */
+static const struct got_error *
+enumerate_tree(struct imsgbuf *ibuf, size_t *totlen,
+    struct got_object_id *tree_id,
+    const char *path, struct got_pack *pack, struct got_packidx *packidx,
+    struct got_object_cache *objcache, struct got_object_idset *idset)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id_queue ids;
+	struct got_object_qid *qid;
+	uint8_t *buf = NULL;
+	struct got_parsed_tree_entry *entries = NULL;
+
+	STAILQ_INIT(&ids);
+
+	err = got_object_qid_alloc_partial(&qid);
+	if (err)
+		return err;
+	memcpy(&qid->id.sha1, tree_id, SHA1_DIGEST_LENGTH);
+	qid->data = strdup(path);
+	if (qid->data == NULL) {
+		err = got_error_from_errno("strdup");
+		goto done;
+	}
+	STAILQ_INSERT_TAIL(&ids, qid, entry);
+	qid = NULL;
+
+	do {
+		const char *path;
+		int idx, nentries, i;
+
+		if (sigint_received) {
+			err = got_error(GOT_ERR_CANCELLED);
+			goto done;
+		}
+
+		qid = STAILQ_FIRST(&ids);
+		STAILQ_REMOVE_HEAD(&ids, entry);
+		path = qid->data;
+
+		idx = got_packidx_get_object_idx(packidx, &qid->id);
+		if (idx == -1) {
+			err = got_privsep_send_enumerated_tree(totlen, ibuf,
+			    &qid->id, path, NULL, -1);
+			if (err)
+				goto done;
+			break;
+		}
+
+		err = open_tree(&buf, &entries, &nentries,
+		    pack, packidx, idx, &qid->id, objcache);
+		if (err) {
+			if (err->code != GOT_ERR_NO_OBJ)
+				goto done;
+		}
+
+		err = got_privsep_send_enumerated_tree(totlen,
+		    ibuf, &qid->id, path, entries, nentries);
+		if (err)
+			goto done;
+
+		err = got_object_idset_add(idset, &qid->id, NULL);
+		if (err)
+			goto done;
+
+		for (i = 0; i < nentries; i++) {
+			struct got_object_qid *eqid = NULL;
+			struct got_parsed_tree_entry *pte = &entries[i];
+			char *p;
+
+			if (!S_ISDIR(pte->mode))
+				continue;
+
+			err = got_object_qid_alloc_partial(&eqid);
+			if (err)
+				goto done;
+			memcpy(eqid->id.sha1, pte->id, sizeof(eqid->id.sha1));
+
+			if (got_object_idset_contains(idset, &eqid->id)) {
+				got_object_qid_free(eqid);
+				continue;
+			}
+
+			if (asprintf(&p, "%s%s%s", path,
+			    got_path_is_root_dir(path) ? "" : "/",
+			    pte->name) == -1) {
+				err = got_error_from_errno("asprintf");
+				got_object_qid_free(eqid);
+				goto done;
+			}
+			eqid->data = p;
+			STAILQ_INSERT_TAIL(&ids, eqid, entry);
+			idx = got_packidx_get_object_idx(packidx, &eqid->id);
+			if (idx == -1)
+				break;
+		}
+
+		free(qid->data);
+		got_object_qid_free(qid);
+		qid = NULL;
+
+		free(entries);
+		entries = NULL;
+		free(buf);
+		buf = NULL;
+	} while (!STAILQ_EMPTY(&ids));
+
+	err = send_tree_enumeration_done(ibuf);
+done:
+	free(buf);
+	if (qid)
+		free(qid->data);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&ids);
+	free(entries);
+	if (err) {
+		if (err->code == GOT_ERR_PRIVSEP_PIPE)
+			err = NULL;
+		else
+			got_privsep_send_error(ibuf, err);
+	}
+
+	return err;
+}
+
+/*
+ * Serve a GOT_IMSG_OBJECT_ENUMERATION_REQUEST. Object IDs are received
+ * with recv_object_id_queue() and processed in FIFO order. Each ID must
+ * refer to a commit or a tag; anything else yields GOT_ERR_OBJ_TYPE.
+ * For each commit, the queued ID and the commit's committer time are
+ * sent to ibuf. If the commit's tree ID is already in idset the commit
+ * is not processed any further; otherwise the tree is walked with
+ * enumerate_tree() and the commit's parents are appended to the queue.
+ * The loop ends early when a required object is not found in packidx.
+ * Unless an error occurred, got_privsep_send_object_enumeration_done()
+ * is called last.
+ */
+static const struct got_error *
+enumeration_request(struct imsg *imsg, struct imsgbuf *ibuf,
+    struct got_pack *pack, struct got_packidx *packidx,
+    struct got_object_cache *objcache)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id_queue commit_ids;
+	const struct got_object_id_queue *parents = NULL;
+	struct got_object_qid *qid = NULL;
+	struct got_object *obj = NULL;
+	struct got_commit_object *commit = NULL;
+	struct got_object_id *tree_id = NULL;
+	size_t totlen = 0;
+	struct got_object_idset *idset;
+	int idx;
+
+	STAILQ_INIT(&commit_ids);
+
+	idset = got_object_idset_alloc();
+	if (idset == NULL)
+		return got_error_from_errno("got_object_idset_alloc");
+
+	err = recv_object_id_queue(&commit_ids, ibuf);
+	if (err)
+		goto done;
+
+	while (!STAILQ_EMPTY(&commit_ids)) {
+		if (sigint_received) {
+			err = got_error(GOT_ERR_CANCELLED);
+			goto done;
+		}
+
+		qid = STAILQ_FIRST(&commit_ids);
+		STAILQ_REMOVE_HEAD(&commit_ids, entry);
+
+		if (got_object_idset_contains(idset, &qid->id)) {
+			got_object_qid_free(qid);
+			qid = NULL;
+			continue;
+		}
+
+		idx = got_packidx_get_object_idx(packidx, &qid->id);
+		if (idx == -1)
+			break;
+
+		err = open_object(&obj, pack, packidx, idx, &qid->id,
+		    objcache);
+		if (err)
+			goto done;
+		if (obj->type == GOT_OBJ_TYPE_TAG) {
+			struct got_tag_object *tag;
+			uint8_t *buf;
+			size_t len;
+			err = got_packfile_extract_object_to_mem(&buf,
+			    &len, obj, pack);
+			if (err)
+				goto done;
+			obj->size = len;
+			err = got_object_parse_tag(&tag, buf, len);
+			if (err) {
+				free(buf);
+				goto done;
+			}
+			/* idx still names the tag; find the tagged object. */
+			idx = got_packidx_get_object_idx(packidx, &tag->id);
+			if (idx == -1) {
+				/* Tagged object is not in this pack. */
+				got_object_tag_close(tag);
+				free(buf);
+				break;
+			}
+			err = open_commit(&commit, pack, packidx, idx,
+			    &tag->id, objcache);
+			got_object_tag_close(tag);
+			free(buf);
+			if (err)
+				goto done;
+		} else if (obj->type == GOT_OBJ_TYPE_COMMIT) {
+			err = open_commit(&commit, pack, packidx, idx,
+			    &qid->id, objcache);
+			if (err)
+				goto done;
+		} else {
+			err = got_error(GOT_ERR_OBJ_TYPE);
+			goto done;
+		}
+		got_object_close(obj);
+		obj = NULL;
+
+		err = got_privsep_send_enumerated_commit(ibuf, &qid->id,
+		    got_object_commit_get_committer_time(commit));
+		if (err)
+			goto done;
+
+		tree_id = got_object_commit_get_tree_id(commit);
+		idx = got_packidx_get_object_idx(packidx, tree_id);
+		if (idx == -1) {
+			err = got_privsep_send_enumerated_tree(&totlen, ibuf,
+			    tree_id, "", NULL, -1);
+			if (err)
+				goto done;
+			break;
+		}
+
+		if (got_object_idset_contains(idset, tree_id)) {
+			got_object_qid_free(qid);
+			qid = NULL;
+			/* Do not leak the commit opened above. */
+			got_object_commit_close(commit);
+			commit = NULL;
+			continue;
+		}
+
+		err = enumerate_tree(ibuf, &totlen, tree_id, "/",
+		    pack, packidx, objcache, idset);
+		if (err)
+			goto done;
+
+		got_object_qid_free(qid);
+		qid = NULL;
+
+		parents = got_object_commit_get_parent_ids(commit);
+		if (parents) {
+			struct got_object_qid *pid;
+			STAILQ_FOREACH(pid, parents, entry) {
+				if (got_object_idset_contains(idset, &pid->id))
+					continue;
+				err = got_object_qid_alloc_partial(&qid);
+				if (err)
+					goto done;
+				memcpy(&qid->id, &pid->id, sizeof(qid->id));
+				STAILQ_INSERT_TAIL(&commit_ids, qid, entry);
+				qid = NULL;
+			}
+		}
+
+		got_object_commit_close(commit);
+		commit = NULL;
+	}
+
+	err = got_privsep_send_object_enumeration_done(ibuf);
+	if (err)
+		goto done;
+done:
+	if (obj)
+		got_object_close(obj);
+	if (commit)
+		got_object_commit_close(commit);
+	got_object_qid_free(qid);
+	got_object_id_queue_free(&commit_ids);
+	got_object_idset_free(idset);
+	return err;
+}
+
+static const struct got_error *
 receive_pack(struct got_pack **packp, struct imsgbuf *ibuf)
 {
 	const struct got_error *err = NULL;
@@ -1344,6 +1694,10 @@ main(int argc, char *argv[])
 			err = commit_traversal_request(&imsg, &ibuf, pack,
 			    packidx, &objcache);
 			break;
+		case GOT_IMSG_OBJECT_ENUMERATION_REQUEST:
+			err = enumeration_request(&imsg, &ibuf, pack,
+			    packidx, &objcache);
+			break;
 		default:
 			err = got_error(GOT_ERR_PRIVSEP_MSG);
 			break;