From: Stefan Sperling Subject: findtwixt() speedup To: gameoftrees@openbsd.org Date: Mon, 11 Apr 2022 17:57:27 +0200 I have been looking for ways to speed up findtwixt(). This patch speeds up simple cases like this one: gotadmin pack -a -x 0.68 main Instead of coloring all commits in history of the main branch, the new code will only color commits between main and the 0.68 tag in this case. There could be more ways to speed things up. But I want to be careful with changes in this area of the code. We are relying on this code for 'got send' to work. All regression tests are passing. Additional testing would be very welcome. During my testing I noticed some oddities in the numbers displayed during progress output. Those are due to existing bugs and can be ignored for now. ok? ----------------------------------------------- commit 337789c5338718e48ad21f80dae692a91c332ccc (color-breadth-first, noel/color-breadth-first) from: Stefan Sperling date: Mon Apr 11 15:37:43 2022 UTC speed up the 'gotadmin pack' commit coloring phase Sort references by commit date before processing them. With this we will tend to process newer commits first, which might give us an advantage for the shortcuts implemented below. Process commits in breadth-first order instead of depth-first order. This allows us to stop history traversal before reading in the entire history of at least one reference. Stop traversing as soon as no more commits remain in our set of commits we want to keep in the pack file. This condition implies that the pack will be empty, and further processing is pointless. Keep track of boundary commits, i.e. commits where we stop traversing a line of history. And stop traversing any line of history as soon as we discover that its history is rooted at a boundary commit. This detects the common case where 'got send' creates a pack file including commits on the "main" branch and excluding commits on the "origin/main" branch. In this case, coloring of commits on "origin/main" now terminates once the "origin/main" tip is reached via "main". diff 103f19a1f3b497422681f16a67d841419eb9cdd8 7f4859acfc1af7a92ae8c529ea7f856fe2b0c0fe blob - b52856ef9a6f270f64d7a55d2f968ff3d9040061 blob + d33ef960467af91b46cd71112751469664080749 --- lib/pack_create.c +++ lib/pack_create.c @@ -1101,25 +1101,35 @@ enum findtwixt_color { COLOR_DROP, COLOR_BLANK, }; -static const int findtwixt_colors[] = { - COLOR_KEEP, - COLOR_DROP, - COLOR_BLANK + +struct findtwixt_data { + int color; + struct got_object_id *tip; }; static const struct got_error * queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id, - int color, struct got_repository *repo) + int color, struct got_object_id *tip, struct got_repository *repo) { const struct got_error *err; + struct findtwixt_data *data; struct got_object_qid *qid; + data = malloc(sizeof(*data)); + if (data == NULL) + return got_error_from_errno("malloc"); + err = got_object_qid_alloc(&qid, id); - if (err) + if (err) { + free(data); return err; + } STAILQ_INSERT_TAIL(ids, qid, entry); - qid->data = (void *)&findtwixt_colors[color]; + data->color = color; + data->tip = tip; + qid->data = data; + return NULL; } @@ -1133,6 +1143,7 @@ drop_commit(struct got_object_idset *keep, struct got_ const struct got_object_id_queue *parents; struct got_object_id_queue ids; struct got_object_qid *qid; + struct got_object_idset_element *entry; STAILQ_INIT(&ids); @@ -1152,22 +1163,29 @@ drop_commit(struct got_object_idset *keep, struct got_ STAILQ_REMOVE_HEAD(&ids, entry); if (got_object_idset_contains(drop, qid->id)) { + free(qid->data); got_object_qid_free(qid); continue; } err = got_object_idset_add(drop, qid->id, NULL); if (err) { + free(qid->data); got_object_qid_free(qid); break; } - if (!got_object_idset_contains(keep, qid->id)) { + entry = got_object_idset_get_element(keep, qid->id); + if (entry == NULL) { + free(qid->data); got_object_qid_free(qid); continue; } + got_object_idset_remove_element(keep, entry); + err = got_object_open_as_commit(&commit, repo, qid->id); + free(qid->data); got_object_qid_free(qid); if (err) break; @@ -1207,6 +1225,7 @@ append_id(struct got_object_id *id, void *data, void * static const struct got_error * queue_commit_or_tag_id(struct got_object_id *id, int color, + struct got_object_id *tip, struct got_object_id_queue *ids, struct got_repository *repo) { const struct got_error *err; @@ -1226,7 +1245,7 @@ queue_commit_or_tag_id(struct got_object_id *id, int c } if (obj_type == GOT_OBJ_TYPE_COMMIT) { - err = queue_commit_id(ids, id, color, repo); + err = queue_commit_id(ids, id, color, id, repo); if (err) goto done; } @@ -1237,9 +1256,10 @@ done: } static const struct got_error * -color_commits(int *ncolored, struct got_object_id_queue *ids, +color_commits(int *ncolored, struct got_object_id_queue *parent_ids, + struct got_object_id_queue *ids, struct got_object_idset *keep, struct got_object_idset *drop, - struct got_repository *repo, + struct got_object_idset *boundaries, struct got_repository *repo, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { @@ -1248,6 +1268,8 @@ color_commits(int *ncolored, struct got_object_id_queu struct got_object_qid *qid; while (!STAILQ_EMPTY(ids)) { + struct findtwixt_data *data; + struct got_object_id *tip; int qcolor, ncolor; if (cancel_cb) { @@ -1257,7 +1279,9 @@ color_commits(int *ncolored, struct got_object_id_queu } qid = STAILQ_FIRST(ids); - qcolor = *((int *)qid->data); + data = (struct findtwixt_data *)qid->data; + qcolor = data->color; + tip = data->tip; if (got_object_idset_contains(drop, qid->id)) ncolor = COLOR_DROP; @@ -1274,7 +1298,11 @@ color_commits(int *ncolored, struct got_object_id_queu if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP && qcolor == COLOR_KEEP)) { + err = got_object_idset_add(boundaries, qid->id, NULL); + if (err) + break; STAILQ_REMOVE_HEAD(ids, entry); + free(qid->data); got_object_qid_free(qid); continue; } @@ -1284,11 +1312,20 @@ color_commits(int *ncolored, struct got_object_id_queu cancel_cb, cancel_arg); if (err) break; + err = got_object_idset_add(boundaries, qid->id, NULL); + if (err) + break; } else if (ncolor == COLOR_BLANK) { struct got_commit_object *commit; const struct got_object_id_queue *parents; struct got_object_qid *pid; + if (got_object_idset_contains(boundaries, tip)) { + STAILQ_REMOVE_HEAD(ids, entry); + got_object_qid_free(qid); + continue; + } + if (qcolor == COLOR_KEEP) err = got_object_idset_add(keep, qid->id, NULL); else @@ -1303,8 +1340,8 @@ color_commits(int *ncolored, struct got_object_id_queu parents = got_object_commit_get_parent_ids(commit); if (parents) { STAILQ_FOREACH(pid, parents, entry) { - err = queue_commit_id(ids, pid->id, - qcolor, repo); + err = queue_commit_id(parent_ids, + pid->id, qcolor, tip, repo); if (err) break; } @@ -1319,6 +1356,7 @@ color_commits(int *ncolored, struct got_object_id_queu } STAILQ_REMOVE_HEAD(ids, entry); + free(qid->data); got_object_qid_free(qid); } @@ -1336,11 +1374,13 @@ findtwixt(struct got_object_id ***res, int *nres, int struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; - struct got_object_id_queue ids; - struct got_object_idset *keep, *drop; + struct got_object_id_queue ids, parent_ids; + struct got_object_idset *keep, *drop, *boundaries = NULL; + struct got_object_qid *qid; int i, nkeep; STAILQ_INIT(&ids); + STAILQ_INIT(&parent_ids); *res = NULL; *nres = 0; *ncolored = 0; @@ -1355,11 +1395,17 @@ findtwixt(struct got_object_id ***res, int *nres, int goto done; } + boundaries = got_object_idset_alloc(); + if (boundaries == NULL) { + err = got_error_from_errno("got_object_idset_alloc"); + goto done; + } + for (i = 0; i < nhead; i++) { struct got_object_id *id = head[i]; if (id == NULL) continue; - err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo); + err = queue_commit_or_tag_id(id, COLOR_KEEP, id, &ids, repo); if (err) goto done; } @@ -1368,15 +1414,24 @@ findtwixt(struct got_object_id ***res, int *nres, int struct got_object_id *id = tail[i]; if (id == NULL) continue; - err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo); + err = queue_commit_or_tag_id(id, COLOR_DROP, id, &ids, repo); if (err) goto done; } - err = color_commits(ncolored, &ids, keep, drop, repo, - progress_cb, progress_arg, rl, cancel_cb, cancel_arg); - if (err) - goto done; + do { + while (!STAILQ_EMPTY(&parent_ids)) { + qid = STAILQ_FIRST(&parent_ids); + STAILQ_REMOVE_HEAD(&parent_ids, entry); + STAILQ_INSERT_TAIL(&ids, qid, entry); + } + err = color_commits(ncolored, &parent_ids, &ids, + keep, drop, boundaries, repo, + progress_cb, progress_arg, rl, cancel_cb, cancel_arg); + if (err) + goto done; + } while (!STAILQ_EMPTY(&parent_ids) && + got_object_idset_num_elements(keep) > 0); nkeep = got_object_idset_num_elements(keep); if (nkeep > 0) { @@ -1398,7 +1453,14 @@ findtwixt(struct got_object_id ***res, int *nres, int done: got_object_idset_free(keep); got_object_idset_free(drop); + if (boundaries) + got_object_idset_free(boundaries); + STAILQ_FOREACH(qid, &ids, entry) + free(qid->data); got_object_id_queue_free(&ids); + STAILQ_FOREACH(qid, &parent_ids, entry) + free(qid->data); + got_object_id_queue_free(&parent_ids); return err; } @@ -1419,7 +1481,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs, repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg); - if (err || nobj == 0) + if (err) goto done; for (i = 0; i < ntheirs; i++) { blob - c1adcc4de91fadddf5e049d93cc9a744c07579bf blob + a2cd689e950dde612d9eb621415378868483d8bf --- lib/repository_admin.c +++ lib/repository_admin.c @@ -75,6 +75,11 @@ get_reflist_object_ids(struct got_object_id ***ids, in *ids = NULL; *nobjects = 0; + err = got_reflist_sort(refs, + got_ref_cmp_by_commit_timestamp_descending, repo); + if (err) + return err; + *ids = reallocarray(NULL, alloc_chunksz, sizeof(struct got_object_id *)); if (*ids == NULL) return got_error_from_errno("reallocarray");