"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
findtwixt() speedup
To:
gameoftrees@openbsd.org
Date:
Mon, 11 Apr 2022 17:57:27 +0200

Download raw body.

Thread
I have been looking for ways to speed up findtwixt().

This patch speeds up simple cases like this one:
  gotadmin pack -a -x 0.68 main
Instead of coloring all commits in history of the main branch, the new
code will only color commits between main and the 0.68 tag in this case.

There could be more ways to speed things up. But I want to be careful
with changes in this area of the code. We are relying on this code for
'got send' to work.

All regression tests are passing.

Additional testing would be very welcome.
During my testing I noticed some oddities in the numbers displayed during
progress output. Those are due to existing bugs and can be ignored for now.

ok?

-----------------------------------------------
commit 337789c5338718e48ad21f80dae692a91c332ccc (color-breadth-first, noel/color-breadth-first)
from: Stefan Sperling <stsp@stsp.name>
date: Mon Apr 11 15:37:43 2022 UTC
 
 speed up the 'gotadmin pack' commit coloring phase
 
 Sort references by commit date before processing them. With this
 we will tend to process newer commits first, which might give us
 an advantage for the shortcuts implemented below.
 
 Process commits in breadth-first order instead of depth-first order.
 This allows us to stop history traversal before reading in the
 entire history of at least one reference.
 
 Stop traversing as soon as no more commits remain in our set of
 commits we want to keep in the pack file. This condition implies
 that the pack will be empty, and further processing is pointless.
 
 Keep track of boundary commits, i.e. commits where we stop traversing
 a line of history. And stop traversing any line of history as soon as
 we discover that its history is rooted at a boundary commit. This detects
 the common case where 'got send' creates a pack file including commits
 on the "main" branch and excluding commits on the "origin/main" branch.
 In this case, coloring of commits on "origin/main" now terminates once
 the "origin/main" tip is reached via "main".
 
diff 103f19a1f3b497422681f16a67d841419eb9cdd8 7f4859acfc1af7a92ae8c529ea7f856fe2b0c0fe
blob - b52856ef9a6f270f64d7a55d2f968ff3d9040061
blob + d33ef960467af91b46cd71112751469664080749
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -1101,25 +1101,35 @@ enum findtwixt_color {
 	COLOR_DROP,
 	COLOR_BLANK,
 };
-static const int findtwixt_colors[] = {
-	COLOR_KEEP,
-	COLOR_DROP,
-	COLOR_BLANK
+
+struct findtwixt_data {
+	int color;
+	struct got_object_id *tip;
 };
 
 static const struct got_error *
 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
-    int color, struct got_repository *repo)
+    int color, struct got_object_id *tip, struct got_repository *repo)
 {
 	const struct got_error *err;
+	struct findtwixt_data *data;
 	struct got_object_qid *qid;
 
+	data = malloc(sizeof(*data));
+	if (data == NULL)
+		return got_error_from_errno("malloc");
+
 	err = got_object_qid_alloc(&qid, id);
-	if (err)
+	if (err) {
+		free(data);
 		return err;
+	}
 
 	STAILQ_INSERT_TAIL(ids, qid, entry);
-	qid->data = (void *)&findtwixt_colors[color];
+	data->color = color;
+	data->tip = tip;
+	qid->data = data;
+
 	return NULL;
 }
 
@@ -1133,6 +1143,7 @@ drop_commit(struct got_object_idset *keep, struct got_
 	const struct got_object_id_queue *parents;
 	struct got_object_id_queue ids;
 	struct got_object_qid *qid;
+	struct got_object_idset_element *entry;
 
 	STAILQ_INIT(&ids);
 
@@ -1152,22 +1163,29 @@ drop_commit(struct got_object_idset *keep, struct got_
 		STAILQ_REMOVE_HEAD(&ids, entry);
 
 		if (got_object_idset_contains(drop, qid->id)) {
+			free(qid->data);
 			got_object_qid_free(qid);
 			continue;
 		}
 
 		err = got_object_idset_add(drop, qid->id, NULL);
 		if (err) {
+			free(qid->data);
 			got_object_qid_free(qid);
 			break;
 		}
 
-		if (!got_object_idset_contains(keep, qid->id)) {
+		entry = got_object_idset_get_element(keep, qid->id);
+		if (entry == NULL) {
+			free(qid->data);
 			got_object_qid_free(qid);
 			continue;
 		}
 
+		got_object_idset_remove_element(keep, entry);
+
 		err = got_object_open_as_commit(&commit, repo, qid->id);
+		free(qid->data);
 		got_object_qid_free(qid);
 		if (err)
 			break;
@@ -1207,6 +1225,7 @@ append_id(struct got_object_id *id, void *data, void *
 
 static const struct got_error *
 queue_commit_or_tag_id(struct got_object_id *id, int color,
+    struct got_object_id *tip,
     struct got_object_id_queue *ids, struct got_repository *repo)
 {
 	const struct got_error *err;
@@ -1226,7 +1245,7 @@ queue_commit_or_tag_id(struct got_object_id *id, int c
 	}
 
 	if (obj_type == GOT_OBJ_TYPE_COMMIT) {
-		err = queue_commit_id(ids, id, color, repo);
+		err = queue_commit_id(ids, id, color, id, repo);
 		if (err)
 			goto done;
 	}
@@ -1237,9 +1256,10 @@ done:
 }
 
 static const struct got_error *
-color_commits(int *ncolored, struct got_object_id_queue *ids,
+color_commits(int *ncolored, struct got_object_id_queue *parent_ids,
+    struct got_object_id_queue *ids,
     struct got_object_idset *keep, struct got_object_idset *drop,
-    struct got_repository *repo,
+    struct got_object_idset *boundaries, struct got_repository *repo,
     got_pack_progress_cb progress_cb, void *progress_arg,
     struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
 {
@@ -1248,6 +1268,8 @@ color_commits(int *ncolored, struct got_object_id_queu
 	struct got_object_qid *qid;
 
 	while (!STAILQ_EMPTY(ids)) {
+		struct findtwixt_data *data;
+		struct got_object_id *tip;
 		int qcolor, ncolor;
 
 		if (cancel_cb) {
@@ -1257,7 +1279,9 @@ color_commits(int *ncolored, struct got_object_id_queu
 		}
 
 		qid = STAILQ_FIRST(ids);
-		qcolor = *((int *)qid->data);
+		data = (struct findtwixt_data *)qid->data;
+		qcolor = data->color;
+		tip = data->tip;
 
 		if (got_object_idset_contains(drop, qid->id))
 			ncolor = COLOR_DROP;
@@ -1274,7 +1298,11 @@ color_commits(int *ncolored, struct got_object_id_queu
 
 		if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP &&
 		    qcolor == COLOR_KEEP)) {
+			err = got_object_idset_add(boundaries, qid->id, NULL);
+			if (err)
+				break;
 			STAILQ_REMOVE_HEAD(ids, entry);
+			free(qid->data);
 			got_object_qid_free(qid);
 			continue;
 		}
@@ -1284,11 +1312,20 @@ color_commits(int *ncolored, struct got_object_id_queu
 			    cancel_cb, cancel_arg);
 			if (err)
 				break;
+			err = got_object_idset_add(boundaries, qid->id, NULL);
+			if (err)
+				break;
 		} else if (ncolor == COLOR_BLANK) {
 			struct got_commit_object *commit;
 			const struct got_object_id_queue *parents;
 			struct got_object_qid *pid;
 
+			if (got_object_idset_contains(boundaries, tip)) {
+				STAILQ_REMOVE_HEAD(ids, entry);
+				got_object_qid_free(qid);
+				continue;
+			}
+
 			if (qcolor == COLOR_KEEP)
 				err = got_object_idset_add(keep, qid->id, NULL);
 			else
@@ -1303,8 +1340,8 @@ color_commits(int *ncolored, struct got_object_id_queu
 			parents = got_object_commit_get_parent_ids(commit);
 			if (parents) {
 				STAILQ_FOREACH(pid, parents, entry) {
-					err = queue_commit_id(ids, pid->id,
-					    qcolor, repo);
+					err = queue_commit_id(parent_ids,
+					    pid->id, qcolor, tip, repo);
 					if (err)
 						break;
 				}
@@ -1319,6 +1356,7 @@ color_commits(int *ncolored, struct got_object_id_queu
 		}
 
 		STAILQ_REMOVE_HEAD(ids, entry);
+		free(qid->data);
 		got_object_qid_free(qid);
 	}
 
@@ -1336,11 +1374,13 @@ findtwixt(struct got_object_id ***res, int *nres, int 
     struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	struct got_object_id_queue ids;
-	struct got_object_idset *keep, *drop;
+	struct got_object_id_queue ids, parent_ids;
+	struct got_object_idset *keep, *drop, *boundaries = NULL;
+	struct got_object_qid *qid;
 	int i, nkeep;
 
 	STAILQ_INIT(&ids);
+	STAILQ_INIT(&parent_ids);
 	*res = NULL;
 	*nres = 0;
 	*ncolored = 0;
@@ -1355,11 +1395,17 @@ findtwixt(struct got_object_id ***res, int *nres, int 
 		goto done;
 	}
 
+	boundaries = got_object_idset_alloc();
+	if (boundaries == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+
 	for (i = 0; i < nhead; i++) {
 		struct got_object_id *id = head[i];
 		if (id == NULL)
 			continue;
-		err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo);
+		err = queue_commit_or_tag_id(id, COLOR_KEEP, id, &ids, repo);
 		if (err)
 			goto done;
 	}		
@@ -1368,15 +1414,24 @@ findtwixt(struct got_object_id ***res, int *nres, int 
 		struct got_object_id *id = tail[i];
 		if (id == NULL)
 			continue;
-		err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo);
+		err = queue_commit_or_tag_id(id, COLOR_DROP, id, &ids, repo);
 		if (err)
 			goto done;
 	}
 
-	err = color_commits(ncolored, &ids, keep, drop, repo,
-	    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
-	if (err)
-		goto done;
+	do {
+		while (!STAILQ_EMPTY(&parent_ids)) {
+			qid = STAILQ_FIRST(&parent_ids);
+			STAILQ_REMOVE_HEAD(&parent_ids, entry);
+			STAILQ_INSERT_TAIL(&ids, qid, entry);
+		}
+		err = color_commits(ncolored, &parent_ids, &ids,
+		    keep, drop, boundaries, repo,
+		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	} while (!STAILQ_EMPTY(&parent_ids) &&
+	    got_object_idset_num_elements(keep) > 0);
 
 	nkeep = got_object_idset_num_elements(keep);
 	if (nkeep > 0) {
@@ -1398,7 +1453,14 @@ findtwixt(struct got_object_id ***res, int *nres, int 
 done:
 	got_object_idset_free(keep);
 	got_object_idset_free(drop);
+	if (boundaries)
+		got_object_idset_free(boundaries);
+	STAILQ_FOREACH(qid, &ids, entry)
+		free(qid->data);
 	got_object_id_queue_free(&ids);
+	STAILQ_FOREACH(qid, &parent_ids, entry)
+		free(qid->data);
+	got_object_id_queue_free(&parent_ids);
 	return err;
 }
 
@@ -1419,7 +1481,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 
 	err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs,
 	    repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
-	if (err || nobj == 0)
+	if (err)
 		goto done;
 
 	for (i = 0; i < ntheirs; i++) {
blob - c1adcc4de91fadddf5e049d93cc9a744c07579bf
blob + a2cd689e950dde612d9eb621415378868483d8bf
--- lib/repository_admin.c
+++ lib/repository_admin.c
@@ -75,6 +75,11 @@ get_reflist_object_ids(struct got_object_id ***ids, in
 	*ids = NULL;
 	*nobjects = 0;
 
+	err = got_reflist_sort(refs,
+	    got_ref_cmp_by_commit_timestamp_descending, repo);
+	if (err)
+		return err;
+
 	*ids = reallocarray(NULL, alloc_chunksz, sizeof(struct got_object_id *));
 	if (*ids == NULL)
 		return got_error_from_errno("reallocarray");