"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
avoid extra tree requests after got-read-pack enumeration
To:
gameoftrees@openbsd.org
Date:
Tue, 14 Jun 2022 14:46:24 +0200

Download raw body.

Thread
Profiling got-read-pack shows a lot of individual tree requests
being sent by gotadmin pack -a, even if all required commits and
trees are present in the enumeration pack file.
This happens because gotadmin will always walk the entire history
again, the slow way, in order to look for any missing objects.

To fix this, make got-read-pack indicate whether it managed to locate
all required objects in its pack file, and only try loading more objects
if enumeration from the pack file was incomplete.

Tags need to be loaded in any case because got-read-pack will never
send tags while enumerating objects. This could still be optimized later.

ok?
 
diff db07f832356e93a5230e9fa259df9b9e6bd411db 892fbb972eda5bf0ff7fdb65c649845fad3c3cad
blob - 1f4430dcde3e6550f20a42cee4f1955e7ea675ba
blob + 58f6ea9d97872411048d72595be2da70a1bb58ae
--- lib/got_lib_object.h
+++ lib/got_lib_object.h
@@ -140,7 +140,7 @@ typedef const struct got_error *(*got_object_enumerate
     struct got_tree_object *, time_t, struct got_object_id *, const char *,
     struct got_repository *);
 
-const struct got_error *got_object_enumerate(got_object_enumerate_commit_cb,
-    got_object_enumerate_tree_cb, void *, struct got_object_id **, int,
-    struct got_object_id **, int, struct got_packidx *,
-    struct got_repository *);
+const struct got_error *got_object_enumerate(int *,
+    got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *,
+    struct got_object_id **, int, struct got_object_id **, int,
+    struct got_packidx *, struct got_repository *);
blob - cc6e50c604345707509575df4645384d00d97a63
blob + af9557d85c3c38603367d54310b5f2a59d7b3b11
--- lib/got_lib_privsep.h
+++ lib/got_lib_privsep.h
@@ -150,6 +150,7 @@ enum got_imsg_type {
 	GOT_IMSG_ENUMERATED_TREE,
 	GOT_IMSG_TREE_ENUMERATION_DONE,
 	GOT_IMSG_OBJECT_ENUMERATION_DONE,
+	GOT_IMSG_OBJECT_ENUMERATION_INCOMPLETE,
 
 	/* Message sending file descriptor to a temporary file. */
 	GOT_IMSG_TMPFD,
@@ -749,11 +750,13 @@ const struct got_error *got_privsep_send_object_enumer
     struct imsgbuf *);
 const struct got_error *got_privsep_send_object_enumeration_done(
     struct imsgbuf *);
+const struct got_error *got_privsep_send_object_enumeration_incomplete(
+    struct imsgbuf *);
 const struct got_error *got_privsep_send_enumerated_commit(struct imsgbuf *,
     struct got_object_id *, time_t);
-const struct got_error *got_privsep_recv_enumerated_objects(struct imsgbuf *,
-    got_object_enumerate_commit_cb, got_object_enumerate_tree_cb, void *,
-    struct got_repository *);
+const struct got_error *got_privsep_recv_enumerated_objects(int *,
+    struct imsgbuf *, got_object_enumerate_commit_cb,
+    got_object_enumerate_tree_cb, void *, struct got_repository *);
 
 const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int,
     struct got_object_id *);
blob - eb06a1be95981661f8eed6fcf981bbe32a5b6119
blob + f39d1d87d659236f80c05f15876b571c2e27febf
--- lib/object.c
+++ lib/object.c
@@ -2402,7 +2402,8 @@ done:
 }
 
 const struct got_error *
-got_object_enumerate(got_object_enumerate_commit_cb cb_commit,
+got_object_enumerate(int *found_all_objects,
+    got_object_enumerate_commit_cb cb_commit,
     got_object_enumerate_tree_cb cb_tree, void *cb_arg,
     struct got_object_id **ours, int nours,
     struct got_object_id **theirs, int ntheirs,
@@ -2451,8 +2452,8 @@ got_object_enumerate(got_object_enumerate_commit_cb cb
 	if (err)
 		goto done;
 
-	err = got_privsep_recv_enumerated_objects(pack->privsep_child->ibuf,
-	    cb_commit, cb_tree, cb_arg, repo);
+	err = got_privsep_recv_enumerated_objects(found_all_objects,
+	    pack->privsep_child->ibuf, cb_commit, cb_tree, cb_arg, repo);
 done:
 	free(path_packfile);
 	return err;
blob - f6f2cb6c69424d99e5cf161ab27dffb59b0546cc
blob + 2e6d5f071a005d22724536b529a844122de2155f
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -1564,7 +1564,8 @@ load_packed_tree_ids(void *arg, struct got_tree_object
 }
 
 static const struct got_error *
-load_packed_object_ids(struct got_object_id **ours, int nours,
+load_packed_object_ids(int *found_all_objects,
+    struct got_object_id **ours, int nours,
     struct got_object_id **theirs, int ntheirs,
     int want_meta, uint32_t seed, struct got_object_idset *idset,
     struct got_object_idset *idset_exclude, int loose_obj_only,
@@ -1592,7 +1593,7 @@ load_packed_object_ids(struct got_object_id **ours, in
 	lpa.cancel_arg = cancel_arg;
 
 	/* Attempt to load objects via got-read-pack, as far as possible. */
-	err = got_object_enumerate(load_packed_commit_id,
+	err = got_object_enumerate(found_all_objects, load_packed_commit_id,
 	   load_packed_tree_ids, &lpa, ours, nours, theirs, ntheirs,
 	   packidx, repo);
 	if (err)
@@ -1603,7 +1604,7 @@ load_packed_object_ids(struct got_object_id **ours, in
 
 	/*
 	 * An incomplete tree hierarchy was present in the pack file
-	 * and caused loading to be aborted midway through a commit.
+	 * and caused loading to be aborted.
 	 * Continue loading trees the slow way.
 	 */
 	err = load_tree(want_meta, idset, idset_exclude,
@@ -1675,7 +1676,7 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 	const struct got_error *err = NULL;
 	struct got_object_id **ids = NULL;
 	struct got_packidx *packidx = NULL;
-	int i, nobj = 0, obj_type;
+	int i, nobj = 0, obj_type, found_all_objects = 0;
 	struct got_object_idset *idset_exclude;
 
 	idset_exclude = got_object_idset_alloc();
@@ -1695,10 +1696,10 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 	if (err)
 		goto done;
 	if (packidx) {
-		err = load_packed_object_ids(theirs, ntheirs, NULL, 0, 0,
-		    seed, idset, idset_exclude, loose_obj_only, repo, packidx,
-		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
-		    cancel_cb, cancel_arg);
+		err = load_packed_object_ids(&found_all_objects,
+		    theirs, ntheirs, NULL, 0, 0, seed, idset, idset_exclude,
+		    loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
+		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
 		if (err)
 			goto done;
 	}
@@ -1711,12 +1712,15 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 		if (err)
 			return err;
 		if (obj_type == GOT_OBJ_TYPE_COMMIT) {
-			err = load_commit(0, idset, idset_exclude, id, repo,
-			    seed, loose_obj_only, ncolored, nfound, ntrees,
-			    progress_cb, progress_arg, rl,
-			    cancel_cb, cancel_arg);
-			if (err)
-				goto done;
+			if (!found_all_objects) {
+				err = load_commit(0, idset, idset_exclude,
+				    id, repo, seed, loose_obj_only,
+				    ncolored, nfound, ntrees,
+				    progress_cb, progress_arg, rl,
+				    cancel_cb, cancel_arg);
+				if (err)
+					goto done;
+			}
 		} else if (obj_type == GOT_OBJ_TYPE_TAG) {
 			err = load_tag(0, idset, idset_exclude, id, repo,
 			    seed, loose_obj_only, ncolored, nfound, ntrees,
@@ -1727,24 +1731,28 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 		}
 	}
 
+	found_all_objects = 0;
 	err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
 	if (err)
 		goto done;
 	if (packidx) {
-		err = load_packed_object_ids(ids, nobj, theirs, ntheirs, 1,
-		    seed, idset, idset_exclude, loose_obj_only, repo, packidx,
-		    ncolored, nfound, ntrees,
+		err = load_packed_object_ids(&found_all_objects, ids,
+		    nobj, theirs, ntheirs, 1, seed, idset, idset_exclude,
+		    loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
 		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
 		if (err)
 			goto done;
 	}
 
-	for (i = 0; i < nobj; i++) {
-		err = load_commit(1, idset, idset_exclude, ids[i], repo,
-		    seed, loose_obj_only, ncolored, nfound, ntrees,
-		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
-		if (err)
-			goto done;
+	if (!found_all_objects) {
+		for (i = 0; i < nobj; i++) {
+			err = load_commit(1, idset, idset_exclude, ids[i],
+			    repo, seed, loose_obj_only, ncolored, nfound,
+			    ntrees, progress_cb, progress_arg, rl,
+			    cancel_cb, cancel_arg);
+			if (err)
+				goto done;
+		}
 	}
 
 	for (i = 0; i < nours; i++) {
blob - a63073982fd405c8f986e41f79764f7b0f74ee7c
blob + 215f0f780667828e32af82af77d46a6ef39accab
--- lib/privsep.c
+++ lib/privsep.c
@@ -2811,6 +2811,17 @@ got_privsep_send_object_enumeration_done(struct imsgbu
 }
 
 const struct got_error *
+got_privsep_send_object_enumeration_incomplete(struct imsgbuf *ibuf)
+{
+	if (imsg_compose(ibuf, GOT_IMSG_OBJECT_ENUMERATION_INCOMPLETE,
+	    0, 0, -1, NULL, 0) == -1)
+		return got_error_from_errno("imsg_compose "
+		    "OBJECT_ENUMERATION_INCOMPLETE");
+
+	return flush_imsg(ibuf);
+}
+
+const struct got_error *
 got_privsep_send_enumerated_commit(struct imsgbuf *ibuf,
     struct got_object_id *id, time_t mtime)
 {
@@ -2834,7 +2845,8 @@ got_privsep_send_enumerated_commit(struct imsgbuf *ibu
 }
 
 const struct got_error *
-got_privsep_recv_enumerated_objects(struct imsgbuf *ibuf,
+got_privsep_recv_enumerated_objects(int *found_all_objects,
+    struct imsgbuf *ibuf,
     got_object_enumerate_commit_cb cb_commit,
     got_object_enumerate_tree_cb cb_tree, void *cb_arg,
     struct got_repository *repo)
@@ -2853,6 +2865,7 @@ got_privsep_recv_enumerated_objects(struct imsgbuf *ib
 	int nentries = -1;
 	int done = 0;
 
+	*found_all_objects = 0;
 	memset(&tree, 0, sizeof(tree));
 
 	while (!done) {
@@ -3000,8 +3013,12 @@ got_privsep_recv_enumerated_objects(struct imsgbuf *ib
 			have_commit = 0;
 			break;
 		case GOT_IMSG_OBJECT_ENUMERATION_DONE:
+			*found_all_objects = 1;
 			done = 1;
 			break;
+		case GOT_IMSG_OBJECT_ENUMERATION_INCOMPLETE:
+			done = 1;
+			break;
 		default:
 			err = got_error(GOT_ERR_PRIVSEP_MSG);
 			break;
blob - 17f1fc1cc94a1d35ab873bd4b25914da0d483828
blob + 46804e966079831ecd0d13c64a0fbcbb0b18778a
--- libexec/got-read-pack/got-read-pack.c
+++ libexec/got-read-pack/got-read-pack.c
@@ -1408,8 +1408,10 @@ enumeration_request(struct imsg *imsg, struct imsgbuf 
 		}
 
 		idx = got_packidx_get_object_idx(packidx, &qid->id);
-		if (idx == -1)
+		if (idx == -1) {
+			have_all_entries = 0;
 			break;
+		}
 
 		err = open_object(&obj, pack, packidx, idx, &qid->id,
 		    objcache);
@@ -1430,8 +1432,10 @@ enumeration_request(struct imsg *imsg, struct imsgbuf 
 				goto done;
 			}
 			idx = got_packidx_get_object_idx(packidx, &tag->id);
-			if (idx == -1)
+			if (idx == -1) {
+				have_all_entries = 0;
 				break;
+			}
 			err = open_commit(&commit, pack, packidx, idx,
 			    &tag->id, objcache);
 			got_object_tag_close(tag);
@@ -1458,6 +1462,7 @@ enumeration_request(struct imsg *imsg, struct imsgbuf 
 		tree_id = got_object_commit_get_tree_id(commit);
 		idx = got_packidx_get_object_idx(packidx, tree_id);
 		if (idx == -1) {
+			have_all_entries = 0;
 			err = got_privsep_send_enumerated_tree(&totlen, ibuf,
 			    tree_id, "/", NULL, -1);
 			if (err)
@@ -1505,6 +1510,10 @@ enumeration_request(struct imsg *imsg, struct imsgbuf 
 		err = got_privsep_send_object_enumeration_done(ibuf);
 		if (err)
 			goto done;
+	} else {
+		err = got_privsep_send_object_enumeration_incomplete(ibuf);
+		if (err)
+			goto done;
 	}
 done:
 	if (obj)