"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
gotd repo_read want/have object ID storage
To:
gameoftrees@openbsd.org
Date:
Sun, 26 Feb 2023 23:36:11 +0100

Download raw body.

Thread
Make gotd repo_read store want/have commit IDs in ID sets rather than arrays.

Currently only used to detect and avoid storing duplicate IDs sent in want
and have lines by the client. If in the future we ever wanted to check which
IDs the client has already sent us we could now do O(1) hash table lookups
rather than iterating arrays.

I believe the latter might become interesting if we ever decided to implement
multi-ack protocol extensions, which would require actually walking the
commit graph instead of the dumb object existence checks we do now.

ok?

diff c3b9af18468be6672f62e2d321b5fc2893837c62 bacac23829beff55f09483f7de22f11ee4d3c1c0
commit - c3b9af18468be6672f62e2d321b5fc2893837c62
commit + bacac23829beff55f09483f7de22f11ee4d3c1c0
blob - fa9d0fe7c98493ac9d2d6fa32f34baa5310f2e37
blob + 378a3faff5b94e566a97bc638b8a0b4abdb6ad22
--- gotd/repo_read.c
+++ gotd/repo_read.c
@@ -71,8 +71,8 @@ static struct repo_read_client {
 	int				 delta_cache_fd;
 	int				 report_progress;
 	int				 pack_pipe;
-	struct gotd_object_id_array	 want_ids;
-	struct gotd_object_id_array	 have_ids;
+	struct got_object_idset		*want_ids;
+	struct got_object_idset		*have_ids;
 } repo_read_client;
 
 static volatile sig_atomic_t sigint_received;
@@ -373,8 +373,9 @@ record_object_id(struct gotd_object_id_array *array, s
 }
 
 static const struct got_error *
-record_object_id(struct gotd_object_id_array *array, struct got_object_id *id)
+append_object_id(struct got_object_id *id, void *data, void *arg)
 {
+	struct gotd_object_id_array *array = arg;
 	const size_t alloc_chunksz = 256;
 
 	if (array->ids == NULL) {
@@ -394,27 +395,11 @@ record_object_id(struct gotd_object_id_array *array, s
 		array->nalloc += alloc_chunksz;
 	}
 
-	array->ids[array->nids] = got_object_id_dup(id);
-	if (array->ids[array->nids] == NULL)
-		return got_error_from_errno("got_object_id_dup");
+	array->ids[array->nids] = id;
 	array->nids++;
 	return NULL;
 }
 
-static void
-free_object_ids(struct gotd_object_id_array *array)
-{
-	size_t i;
-
-	for (i = 0; i < array->nids; i++)
-		free(array->ids[i]);
-	free(array->ids);
-
-	array->ids = NULL;
-	array->nalloc = 0;
-	array->nids = 0;
-}
-
 static const struct got_error *
 recv_want(struct imsg *imsg)
 {
@@ -449,9 +434,11 @@ recv_want(struct imsg *imsg)
 	    obj_type != GOT_OBJ_TYPE_TAG)
 		return got_error(GOT_ERR_OBJ_TYPE);
 
-	err = record_object_id(&client->want_ids, &id);
-	if (err)
-		return err;
+	if (!got_object_idset_contains(client->want_ids, &id)) {
+		err = got_object_idset_add(client->want_ids, &id, NULL);
+		if (err)
+			return err;
+	}
 
 	gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid);
 	imsg_clear(&ibuf);
@@ -501,9 +488,11 @@ recv_have(struct imsg *imsg)
 		goto done;
 	}
 
-	err = record_object_id(&client->have_ids, &id);
-	if (err)
-		return err;
+	if (!got_object_idset_contains(client->have_ids, &id)) {
+		err = got_object_idset_add(client->have_ids, &id, NULL);
+		if (err)
+			goto done;
+	}
 
 	gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid);
 done:
@@ -624,9 +613,14 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i
 	struct imsgbuf ibuf;
 	struct repo_read_pack_progress_arg pa;
 	struct got_ratelimit rl;
+	struct gotd_object_id_array want_ids;
+	struct gotd_object_id_array have_ids;
 
 	log_debug("packfile request received");
 
+	memset(&want_ids, 0, sizeof(want_ids));
+	memset(&have_ids, 0, sizeof(have_ids));
+
 	got_ratelimit_init(&rl, 2, 0);
 
 	if (client->delta_cache_fd == -1 || client->pack_pipe == -1)
@@ -645,9 +639,17 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i
 	pa.ibuf = &ibuf;
 	pa.report_progress = client->report_progress;
 
+	err = got_object_idset_for_each(client->want_ids,
+	    append_object_id, &want_ids);
+	if (err)
+		goto done;
+	err = got_object_idset_for_each(client->have_ids,
+	   append_object_id, &have_ids);
+	if (err)
+		goto done;
+
 	err = got_pack_create(packsha1, client->pack_pipe, delta_cache,
-	    client->have_ids.ids, client->have_ids.nids,
-	    client->want_ids.ids, client->want_ids.nids,
+	    have_ids.ids, have_ids.nids, want_ids.ids, want_ids.nids,
 	    repo_read.repo, 0, 1, 0, pack_progress, &pa, &rl,
 	    check_cancelled, NULL);
 	if (err)
@@ -666,6 +668,8 @@ done:
 	if (delta_cache != NULL && fclose(delta_cache) == EOF && err == NULL)
 		err = got_error_from_errno("fclose");
 	imsg_clear(&ibuf);
+	free(want_ids.ids);
+	free(have_ids.ids);
 	return err;
 }
 
@@ -867,6 +871,16 @@ repo_read_main(const char *title, const char *repo_pat
 	client->fd = -1;
 	client->delta_cache_fd = -1;
 	client->pack_pipe = -1;
+	client->have_ids = got_object_idset_alloc();
+	if (client->have_ids == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+	client->want_ids = got_object_idset_alloc();
+	if (client->want_ids == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
 
 	repo_read.title = title;
 	repo_read.pid = getpid();
@@ -917,8 +931,10 @@ repo_read_shutdown(void)
 
 	log_debug("%s: shutting down", repo_read.title);
 
-	free_object_ids(&client->have_ids);
-	free_object_ids(&client->want_ids);
+	if (client->have_ids)
+		got_object_idset_free(client->have_ids);
+	if (client->want_ids)
+		got_object_idset_free(client->want_ids);
 	if (client->fd != -1)
 		close(client->fd);
 	if (client->delta_cache_fd != -1)