"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: gotd repo_read want/have object ID storage
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 27 Feb 2023 11:01:06 +0100

Download raw body.

Thread
On 2023/02/26 23:36:11 +0100, Stefan Sperling <stsp@stsp.name> wrote:
> Make gotd repo_read store want/have commit IDs in ID sets rather than arrays.
> 
> Currently only used to detect and avoid storing duplicate IDs sent in want
> and have lines by the client. If in the future we ever wanted to check which
> IDs the client has already sent us we could now do O(1) hash table lookups
> rather than iterating arrays.
> 
> I believe the latter might become interesting if we ever decided to implement
> multi-ack protocol extensions, which would require actually walking the
> commit graph instead of the dumb object existence checks we do now.

don't know the cons of supporting multi-ack server-side, but it'd be
really interesting to do!

> ok?

I like the move.  Diff reads fine and regress is still passing, ok op@

> diff c3b9af18468be6672f62e2d321b5fc2893837c62 bacac23829beff55f09483f7de22f11ee4d3c1c0
> commit - c3b9af18468be6672f62e2d321b5fc2893837c62
> commit + bacac23829beff55f09483f7de22f11ee4d3c1c0
> blob - fa9d0fe7c98493ac9d2d6fa32f34baa5310f2e37
> blob + 378a3faff5b94e566a97bc638b8a0b4abdb6ad22
> --- gotd/repo_read.c
> +++ gotd/repo_read.c
> @@ -71,8 +71,8 @@ static struct repo_read_client {
>  	int				 delta_cache_fd;
>  	int				 report_progress;
>  	int				 pack_pipe;
> -	struct gotd_object_id_array	 want_ids;
> -	struct gotd_object_id_array	 have_ids;
> +	struct got_object_idset		*want_ids;
> +	struct got_object_idset		*have_ids;
>  } repo_read_client;
>  
>  static volatile sig_atomic_t sigint_received;
> @@ -373,8 +373,9 @@ record_object_id(struct gotd_object_id_array *array, s
>  }
>  
>  static const struct got_error *
> -record_object_id(struct gotd_object_id_array *array, struct got_object_id *id)
> +append_object_id(struct got_object_id *id, void *data, void *arg)
>  {
> +	struct gotd_object_id_array *array = arg;
>  	const size_t alloc_chunksz = 256;
>  
>  	if (array->ids == NULL) {
> @@ -394,27 +395,11 @@ record_object_id(struct gotd_object_id_array *array, s
>  		array->nalloc += alloc_chunksz;
>  	}
>  
> -	array->ids[array->nids] = got_object_id_dup(id);
> -	if (array->ids[array->nids] == NULL)
> -		return got_error_from_errno("got_object_id_dup");
> +	array->ids[array->nids] = id;
>  	array->nids++;
>  	return NULL;
>  }
>  
> -static void
> -free_object_ids(struct gotd_object_id_array *array)
> -{
> -	size_t i;
> -
> -	for (i = 0; i < array->nids; i++)
> -		free(array->ids[i]);
> -	free(array->ids);
> -
> -	array->ids = NULL;
> -	array->nalloc = 0;
> -	array->nids = 0;
> -}
> -
>  static const struct got_error *
>  recv_want(struct imsg *imsg)
>  {
> @@ -449,9 +434,11 @@ recv_want(struct imsg *imsg)
>  	    obj_type != GOT_OBJ_TYPE_TAG)
>  		return got_error(GOT_ERR_OBJ_TYPE);
>  
> -	err = record_object_id(&client->want_ids, &id);
> -	if (err)
> -		return err;
> +	if (!got_object_idset_contains(client->want_ids, &id)) {
> +		err = got_object_idset_add(client->want_ids, &id, NULL);
> +		if (err)
> +			return err;
> +	}
>  
>  	gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid);
>  	imsg_clear(&ibuf);
> @@ -501,9 +488,11 @@ recv_have(struct imsg *imsg)
>  		goto done;
>  	}
>  
> -	err = record_object_id(&client->have_ids, &id);
> -	if (err)
> -		return err;
> +	if (!got_object_idset_contains(client->have_ids, &id)) {
> +		err = got_object_idset_add(client->have_ids, &id, NULL);
> +		if (err)
> +			goto done;
> +	}
>  
>  	gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid);
>  done:
> @@ -624,9 +613,14 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i
>  	struct imsgbuf ibuf;
>  	struct repo_read_pack_progress_arg pa;
>  	struct got_ratelimit rl;
> +	struct gotd_object_id_array want_ids;
> +	struct gotd_object_id_array have_ids;
>  
>  	log_debug("packfile request received");
>  
> +	memset(&want_ids, 0, sizeof(want_ids));
> +	memset(&have_ids, 0, sizeof(have_ids));
> +
>  	got_ratelimit_init(&rl, 2, 0);
>  
>  	if (client->delta_cache_fd == -1 || client->pack_pipe == -1)
> @@ -645,9 +639,17 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i
>  	pa.ibuf = &ibuf;
>  	pa.report_progress = client->report_progress;
>  
> +	err = got_object_idset_for_each(client->want_ids,
> +	    append_object_id, &want_ids);
> +	if (err)
> +		goto done;
> +	err = got_object_idset_for_each(client->have_ids,
> +	   append_object_id, &have_ids);
> +	if (err)
> +		goto done;
> +
>  	err = got_pack_create(packsha1, client->pack_pipe, delta_cache,
> -	    client->have_ids.ids, client->have_ids.nids,
> -	    client->want_ids.ids, client->want_ids.nids,
> +	    have_ids.ids, have_ids.nids, want_ids.ids, want_ids.nids,
>  	    repo_read.repo, 0, 1, 0, pack_progress, &pa, &rl,
>  	    check_cancelled, NULL);
>  	if (err)
> @@ -666,6 +668,8 @@ done:
>  	if (delta_cache != NULL && fclose(delta_cache) == EOF && err == NULL)
>  		err = got_error_from_errno("fclose");
>  	imsg_clear(&ibuf);
> +	free(want_ids.ids);
> +	free(have_ids.ids);
>  	return err;
>  }
>  
> @@ -867,6 +871,16 @@ repo_read_main(const char *title, const char *repo_pat
>  	client->fd = -1;
>  	client->delta_cache_fd = -1;
>  	client->pack_pipe = -1;
> +	client->have_ids = got_object_idset_alloc();
> +	if (client->have_ids == NULL) {
> +		err = got_error_from_errno("got_object_idset_alloc");
> +		goto done;
> +	}
> +	client->want_ids = got_object_idset_alloc();
> +	if (client->want_ids == NULL) {
> +		err = got_error_from_errno("got_object_idset_alloc");
> +		goto done;
> +	}
>  
>  	repo_read.title = title;
>  	repo_read.pid = getpid();
> @@ -917,8 +931,10 @@ repo_read_shutdown(void)
>  
>  	log_debug("%s: shutting down", repo_read.title);
>  
> -	free_object_ids(&client->have_ids);
> -	free_object_ids(&client->want_ids);
> +	if (client->have_ids)
> +		got_object_idset_free(client->have_ids);
> +	if (client->want_ids)
> +		got_object_idset_free(client->want_ids);
>  	if (client->fd != -1)
>  		close(client->fd);
>  	if (client->delta_cache_fd != -1)