"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: simplify send.c for target refnames
To:
Josh Rickmar <jrick@zettaport.com>
Cc:
gameoftrees@openbsd.org
Date:
Thu, 06 Jul 2023 00:28:11 +1000

Download raw body.

Thread
Josh Rickmar <jrick@zettaport.com> wrote:
> On Wed, Jul 05, 2023 at 08:46:13AM -0400, Josh Rickmar wrote:
> > On Wed, Jul 05, 2023 at 08:38:09AM -0400, Josh Rickmar wrote:
> > > got_send_pack in send.c stuffed into a single reflist only to
> > > 
> > > 1) resolve these references multiple times over
> > > 2) convert the damn thing to a pathlist in the end because the reflist isn't
> > >    linked into the privsep handlers.
> > > 
> > > Building the pathlist directly seems like the saner approach.  This also
> > > moves the validation that the reference is 'sendable' (by got policy rules)
> > > to insert_ref.
> > > 
> > > The reference names are now provided directly as an insert_ref
> > > parameter instead of getting this from the resolved reference (which
> > > we now only do to check that it is not a symlink and is a valid type).
> > > This will simplify the cvg patch as it will allow us to put arbitrary
> > > target reference names into the pathlist, which correspond to object
> > > ids (pe->data).
> > > 
> > > Passes send.sh regress.
> > 
> > Fixed diff that resolves a double free (that regress didn't hit?)
> > 
> > our_ids points to object ids that were already freed when freeing the
> > have_refs pathlist.
> 
> Sorry for noise, but just noticed the freemask :)
> 
> Now the pathlist path and data elements actually get freed, and this
> also plugs a leak of the I believe this also plugs a leak of the
> their_refs path and data (their_ids contains copies that must still be
> freed).

I like this and think it's a good bit simpler! Just a couple minor nits
and comments inline.

> diff /home/jrick/src/got
> commit - 51b56d129f9e0504bdf70209c1011a5b077742a6
> path + /home/jrick/src/got
> blob - 2de21da09b554d15bc73174e05f9df16eaa3e746
> file + lib/send.c
> --- lib/send.c
> +++ lib/send.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2018, 2019 Ori Bernstein <ori@openbsd.org>
>   * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
> + * Copyright (c) 2023 Josh Rickmar <jrick@zettaport.com>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -143,21 +144,50 @@ insert_ref(struct got_reflist_head *refs, const char *
>  }
>  
>  static const struct got_error *
> -insert_ref(struct got_reflist_head *refs, const char *refname,
> +insert_ref(struct got_pathlist_head *refs, const char *refname,
>      struct got_repository *repo)
>  {
>  	const struct got_error *err;
>  	struct got_reference *ref;
> -	struct got_reflist_entry *new;
> +	struct got_object_id *id = NULL;
> +	int obj_type;
>  
>  	err = got_ref_open(&ref, repo, refname, 0);
>  	if (err)
>  		return err;
>  
> -	err = got_reflist_insert(&new, refs, ref, got_ref_cmp_by_name, NULL);
> -	if (err || new == NULL /* duplicate */)
> +	if (got_ref_is_symbolic(ref)) {
> +		err = got_error_fmt(GOT_ERR_BAD_REF_TYPE,
> +		    "cannot send symbolic reference %s",
> +		    refname);

nit: can be reflowed without the wrap:

		err = got_error_fmt(GOT_ERR_BAD_REF_TYPE,
		    "cannot send symbolic reference %s", refname);

> +		goto done;
> +	}
> +
> +	err = got_ref_resolve(&id, repo, ref);
> +	if (err)
> +		goto done;
> +	err = got_object_get_type(&obj_type, repo, id);
> +	if (err)
> +		goto done;
> +	switch (obj_type) {
> +	case GOT_OBJ_TYPE_COMMIT:
> +	case GOT_OBJ_TYPE_TAG:
> +		break;
> +	default:
> +		err = got_error_fmt(GOT_ERR_OBJ_TYPE,
> +		    "cannot send %s", refname);
> +		goto done;
> +	}
> +
> +	err = got_pathlist_insert(NULL, refs, refname, id);
> +	if (err)
> +		goto done;
> +
> +	return NULL;
> +done:
> +	if (ref)
>  		got_ref_close(ref);
> -
> +	free(id);
>  	return err;
>  }

This made me look twice.

Rather than returning NULL before done and making done an exlusive error
cleanup, for consistency I think it would be better to stick with the
pattern:

done:
	if (err != NULL) {
		if (ref != NULL)
			got_ref_close(ref);
		free(id)
		return err;
	}
	return NULL:


Also, this is somewhat subjective but with moving the ref validation
logic into insert_ref(), maybe a name change as it no longer fits the
pattern of our insert routines (cf. insert_tree_entry()) that are
typically just dumb insertions. Maybe insert_valid_ref() or ...?

>  
> @@ -210,31 +240,14 @@ static struct got_reference *
>  	return NULL;
>  }
>  
> -static struct got_reference *
> -find_ref(struct got_reflist_head *refs, const char *refname)
> -{
> -	struct got_reflist_entry *re;
> -
> -	TAILQ_FOREACH(re, refs, entry) {
> -		if (got_path_cmp(got_ref_get_name(re->ref), refname,
> -		    strlen(got_ref_get_name(re->ref)),
> -		    strlen(refname)) == 0) {
> -			return re->ref;
> -		}
> -	}
> -
> -	return NULL;
> -}
> -
>  static struct got_pathlist_entry *
> -find_their_ref(struct got_pathlist_head *their_refs, const char *refname)
> +find_ref(struct got_pathlist_head *refs, const char *refname)
>  {
>  	struct got_pathlist_entry *pe;
>  
> -	TAILQ_FOREACH(pe, their_refs, entry) {
> -		const char *their_refname = pe->path;
> -		if (got_path_cmp(their_refname, refname,
> -		    strlen(their_refname), strlen(refname)) == 0) {
> +	TAILQ_FOREACH(pe, refs, entry) {
> +		if (got_path_cmp(pe->path, refname, strlen(pe->path),
> +		    strlen(refname)) == 0) {
>  			return pe;
>  		}
>  	}
> @@ -259,22 +272,18 @@ update_remote_ref(struct got_reference *my_ref, const 
>  }
>  
>  static const struct got_error *
> -update_remote_ref(struct got_reference *my_ref, const char *remote_name,
> +update_remote_ref(struct got_pathlist_entry *my_ref, const char *remote_name,
>      struct got_repository *repo)
>  {
>  	const struct got_error *err, *unlock_err;
> -	struct got_object_id *my_id;
> +	const char *refname = my_ref->path;
> +	struct got_object_id *my_id = my_ref->data;
>  	struct got_reference *ref = NULL;
>  	char *remote_refname = NULL;
>  	int ref_locked = 0;
>  
> -	err = got_ref_resolve(&my_id, repo, my_ref);
> +	err = get_remote_refname(&remote_refname, remote_name, refname);
>  	if (err)
> -		return err;
> -
> -	err = get_remote_refname(&remote_refname, remote_name,
> -	    got_ref_get_name(my_ref));
> -	if (err)
>  		goto done;
>  
>  	err = got_ref_open(&ref, repo, remote_refname, 1 /* lock */);
> @@ -301,7 +310,6 @@ done:
>  		}
>  		got_ref_close(ref);
>  	}
> -	free(my_id);
>  	free(remote_refname);
>  	return err;
>  }
> @@ -320,11 +328,9 @@ got_send_pack(const char *remote_name, struct got_path
>  	const struct got_error *err;
>  	struct imsgbuf sendibuf;
>  	pid_t sendpid = -1;
> -	struct got_reflist_head refs;
>  	struct got_pathlist_head have_refs;
>  	struct got_pathlist_head their_refs;
>  	struct got_pathlist_entry *pe;
> -	struct got_reflist_entry *re;
>  	struct got_object_id **our_ids = NULL;
>  	struct got_object_id **their_ids = NULL;
>  	int i, nours = 0, ntheirs = 0;
> @@ -336,7 +342,6 @@ got_send_pack(const char *remote_name, struct got_path
>  	int packfd = -1;
>  	FILE *delta_cache = NULL;
>  
> -	TAILQ_INIT(&refs);
>  	TAILQ_INIT(&have_refs);
>  	TAILQ_INIT(&their_refs);
>  
> @@ -348,10 +353,9 @@ got_send_pack(const char *remote_name, struct got_path
>  				err = got_error_from_errno("asprintf");
>  				goto done;
>  			}
> -			err = insert_ref(&refs, s, repo);
> -			free(s);
> +			err = insert_ref(&have_refs, s, repo);
>  		} else {
> -			err = insert_ref(&refs, branchname, repo);
> +			err = insert_ref(&have_refs, strdup(branchname), repo);
>  		}

Please check strdup for failure                      ^^^^^^^^^^^^^^^^^^

>  		if (err)
>  			goto done;
> @@ -359,13 +363,13 @@ got_send_pack(const char *remote_name, struct got_path
>  
>  	TAILQ_FOREACH(pe, delete_branches, entry) {
>  		const char *branchname = pe->path;
> -		struct got_reference *ref;
> +		struct got_pathlist_entry *ref;
>  		if (strncmp(branchname, "refs/heads/", 11) != 0) {
>  			err = got_error_fmt(GOT_ERR_SEND_DELETE_REF, "%s",
>  			    branchname);
>  			goto done;
>  		}
> -		ref = find_ref(&refs, branchname);
> +		ref = find_ref(&have_refs, branchname);
>  		if (ref) {
>  			err = got_error_fmt(GOT_ERR_SEND_DELETE_REF,
>  			    "changes on %s will be sent to server",
> @@ -382,49 +386,19 @@ got_send_pack(const char *remote_name, struct got_path
>  				err = got_error_from_errno("asprintf");
>  				goto done;
>  			}
> -			err = insert_ref(&refs, s, repo);
> -			free(s);
> +			err = insert_ref(&have_refs, s, repo);
>  		} else {
> -			err = insert_ref(&refs, tagname, repo);
> +			err = insert_ref(&have_refs, strdup(tagname), repo);

Likewise, strdup()                                   ^^^^^^^^^^^^^^^

>  		}
>  		if (err)
>  			goto done;
>  	}
>  
> -	if (TAILQ_EMPTY(&refs) && TAILQ_EMPTY(delete_branches)) {
> +	if (TAILQ_EMPTY(&have_refs) && TAILQ_EMPTY(delete_branches)) {
>  		err = got_error(GOT_ERR_SEND_EMPTY);
>  		goto done;
>  	}
>  
> -	TAILQ_FOREACH(re, &refs, entry) {
> -		struct got_object_id *id;
> -		int obj_type;
> -
> -		if (got_ref_is_symbolic(re->ref)) {
> -			err = got_error_fmt(GOT_ERR_BAD_REF_TYPE,
> -			    "cannot send symbolic reference %s",
> -			    got_ref_get_name(re->ref));
> -			goto done;
> -		}
> -
> -		err = got_ref_resolve(&id, repo, re->ref);
> -		if (err)
> -			goto done;
> -		err = got_object_get_type(&obj_type, repo, id);
> -		free(id);
> -		if (err)
> -			goto done;
> -		switch (obj_type) {
> -		case GOT_OBJ_TYPE_COMMIT:
> -		case GOT_OBJ_TYPE_TAG:
> -			break;
> -		default:
> -			err = got_error_fmt(GOT_ERR_OBJ_TYPE,
> -			    "cannot send %s", got_ref_get_name(re->ref));
> -			goto done;
> -		}
> -	}
> -
>  	packfd = got_opentempfd();
>  	if (packfd == -1) {
>  		err = got_error_from_errno("got_opentempfd");
> @@ -463,22 +437,12 @@ got_send_pack(const char *remote_name, struct got_path
>  	}
>  
>  	/*
> -	 * Convert reflist to pathlist since the privsep layer
> -	 * is linked into helper programs which lack reference.c.
> +	 * Also prepare the array of our object IDs which
> +	 * will be needed for generating a pack file.
>  	 */
> -	TAILQ_FOREACH(re, &refs, entry) {
> -		struct got_object_id *id;
> -		err = got_ref_resolve(&id, repo, re->ref);
> -		if (err)
> -			goto done;
> -		err = got_pathlist_append(&have_refs,
> -		    got_ref_get_name(re->ref), id);
> -		if (err)
> -			goto done;
> -		/*
> -		 * Also prepare the array of our object IDs which
> -		 * will be needed for generating a pack file.
> -		 */
> +	TAILQ_FOREACH(pe, &have_refs, entry) {
> +		struct got_object_id *id = pe->data;
> +
>  		err = realloc_ids(&our_ids, &nalloc_ours, nours + 1);
>  		if (err)
>  			goto done;
> @@ -507,7 +471,7 @@ got_send_pack(const char *remote_name, struct got_path
>  		struct got_object_id *their_id = pe->data;
>  		int have_their_id;
>  		struct got_object *obj;
> -		struct got_reference *my_ref = NULL;
> +		struct got_pathlist_entry *my_ref = NULL;
>  		int is_tag = 0;
>  
>  		/* Don't blindly trust the server to send us valid names. */
> @@ -521,23 +485,18 @@ got_send_pack(const char *remote_name, struct got_path
>  		 * Otherwise we can still use this reference as a hint to
>  		 * avoid uploading any objects the server already has.
>  		 */
> -		my_ref = find_ref(&refs, refname);
> +		my_ref = find_ref(&have_refs, refname);
>  		if (my_ref) {
> -			struct got_object_id *my_id;
> -			err = got_ref_resolve(&my_id, repo, my_ref);
> -			if (err)
> -				goto done;
> +			struct got_object_id *my_id = my_ref->data;
>  			if (got_object_id_cmp(my_id, their_id) != 0) {
>  				if (!overwrite_refs && is_tag) {
>  					err = got_error_fmt(
>  					    GOT_ERR_SEND_TAG_EXISTS,
>  					    "%s", refname);
> -					free(my_id);
>  					goto done;
>  				}
>  				refs_to_send++;
>  			}
> -			free(my_id);
>  		}
>  
>  		/* Check if their object exists locally. */
> @@ -563,14 +522,9 @@ got_send_pack(const char *remote_name, struct got_path
>  		if (have_their_id) {
>  			/* Enforce linear ancestry if required. */
>  			if (!overwrite_refs && my_ref && !is_tag) {
> -				struct got_object_id *my_id;
> -				err = got_ref_resolve(&my_id, repo, my_ref);
> -				if (err)
> -					goto done;
> +				struct got_object_id *my_id = my_ref->data;
>  				err = check_common_ancestry(refname, my_id,
>  				    their_id, repo, cancel_cb, cancel_arg);
> -				free(my_id);
> -				my_id = NULL;
>  				if (err)
>  					goto done;
>  			}
> @@ -609,16 +563,16 @@ got_send_pack(const char *remote_name, struct got_path
>  	}
>  
>  	/* Account for any new references we are going to upload. */
> -	TAILQ_FOREACH(re, &refs, entry) {
> -		if (find_their_ref(&their_refs,
> -		    got_ref_get_name(re->ref)) == NULL)
> +	TAILQ_FOREACH(pe, &have_refs, entry) {
> +		const char *ref = pe->path;
> +		if (find_ref(&their_refs, ref) == NULL)
>  			refs_to_send++;
>  	}
>  
>  	/* Account for any existing references we are going to delete. */
>  	TAILQ_FOREACH(pe, delete_branches, entry) {
>  		const char *branchname = pe->path;
> -		if (find_their_ref(&their_refs, branchname))
> +		if (find_ref(&their_refs, branchname))
>  			refs_to_delete++;
>  	}
>  
> @@ -670,12 +624,12 @@ got_send_pack(const char *remote_name, struct got_path
>  			goto done;
>  		if (refname && got_ref_name_is_valid(refname) && success &&
>  		    strncmp(refname, "refs/tags/", 10) != 0) {
> -			struct got_reference *my_ref;
> +			struct got_pathlist_entry *my_ref;
>  			/*
>  			 * The server has accepted our changes.
>  			 * Update our reference in refs/remotes/ accordingly.
>  			 */
> -			my_ref = find_ref(&refs, refname);
> +			my_ref = find_ref(&have_refs, refname);
>  			if (my_ref) {
>  				err = update_remote_ref(my_ref, remote_name,
>  				    repo);
> @@ -716,11 +670,11 @@ done:
>  	if (npackfd != -1 && close(npackfd) == -1 && err == NULL)
>  		err = got_error_from_errno("close");
>  
> -	got_ref_list_free(&refs);
> -	got_pathlist_free(&have_refs, GOT_PATHLIST_FREE_NONE);
> -	got_pathlist_free(&their_refs, GOT_PATHLIST_FREE_NONE);
> -	for (i = 0; i < nours; i++)
> -		free(our_ids[i]);
> +	got_pathlist_free(&have_refs,
> +		GOT_PATHLIST_FREE_PATH|GOT_PATHLIST_FREE_DATA);
> +	got_pathlist_free(&their_refs,
> +		GOT_PATHLIST_FREE_PATH|GOT_PATHLIST_FREE_DATA);

nit: they could be shortened to save wrapping with:

	got_pathlist_free(&have_refs, GOT_PATHLIST_FREE_ALL);
	got_pathlist_free(&their_refs, GOT_PATHLIST_FREE_ALL);

> +	/* our_ids object ids are owned by have_refs, and already freed */
>  	free(our_ids);
>  	for (i = 0; i < ntheirs; i++)
>  		free(their_ids[i]);


-- 
Mark Jamsek <https://bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68