From: Mark Jamsek Subject: Re: simplify send.c for target refnames To: Josh Rickmar Cc: gameoftrees@openbsd.org Date: Thu, 06 Jul 2023 00:28:11 +1000 Josh Rickmar wrote: > On Wed, Jul 05, 2023 at 08:46:13AM -0400, Josh Rickmar wrote: > > On Wed, Jul 05, 2023 at 08:38:09AM -0400, Josh Rickmar wrote: > > > got_send_pack in send.c stuffed into a single reflist only to > > > > > > 1) resolve these references multiple times over > > > 2) convert the damn thing to a pathlist in the end because the reflist isn't > > > linked into the privsep handlers. > > > > > > Building the pathlist directly seems like the saner approach. This also > > > moves the validation that the reference is 'sendable' (by got policy rules) > > > to insert_ref. > > > > > > The reference names are now provided directly as an insert_ref > > > parameter instead of getting this from the resolved reference (which > > > we now only do to check that it is not a symlink and is a valid type). > > > This will simplify the cvg patch as it will allow us to put arbitrary > > > target reference names into the pathlist, which correspond to object > > > ids (pe->data). > > > > > > Passes send.sh regress. > > > > Fixed diff that resolves a double free (that regress didn't hit?) > > > > our_ids points to object ids that were already freed when freeing the > > have_refs pathlist. > > Sorry for noise, but just noticed the freemask :) > > Now the pathlist path and data elements actually get freed, and this > also plugs a leak of the I believe this also plugs a leak of the > their_refs path and data (their_ids contains copies that must still be > freed). I like this and think it's a good bit simpler! Just a couple minor nits and comments inline. > diff /home/jrick/src/got > commit - 51b56d129f9e0504bdf70209c1011a5b077742a6 > path + /home/jrick/src/got > blob - 2de21da09b554d15bc73174e05f9df16eaa3e746 > file + lib/send.c > --- lib/send.c > +++ lib/send.c > @@ -1,6 +1,7 @@ > /* > * Copyright (c) 2018, 2019 Ori Bernstein > * Copyright (c) 2021 Stefan Sperling > + * Copyright (c) 2023 Josh Rickmar > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above > @@ -143,21 +144,50 @@ insert_ref(struct got_reflist_head *refs, const char * > } > > static const struct got_error * > -insert_ref(struct got_reflist_head *refs, const char *refname, > +insert_ref(struct got_pathlist_head *refs, const char *refname, > struct got_repository *repo) > { > const struct got_error *err; > struct got_reference *ref; > - struct got_reflist_entry *new; > + struct got_object_id *id = NULL; > + int obj_type; > > err = got_ref_open(&ref, repo, refname, 0); > if (err) > return err; > > - err = got_reflist_insert(&new, refs, ref, got_ref_cmp_by_name, NULL); > - if (err || new == NULL /* duplicate */) > + if (got_ref_is_symbolic(ref)) { > + err = got_error_fmt(GOT_ERR_BAD_REF_TYPE, > + "cannot send symbolic reference %s", > + refname); nit: can be reflowed without the wrap: err = got_error_fmt(GOT_ERR_BAD_REF_TYPE, "cannot send symbolic reference %s", refname); > + goto done; > + } > + > + err = got_ref_resolve(&id, repo, ref); > + if (err) > + goto done; > + err = got_object_get_type(&obj_type, repo, id); > + if (err) > + goto done; > + switch (obj_type) { > + case GOT_OBJ_TYPE_COMMIT: > + case GOT_OBJ_TYPE_TAG: > + break; > + default: > + err = got_error_fmt(GOT_ERR_OBJ_TYPE, > + "cannot send %s", refname); > + goto done; > + } > + > + err = got_pathlist_insert(NULL, refs, refname, id); > + if (err) > + goto done; > + > + return NULL; > +done: > + if (ref) > got_ref_close(ref); > - > + free(id); > return err; > } This made me look twice. Rather than returning NULL before done and making done an exlusive error cleanup, for consistency I think it would be better to stick with the pattern: done: if (err != NULL) { if (ref != NULL) got_ref_close(ref); free(id) return err; } return NULL: Also, this is somewhat subjective but with moving the ref validation logic into insert_ref(), maybe a name change as it no longer fits the pattern of our insert routines (cf. insert_tree_entry()) that are typically just dumb insertions. Maybe insert_valid_ref() or ...? > > @@ -210,31 +240,14 @@ static struct got_reference * > return NULL; > } > > -static struct got_reference * > -find_ref(struct got_reflist_head *refs, const char *refname) > -{ > - struct got_reflist_entry *re; > - > - TAILQ_FOREACH(re, refs, entry) { > - if (got_path_cmp(got_ref_get_name(re->ref), refname, > - strlen(got_ref_get_name(re->ref)), > - strlen(refname)) == 0) { > - return re->ref; > - } > - } > - > - return NULL; > -} > - > static struct got_pathlist_entry * > -find_their_ref(struct got_pathlist_head *their_refs, const char *refname) > +find_ref(struct got_pathlist_head *refs, const char *refname) > { > struct got_pathlist_entry *pe; > > - TAILQ_FOREACH(pe, their_refs, entry) { > - const char *their_refname = pe->path; > - if (got_path_cmp(their_refname, refname, > - strlen(their_refname), strlen(refname)) == 0) { > + TAILQ_FOREACH(pe, refs, entry) { > + if (got_path_cmp(pe->path, refname, strlen(pe->path), > + strlen(refname)) == 0) { > return pe; > } > } > @@ -259,22 +272,18 @@ update_remote_ref(struct got_reference *my_ref, const > } > > static const struct got_error * > -update_remote_ref(struct got_reference *my_ref, const char *remote_name, > +update_remote_ref(struct got_pathlist_entry *my_ref, const char *remote_name, > struct got_repository *repo) > { > const struct got_error *err, *unlock_err; > - struct got_object_id *my_id; > + const char *refname = my_ref->path; > + struct got_object_id *my_id = my_ref->data; > struct got_reference *ref = NULL; > char *remote_refname = NULL; > int ref_locked = 0; > > - err = got_ref_resolve(&my_id, repo, my_ref); > + err = get_remote_refname(&remote_refname, remote_name, refname); > if (err) > - return err; > - > - err = get_remote_refname(&remote_refname, remote_name, > - got_ref_get_name(my_ref)); > - if (err) > goto done; > > err = got_ref_open(&ref, repo, remote_refname, 1 /* lock */); > @@ -301,7 +310,6 @@ done: > } > got_ref_close(ref); > } > - free(my_id); > free(remote_refname); > return err; > } > @@ -320,11 +328,9 @@ got_send_pack(const char *remote_name, struct got_path > const struct got_error *err; > struct imsgbuf sendibuf; > pid_t sendpid = -1; > - struct got_reflist_head refs; > struct got_pathlist_head have_refs; > struct got_pathlist_head their_refs; > struct got_pathlist_entry *pe; > - struct got_reflist_entry *re; > struct got_object_id **our_ids = NULL; > struct got_object_id **their_ids = NULL; > int i, nours = 0, ntheirs = 0; > @@ -336,7 +342,6 @@ got_send_pack(const char *remote_name, struct got_path > int packfd = -1; > FILE *delta_cache = NULL; > > - TAILQ_INIT(&refs); > TAILQ_INIT(&have_refs); > TAILQ_INIT(&their_refs); > > @@ -348,10 +353,9 @@ got_send_pack(const char *remote_name, struct got_path > err = got_error_from_errno("asprintf"); > goto done; > } > - err = insert_ref(&refs, s, repo); > - free(s); > + err = insert_ref(&have_refs, s, repo); > } else { > - err = insert_ref(&refs, branchname, repo); > + err = insert_ref(&have_refs, strdup(branchname), repo); > } Please check strdup for failure ^^^^^^^^^^^^^^^^^^ > if (err) > goto done; > @@ -359,13 +363,13 @@ got_send_pack(const char *remote_name, struct got_path > > TAILQ_FOREACH(pe, delete_branches, entry) { > const char *branchname = pe->path; > - struct got_reference *ref; > + struct got_pathlist_entry *ref; > if (strncmp(branchname, "refs/heads/", 11) != 0) { > err = got_error_fmt(GOT_ERR_SEND_DELETE_REF, "%s", > branchname); > goto done; > } > - ref = find_ref(&refs, branchname); > + ref = find_ref(&have_refs, branchname); > if (ref) { > err = got_error_fmt(GOT_ERR_SEND_DELETE_REF, > "changes on %s will be sent to server", > @@ -382,49 +386,19 @@ got_send_pack(const char *remote_name, struct got_path > err = got_error_from_errno("asprintf"); > goto done; > } > - err = insert_ref(&refs, s, repo); > - free(s); > + err = insert_ref(&have_refs, s, repo); > } else { > - err = insert_ref(&refs, tagname, repo); > + err = insert_ref(&have_refs, strdup(tagname), repo); Likewise, strdup() ^^^^^^^^^^^^^^^ > } > if (err) > goto done; > } > > - if (TAILQ_EMPTY(&refs) && TAILQ_EMPTY(delete_branches)) { > + if (TAILQ_EMPTY(&have_refs) && TAILQ_EMPTY(delete_branches)) { > err = got_error(GOT_ERR_SEND_EMPTY); > goto done; > } > > - TAILQ_FOREACH(re, &refs, entry) { > - struct got_object_id *id; > - int obj_type; > - > - if (got_ref_is_symbolic(re->ref)) { > - err = got_error_fmt(GOT_ERR_BAD_REF_TYPE, > - "cannot send symbolic reference %s", > - got_ref_get_name(re->ref)); > - goto done; > - } > - > - err = got_ref_resolve(&id, repo, re->ref); > - if (err) > - goto done; > - err = got_object_get_type(&obj_type, repo, id); > - free(id); > - if (err) > - goto done; > - switch (obj_type) { > - case GOT_OBJ_TYPE_COMMIT: > - case GOT_OBJ_TYPE_TAG: > - break; > - default: > - err = got_error_fmt(GOT_ERR_OBJ_TYPE, > - "cannot send %s", got_ref_get_name(re->ref)); > - goto done; > - } > - } > - > packfd = got_opentempfd(); > if (packfd == -1) { > err = got_error_from_errno("got_opentempfd"); > @@ -463,22 +437,12 @@ got_send_pack(const char *remote_name, struct got_path > } > > /* > - * Convert reflist to pathlist since the privsep layer > - * is linked into helper programs which lack reference.c. > + * Also prepare the array of our object IDs which > + * will be needed for generating a pack file. > */ > - TAILQ_FOREACH(re, &refs, entry) { > - struct got_object_id *id; > - err = got_ref_resolve(&id, repo, re->ref); > - if (err) > - goto done; > - err = got_pathlist_append(&have_refs, > - got_ref_get_name(re->ref), id); > - if (err) > - goto done; > - /* > - * Also prepare the array of our object IDs which > - * will be needed for generating a pack file. > - */ > + TAILQ_FOREACH(pe, &have_refs, entry) { > + struct got_object_id *id = pe->data; > + > err = realloc_ids(&our_ids, &nalloc_ours, nours + 1); > if (err) > goto done; > @@ -507,7 +471,7 @@ got_send_pack(const char *remote_name, struct got_path > struct got_object_id *their_id = pe->data; > int have_their_id; > struct got_object *obj; > - struct got_reference *my_ref = NULL; > + struct got_pathlist_entry *my_ref = NULL; > int is_tag = 0; > > /* Don't blindly trust the server to send us valid names. */ > @@ -521,23 +485,18 @@ got_send_pack(const char *remote_name, struct got_path > * Otherwise we can still use this reference as a hint to > * avoid uploading any objects the server already has. > */ > - my_ref = find_ref(&refs, refname); > + my_ref = find_ref(&have_refs, refname); > if (my_ref) { > - struct got_object_id *my_id; > - err = got_ref_resolve(&my_id, repo, my_ref); > - if (err) > - goto done; > + struct got_object_id *my_id = my_ref->data; > if (got_object_id_cmp(my_id, their_id) != 0) { > if (!overwrite_refs && is_tag) { > err = got_error_fmt( > GOT_ERR_SEND_TAG_EXISTS, > "%s", refname); > - free(my_id); > goto done; > } > refs_to_send++; > } > - free(my_id); > } > > /* Check if their object exists locally. */ > @@ -563,14 +522,9 @@ got_send_pack(const char *remote_name, struct got_path > if (have_their_id) { > /* Enforce linear ancestry if required. */ > if (!overwrite_refs && my_ref && !is_tag) { > - struct got_object_id *my_id; > - err = got_ref_resolve(&my_id, repo, my_ref); > - if (err) > - goto done; > + struct got_object_id *my_id = my_ref->data; > err = check_common_ancestry(refname, my_id, > their_id, repo, cancel_cb, cancel_arg); > - free(my_id); > - my_id = NULL; > if (err) > goto done; > } > @@ -609,16 +563,16 @@ got_send_pack(const char *remote_name, struct got_path > } > > /* Account for any new references we are going to upload. */ > - TAILQ_FOREACH(re, &refs, entry) { > - if (find_their_ref(&their_refs, > - got_ref_get_name(re->ref)) == NULL) > + TAILQ_FOREACH(pe, &have_refs, entry) { > + const char *ref = pe->path; > + if (find_ref(&their_refs, ref) == NULL) > refs_to_send++; > } > > /* Account for any existing references we are going to delete. */ > TAILQ_FOREACH(pe, delete_branches, entry) { > const char *branchname = pe->path; > - if (find_their_ref(&their_refs, branchname)) > + if (find_ref(&their_refs, branchname)) > refs_to_delete++; > } > > @@ -670,12 +624,12 @@ got_send_pack(const char *remote_name, struct got_path > goto done; > if (refname && got_ref_name_is_valid(refname) && success && > strncmp(refname, "refs/tags/", 10) != 0) { > - struct got_reference *my_ref; > + struct got_pathlist_entry *my_ref; > /* > * The server has accepted our changes. > * Update our reference in refs/remotes/ accordingly. > */ > - my_ref = find_ref(&refs, refname); > + my_ref = find_ref(&have_refs, refname); > if (my_ref) { > err = update_remote_ref(my_ref, remote_name, > repo); > @@ -716,11 +670,11 @@ done: > if (npackfd != -1 && close(npackfd) == -1 && err == NULL) > err = got_error_from_errno("close"); > > - got_ref_list_free(&refs); > - got_pathlist_free(&have_refs, GOT_PATHLIST_FREE_NONE); > - got_pathlist_free(&their_refs, GOT_PATHLIST_FREE_NONE); > - for (i = 0; i < nours; i++) > - free(our_ids[i]); > + got_pathlist_free(&have_refs, > + GOT_PATHLIST_FREE_PATH|GOT_PATHLIST_FREE_DATA); > + got_pathlist_free(&their_refs, > + GOT_PATHLIST_FREE_PATH|GOT_PATHLIST_FREE_DATA); nit: they could be shortened to save wrapping with: got_pathlist_free(&have_refs, GOT_PATHLIST_FREE_ALL); got_pathlist_free(&their_refs, GOT_PATHLIST_FREE_ALL); > + /* our_ids object ids are owned by have_refs, and already freed */ > free(our_ids); > for (i = 0; i < ntheirs; i++) > free(their_ids[i]); -- Mark Jamsek GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68