Download raw body.
two pack file creation fixes
On Thu, Jan 26, 2023 at 10:16:56PM +0100, Stefan Sperling wrote:
> Two patches below fix two issues in got-read-pack.
> The first patch is a bug fix, the second is a small performance fix.
>
> Regarding the bug:
>
> Using 'got send' to send the entire history of a branch to an empty
> repository can result in some objects missing. A gotd server will
> end up with a broken repository. I suspect git-daemon would catch
> this problem, but I haven't tried to trigger this with git-daemon yet.
>
> To reproduce this, run:
> got clone ssh://git@github.com/yellowman/nsh
> to get this particular repo from Github which happens to trigger the bug.
>
> Then set up a fresh repository on a gotd server and add it as a new remote
> to the nsh repo cloned from github. Now use 'got send' to try to populate
> this empty repository on the server. This will apparently succeed, but if
> you now try to run e.g. 'got log' on the repository served by gotd, it
> will erorr out because some commits will be missing.
>
> Notably, in this situation the source repository has just one pack file
> and no loose objects, so everything will be enumerated by got-read-pack.
> In situations where not all objects can be enumerated from a single pack
> file this bug won't trigger because we'll load the missing commits in
> secondary pass.
>
> ok?
ok
>
> -----------------------------------------------
>
> fix missing commits in pack files created with packed object enumeration
>
> got-read-pack forgot to send a tree-enumeration-done message to the
> main process if the tree of a given commit had already been traversed.
> The main process would then not add the corresponding commit to the
> pack file, even though it should be added.
>
> Found while using 'got send' towards gotd in order to populate an
> empty repository on the server with non-trivial history, where some
> commits always ended up missing due to this bug.
>
> diff 87332a0e6e71182f1fb148d7f42e78105713285b 74aa0dc01f1f739557393098718e63131e75acd7
> commit - 87332a0e6e71182f1fb148d7f42e78105713285b
> commit + 74aa0dc01f1f739557393098718e63131e75acd7
> blob - dd9fcfea23f63268577c4d5b91298a5a5106c61a
> blob + 63ea6eca0b02d1b08d712fe7e8163f04098f3934
> --- libexec/got-read-pack/got-read-pack.c
> +++ libexec/got-read-pack/got-read-pack.c
> @@ -1471,6 +1471,9 @@ enumeration_request(struct imsg *imsg, struct imsgbuf
> }
>
> if (got_object_idset_contains(idset, tree_id)) {
> + err = send_tree_enumeration_done(ibuf);
> + if (err)
> + goto done;
> got_object_qid_free(qid);
> qid = NULL;
> continue;
>
> -----------------------------------------------
>
> avoid traversing enumrated commits more than once in got-read-pack
>
> Keep track of parent commits that will be processed as part
> of looping over the commit queue provided by the main process,
> and do not add these commits to the queue again.
>
> Fixes pointless traversal of commits on the queue which will
> simply be skipped. The end result is the same either way.
>
> diff 74aa0dc01f1f739557393098718e63131e75acd7 00f162a8d0597c71a98d1429cb6ed38f13747d01
> commit - 74aa0dc01f1f739557393098718e63131e75acd7
> commit + 00f162a8d0597c71a98d1429cb6ed38f13747d01
> blob - 63ea6eca0b02d1b08d712fe7e8163f04098f3934
> blob + a64b44bf058deda0f69f2cd1ff50d1f307b0ffdf
> --- libexec/got-read-pack/got-read-pack.c
> +++ libexec/got-read-pack/got-read-pack.c
> @@ -1019,7 +1019,8 @@ recv_object_id_queue(struct got_object_id_queue *queue
> }
>
> static const struct got_error *
> -recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf)
> +recv_object_id_queue(struct got_object_id_queue *queue,
> + struct got_object_idset *queued_ids, struct imsgbuf *ibuf)
> {
> const struct got_error *err = NULL;
> int done = 0;
> @@ -1037,6 +1038,9 @@ recv_object_id_queue(struct got_object_id_queue *queue
> return err;
> memcpy(&qid->id, &ids[i], sizeof(qid->id));
> STAILQ_INSERT_TAIL(queue, qid, entry);
> + err = got_object_idset_add(queued_ids, &qid->id, NULL);
> + if (err)
> + return err;
> }
> }
>
> @@ -1362,7 +1366,7 @@ enumeration_request(struct imsg *imsg, struct imsgbuf
> struct got_commit_object *commit = NULL;
> struct got_object_id *tree_id = NULL;
> size_t totlen = 0;
> - struct got_object_idset *idset;
> + struct got_object_idset *idset, *queued_ids = NULL;
> int i, idx, have_all_entries = 1;
> struct enumerated_tree *trees = NULL;
> size_t ntrees = 0, nalloc = 16;
> @@ -1379,7 +1383,13 @@ enumeration_request(struct imsg *imsg, struct imsgbuf
> goto done;
> }
>
> - err = recv_object_id_queue(&commit_ids, ibuf);
> + queued_ids = got_object_idset_alloc();
> + if (queued_ids == NULL) {
> + err = got_error_from_errno("got_object_idset_alloc");
> + goto done;
> + }
> +
> + err = recv_object_id_queue(&commit_ids, queued_ids, ibuf);
> if (err)
> goto done;
>
> @@ -1497,6 +1507,8 @@ enumeration_request(struct imsg *imsg, struct imsgbuf
> STAILQ_FOREACH(pid, parents, entry) {
> if (got_object_idset_contains(idset, &pid->id))
> continue;
> + if (got_object_idset_contains(queued_ids, &pid->id))
> + continue;
> err = got_object_qid_alloc_partial(&qid);
> if (err)
> goto done;
> @@ -1528,6 +1540,8 @@ done:
> got_object_id_queue_free(&commit_ids);
> if (idset)
> got_object_idset_free(idset);
> + if (queued_ids)
> + got_object_idset_free(queued_ids);
> for (i = 0; i < ntrees; i++) {
> struct enumerated_tree *tree = &trees[i];
> free(tree->buf);
>
>
>
--
Tracey Emery
two pack file creation fixes