Download raw body.
two pack file creation fixes
On Thu, Jan 26, 2023 at 10:16:56PM +0100, Stefan Sperling wrote: > Two patches below fix two issues in got-read-pack. > The first patch is a bug fix, the second is a small performance fix. > > Regarding the bug: > > Using 'got send' to send the entire history of a branch to an empty > repository can result in some objects missing. A gotd server will > end up with a broken repository. I suspect git-daemon would catch > this problem, but I haven't tried to trigger this with git-daemon yet. > > To reproduce this, run: > got clone ssh://git@github.com/yellowman/nsh > to get this particular repo from Github which happens to trigger the bug. > > Then set up a fresh repository on a gotd server and add it as a new remote > to the nsh repo cloned from github. Now use 'got send' to try to populate > this empty repository on the server. This will apparently succeed, but if > you now try to run e.g. 'got log' on the repository served by gotd, it > will erorr out because some commits will be missing. > > Notably, in this situation the source repository has just one pack file > and no loose objects, so everything will be enumerated by got-read-pack. > In situations where not all objects can be enumerated from a single pack > file this bug won't trigger because we'll load the missing commits in > secondary pass. > > ok? ok > > ----------------------------------------------- > > fix missing commits in pack files created with packed object enumeration > > got-read-pack forgot to send a tree-enumeration-done message to the > main process if the tree of a given commit had already been traversed. > The main process would then not add the corresponding commit to the > pack file, even though it should be added. > > Found while using 'got send' towards gotd in order to populate an > empty repository on the server with non-trivial history, where some > commits always ended up missing due to this bug. > > diff 87332a0e6e71182f1fb148d7f42e78105713285b 74aa0dc01f1f739557393098718e63131e75acd7 > commit - 87332a0e6e71182f1fb148d7f42e78105713285b > commit + 74aa0dc01f1f739557393098718e63131e75acd7 > blob - dd9fcfea23f63268577c4d5b91298a5a5106c61a > blob + 63ea6eca0b02d1b08d712fe7e8163f04098f3934 > --- libexec/got-read-pack/got-read-pack.c > +++ libexec/got-read-pack/got-read-pack.c > @@ -1471,6 +1471,9 @@ enumeration_request(struct imsg *imsg, struct imsgbuf > } > > if (got_object_idset_contains(idset, tree_id)) { > + err = send_tree_enumeration_done(ibuf); > + if (err) > + goto done; > got_object_qid_free(qid); > qid = NULL; > continue; > > ----------------------------------------------- > > avoid traversing enumrated commits more than once in got-read-pack > > Keep track of parent commits that will be processed as part > of looping over the commit queue provided by the main process, > and do not add these commits to the queue again. > > Fixes pointless traversal of commits on the queue which will > simply be skipped. The end result is the same either way. > > diff 74aa0dc01f1f739557393098718e63131e75acd7 00f162a8d0597c71a98d1429cb6ed38f13747d01 > commit - 74aa0dc01f1f739557393098718e63131e75acd7 > commit + 00f162a8d0597c71a98d1429cb6ed38f13747d01 > blob - 63ea6eca0b02d1b08d712fe7e8163f04098f3934 > blob + a64b44bf058deda0f69f2cd1ff50d1f307b0ffdf > --- libexec/got-read-pack/got-read-pack.c > +++ libexec/got-read-pack/got-read-pack.c > @@ -1019,7 +1019,8 @@ recv_object_id_queue(struct got_object_id_queue *queue > } > > static const struct got_error * > -recv_object_id_queue(struct got_object_id_queue *queue, struct imsgbuf *ibuf) > +recv_object_id_queue(struct got_object_id_queue *queue, > + struct got_object_idset *queued_ids, struct imsgbuf *ibuf) > { > const struct got_error *err = NULL; > int done = 0; > @@ -1037,6 +1038,9 @@ recv_object_id_queue(struct got_object_id_queue *queue > return err; > memcpy(&qid->id, &ids[i], sizeof(qid->id)); > STAILQ_INSERT_TAIL(queue, qid, entry); > + err = got_object_idset_add(queued_ids, &qid->id, NULL); > + if (err) > + return err; > } > } > > @@ -1362,7 +1366,7 @@ enumeration_request(struct imsg *imsg, struct imsgbuf > struct got_commit_object *commit = NULL; > struct got_object_id *tree_id = NULL; > size_t totlen = 0; > - struct got_object_idset *idset; > + struct got_object_idset *idset, *queued_ids = NULL; > int i, idx, have_all_entries = 1; > struct enumerated_tree *trees = NULL; > size_t ntrees = 0, nalloc = 16; > @@ -1379,7 +1383,13 @@ enumeration_request(struct imsg *imsg, struct imsgbuf > goto done; > } > > - err = recv_object_id_queue(&commit_ids, ibuf); > + queued_ids = got_object_idset_alloc(); > + if (queued_ids == NULL) { > + err = got_error_from_errno("got_object_idset_alloc"); > + goto done; > + } > + > + err = recv_object_id_queue(&commit_ids, queued_ids, ibuf); > if (err) > goto done; > > @@ -1497,6 +1507,8 @@ enumeration_request(struct imsg *imsg, struct imsgbuf > STAILQ_FOREACH(pid, parents, entry) { > if (got_object_idset_contains(idset, &pid->id)) > continue; > + if (got_object_idset_contains(queued_ids, &pid->id)) > + continue; > err = got_object_qid_alloc_partial(&qid); > if (err) > goto done; > @@ -1528,6 +1540,8 @@ done: > got_object_id_queue_free(&commit_ids); > if (idset) > got_object_idset_free(idset); > + if (queued_ids) > + got_object_idset_free(queued_ids); > for (i = 0; i < ntrees; i++) { > struct enumerated_tree *tree = &trees[i]; > free(tree->buf); > > > -- Tracey Emery
two pack file creation fixes