"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
avoid idset loop for unused objects
To:
gameoftrees@openbsd.org
Date:
Sun, 1 May 2022 20:00:37 +0200

Download raw body.

Thread
Similar to the previous patch I just sent, this avoids the 'remove unused'
loop by storing excluded objects in a separate set. This way, unused objects
never appear in the set of objects we want to include, so we don't need to
remove them again.

Applies on top of the diff which removes the loop for delta-reused objects.

ok?
 
diff fe8175d3d5b2cce6073a5a6f382e6076aeac824f 5a04da888353ccaeee0387c1384f7e692f5612dc
blob - ca90c2008f13c7d8d23491fdf83419879cbc986d
blob + 696da4ba5a2fbbd6c9d585af41c372b9e5769666
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -939,7 +939,8 @@ add_object(int want_meta, struct got_object_idset *ids
 
 static const struct got_error *
 load_tree_entries(struct got_object_id_queue *ids, int want_meta,
-    struct got_object_idset *idset, struct got_object_id *tree_id,
+    struct got_object_idset *idset, struct got_object_idset *idset_exclude,
+    struct got_object_id *tree_id,
     const char *dpath, time_t mtime, struct got_repository *repo,
     int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
     got_pack_progress_cb progress_cb, void *progress_arg,
@@ -972,7 +973,8 @@ load_tree_entries(struct got_object_id_queue *ids, int
 		}
 
 		if (got_object_tree_entry_is_submodule(e) ||
-		    got_object_idset_contains(idset, id))
+		    got_object_idset_contains(idset, id) ||
+		    got_object_idset_contains(idset_exclude, id))
 			continue;
 		
 		if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
@@ -988,7 +990,8 @@ load_tree_entries(struct got_object_id_queue *ids, int
 				break;
 			STAILQ_INSERT_TAIL(ids, qid, entry);
 		} else if (S_ISREG(mode) || S_ISLNK(mode)) {
-			err = add_object(want_meta, idset, id, p,
+			err = add_object(want_meta,
+			    want_meta ? idset : idset_exclude, id, p,
 			    GOT_OBJ_TYPE_BLOB, mtime, loose_obj_only, repo,
 			    ncolored, nfound, ntrees,
 			    progress_cb, progress_arg, rl);
@@ -1006,6 +1009,7 @@ load_tree_entries(struct got_object_id_queue *ids, int
 
 static const struct got_error *
 load_tree(int want_meta, struct got_object_idset *idset,
+    struct got_object_idset *idset_exclude,
     struct got_object_id *tree_id, const char *dpath, time_t mtime,
     struct got_repository *repo, int loose_obj_only,
     int *ncolored, int *nfound, int *ntrees,
@@ -1016,7 +1020,8 @@ load_tree(int want_meta, struct got_object_idset *idse
 	struct got_object_id_queue tree_ids;
 	struct got_object_qid *qid;
 
-	if (got_object_idset_contains(idset, tree_id))
+	if (got_object_idset_contains(idset, tree_id) ||
+	    got_object_idset_contains(idset_exclude, tree_id))
 		return NULL;
 
 	err = got_object_qid_alloc(&qid, tree_id);
@@ -1036,20 +1041,23 @@ load_tree(int want_meta, struct got_object_idset *idse
 		qid = STAILQ_FIRST(&tree_ids);
 		STAILQ_REMOVE_HEAD(&tree_ids, entry);
 
-		if (got_object_idset_contains(idset, &qid->id)) {
+		if (got_object_idset_contains(idset, &qid->id) ||
+		    got_object_idset_contains(idset_exclude, &qid->id)) {
 			got_object_qid_free(qid);
 			continue;
 		}
 
-		err = add_object(want_meta, idset, &qid->id, dpath,
-		    GOT_OBJ_TYPE_TREE, mtime, loose_obj_only, repo,
+		err = add_object(want_meta, want_meta ? idset : idset_exclude,
+		    &qid->id, dpath, GOT_OBJ_TYPE_TREE,
+		    mtime, loose_obj_only, repo,
 		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
 		if (err) {
 			got_object_qid_free(qid);
 			break;
 		}
 
-		err = load_tree_entries(&tree_ids, want_meta, idset, &qid->id,
+		err = load_tree_entries(&tree_ids, want_meta, idset,
+		    idset_exclude, &qid->id,
 		    dpath, mtime, repo, loose_obj_only, ncolored, nfound,
 		    ntrees, progress_cb, progress_arg, rl,
 		    cancel_cb, cancel_arg);
@@ -1064,6 +1072,7 @@ load_tree(int want_meta, struct got_object_idset *idse
 
 static const struct got_error *
 load_commit(int want_meta, struct got_object_idset *idset,
+    struct got_object_idset *idset_exclude,
     struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
     int *ncolored, int *nfound, int *ntrees,
     got_pack_progress_cb progress_cb, void *progress_arg,
@@ -1072,7 +1081,8 @@ load_commit(int want_meta, struct got_object_idset *id
 	const struct got_error *err;
 	struct got_commit_object *commit;
 
-	if (got_object_idset_contains(idset, id))
+	if (got_object_idset_contains(idset, id) ||
+	    got_object_idset_contains(idset_exclude, id))
 		return NULL;
 
 	if (loose_obj_only) {
@@ -1088,14 +1098,16 @@ load_commit(int want_meta, struct got_object_idset *id
 	if (err)
 		return err;
 
-	err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_COMMIT,
+	err = add_object(want_meta, want_meta ? idset : idset_exclude,
+	    id, "", GOT_OBJ_TYPE_COMMIT,
 	    got_object_commit_get_committer_time(commit),
 	    loose_obj_only, repo,
 	    ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
 	if (err)
 		goto done;
 
-	err = load_tree(want_meta, idset, got_object_commit_get_tree_id(commit),
+	err = load_tree(want_meta, idset, idset_exclude,
+	    got_object_commit_get_tree_id(commit),
 	    "", got_object_commit_get_committer_time(commit),
 	    repo, loose_obj_only, ncolored, nfound, ntrees,
 	    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
@@ -1106,6 +1118,7 @@ done:
 
 static const struct got_error *
 load_tag(int want_meta, struct got_object_idset *idset,
+    struct got_object_idset *idset_exclude,
     struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
     int *ncolored, int *nfound, int *ntrees,
     got_pack_progress_cb progress_cb, void *progress_arg,
@@ -1114,7 +1127,8 @@ load_tag(int want_meta, struct got_object_idset *idset
 	const struct got_error *err;
 	struct got_tag_object *tag = NULL;
 
-	if (got_object_idset_contains(idset, id))
+	if (got_object_idset_contains(idset, id) ||
+	    got_object_idset_contains(idset_exclude, id))
 		return NULL;
 
 	if (loose_obj_only) {
@@ -1130,7 +1144,8 @@ load_tag(int want_meta, struct got_object_idset *idset
 	if (err)
 		return err;
 
-	err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_TAG,
+	err = add_object(want_meta, want_meta ? idset : idset_exclude,
+	    id, "", GOT_OBJ_TYPE_TAG,
 	    got_object_tag_get_tagger_time(tag), loose_obj_only, repo,
 	    ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
 	if (err)
@@ -1138,13 +1153,13 @@ load_tag(int want_meta, struct got_object_idset *idset
 
 	switch (got_object_tag_get_object_type(tag)) {
 	case GOT_OBJ_TYPE_COMMIT:
-		err = load_commit(want_meta, idset,
+		err = load_commit(want_meta, idset, idset_exclude,
 		    got_object_tag_get_object_id(tag), repo, loose_obj_only,
 		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
 		    cancel_cb, cancel_arg);
 		break;
 	case GOT_OBJ_TYPE_TREE:
-		err = load_tree(want_meta, idset,
+		err = load_tree(want_meta, idset, idset_exclude,
 		    got_object_tag_get_object_id(tag), "",
 		    got_object_tag_get_tagger_time(tag), repo, loose_obj_only,
 		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
@@ -1465,7 +1480,12 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 	const struct got_error *err = NULL;
 	struct got_object_id **ids = NULL;
 	int i, nobj = 0, obj_type;
+	struct got_object_idset *idset_exclude;
 
+	idset_exclude = got_object_idset_alloc();
+	if (idset_exclude == NULL)
+		return got_error_from_errno("got_object_idset_alloc");
+
 	*ncolored = 0;
 	*nfound = 0;
 	*ntrees = 0;
@@ -1483,14 +1503,14 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 		if (err)
 			return err;
 		if (obj_type == GOT_OBJ_TYPE_COMMIT) {
-			err = load_commit(0, idset, id, repo,
+			err = load_commit(0, idset, idset_exclude, id, repo,
 			    loose_obj_only, ncolored, nfound, ntrees,
 			    progress_cb, progress_arg, rl,
 			    cancel_cb, cancel_arg);
 			if (err)
 				goto done;
 		} else if (obj_type == GOT_OBJ_TYPE_TAG) {
-			err = load_tag(0, idset, id, repo,
+			err = load_tag(0, idset, idset_exclude, id, repo,
 			    loose_obj_only, ncolored, nfound, ntrees,
 			    progress_cb, progress_arg, rl,
 			    cancel_cb, cancel_arg);
@@ -1500,9 +1520,9 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 	}
 
 	for (i = 0; i < nobj; i++) {
-		err = load_commit(1, idset, ids[i], repo, loose_obj_only,
-		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
-		    cancel_cb, cancel_arg);
+		err = load_commit(1, idset, idset_exclude,
+		    ids[i], repo, loose_obj_only, ncolored, nfound, ntrees,
+		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
 		if (err)
 			goto done;
 	}
@@ -1521,9 +1541,9 @@ load_object_ids(int *ncolored, int *nfound, int *ntree
 			obj_type = m->obj_type;
 		if (obj_type != GOT_OBJ_TYPE_TAG)
 			continue;
-		err = load_tag(1, idset, id, repo, loose_obj_only,
-		    ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
-		    cancel_cb, cancel_arg);
+		err = load_tag(1, idset, idset_exclude, id, repo,
+		    loose_obj_only, ncolored, nfound, ntrees,
+		    progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
 		if (err)
 			goto done;
 	}
@@ -1532,6 +1552,7 @@ done:
 		free(ids[i]);
 	}
 	free(ids);
+	got_object_idset_free(idset_exclude);
 	return err;
 }
 
@@ -1859,17 +1880,6 @@ done:
 }
 
 static const struct got_error *
-remove_unused_object(struct got_object_id *id, void *data, void *arg)
-{
-	struct got_object_idset *idset = arg;
-
-	if (data == NULL)
-		got_object_idset_remove(NULL, idset, id);
-
-	return NULL;
-}
-
-static const struct got_error *
 add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
 {
 	struct got_pack_meta *m = data;
@@ -1913,10 +1923,6 @@ got_pack_create(uint8_t *packsha1, FILE *packfile,
 	if (err)
 		return err;
 
-	err = got_object_idset_for_each(idset, remove_unused_object, idset);
-	if (err)
-		goto done;
-
 	if (progress_cb) {
 		err = progress_cb(progress_arg, ncolored, nfound, ntrees,
 		    0L, nours, got_object_idset_num_elements(idset), 0, 0);