"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
cache pack index file list when repository is opened
To:
gameoftrees@openbsd.org
Date:
Sun, 13 Mar 2022 01:35:20 +0100

Download raw body.

Thread
Profiling shows that many calls to readdir(3) of the objects/pack/
directory can cause significant overhead during 'gotadmin pack -a'
if the repository has many pack files that need to be searched.

This patch caches the list of known pack index files when the
repository is opened. This speeds up searches by skipping readdir(3)
if the pack index cache cannot prevent us from hitting the disk.

Pack files are not really expected to appear or disappear out
of the blue while the repository is in use, even though this
is possible. But even readdir(3) followed open(2) of an entry
could suffer from this. If this becomes a problem it was already
possible to hit with the old code, and could only be fixed by
catching relevant errors and retrying.

ok?

diff c0c2b3d1e2c80fe9ee36670cff033f948cca9fd6 444513f3327fd65edaef0102c8a213f463c30829
blob - 798762e89557feb475f650e62e549abe41eb43c4
blob + 5fb86bb46e7a4329db5383980128ed5c1b627aec
--- lib/got_lib_repository.h
+++ lib/got_lib_repository.h
@@ -51,6 +51,8 @@ struct got_repository {
 	char *path_git_dir;
 	int gitdir_fd;
 
+	struct got_pathlist_head packidx_paths;
+
 	/* The pack index cache speeds up search for packed objects. */
 	struct got_packidx *packidx_cache[GOT_PACK_CACHE_SIZE];
 
blob - 3b6af380cd3ae1ac103c402a5365107c232529dd
blob + 2becf2c24a773043a066d66c62839e1323134ce4
--- lib/pack_create.c
+++ lib/pack_create.c
@@ -489,19 +489,13 @@ find_pack_for_reuse(struct got_packidx **best_packidx,
     struct got_repository *repo)
 {
 	const struct got_error *err;
-	struct got_pathlist_head packidx_paths;
 	struct got_pathlist_entry *pe;
 	const char *best_packidx_path = NULL;
 	int nobj_max = 0;
 
-	TAILQ_INIT(&packidx_paths);
 	*best_packidx = NULL;
 
-	err = got_repo_list_packidx(&packidx_paths, repo);
-	if (err)
-		return err;
-
-	TAILQ_FOREACH(pe, &packidx_paths, entry) {
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
 		const char *path_packidx = pe->path;
 		struct got_packidx *packidx;
 		int nobj;
@@ -522,9 +516,6 @@ find_pack_for_reuse(struct got_packidx **best_packidx,
 		    repo);
 	}
 
-	TAILQ_FOREACH(pe, &packidx_paths, entry)
-		free((void *)pe->path);
-	got_pathlist_free(&packidx_paths);
 	return err;
 }
 
blob - d4175545865a20ba9a3cff43736298aaae5e8c37
blob + bea8c8ad1e87cf8b7547d55c5b292380e389a38f
--- lib/repository.c
+++ lib/repository.c
@@ -670,6 +670,7 @@ got_repo_open(struct got_repository **repop, const cha
 	}
 
 	RB_INIT(&repo->packidx_bloom_filters);
+	TAILQ_INIT(&repo->packidx_paths);
 
 	for (i = 0; i < nitems(repo->privsep_children); i++) {
 		memset(&repo->privsep_children[i], 0,
@@ -750,6 +751,8 @@ got_repo_open(struct got_repository **repop, const cha
 			goto done;
 		}
 	}
+
+	err = got_repo_list_packidx(&repo->packidx_paths, repo);
 done:
 	if (err)
 		got_repo_close(repo);
@@ -764,6 +767,7 @@ got_repo_close(struct got_repository *repo)
 {
 	const struct got_error *err = NULL, *child_err;
 	struct got_packidx_bloom_filter *bf;
+	struct got_pathlist_entry *pe;
 	size_t i;
 
 	for (i = 0; i < repo->pack_cache_size; i++) {
@@ -824,6 +828,10 @@ got_repo_close(struct got_repository *repo)
 	for (i = 0; i < repo->nextensions; i++)
 		free(repo->extensions[i]);
 	free(repo->extensions);
+
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry)
+		free((void *)pe->path);
+	got_pathlist_free(&repo->packidx_paths);
 	free(repo);
 
 	return err;
@@ -1092,11 +1100,8 @@ got_repo_search_packidx(struct got_packidx **packidx, 
     struct got_repository *repo, struct got_object_id *id)
 {
 	const struct got_error *err;
-	DIR *packdir = NULL;
-	struct dirent *dent;
-	char *path_packidx;
+	struct got_pathlist_entry *pe;
 	size_t i;
-	int packdir_fd;
 
 	/* Search pack index cache. */
 	for (i = 0; i < repo->pack_cache_size; i++) {
@@ -1123,41 +1128,13 @@ got_repo_search_packidx(struct got_packidx **packidx, 
 	}
 	/* No luck. Search the filesystem. */
 
-	packdir_fd = openat(got_repo_get_fd(repo),
-	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY | O_CLOEXEC);
-	if (packdir_fd == -1) {
-		if (errno == ENOENT)
-			err = got_error_no_obj(id);
-		else
-			err = got_error_from_errno_fmt("openat: %s/%s",
-			    got_repo_get_path_git_dir(repo),
-			    GOT_OBJECTS_PACK_DIR);
-		goto done;
-	}
-
-	packdir = fdopendir(packdir_fd);
-	if (packdir == NULL) {
-		err = got_error_from_errno("fdopendir");
-		goto done;
-	}
-
-	while ((dent = readdir(packdir)) != NULL) {
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
+		const char *path_packidx = pe->path;
 		int is_cached = 0;
 
-		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
-			continue;
-
-		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
-		    dent->d_name) == -1) {
-			err = got_error_from_errno("asprintf");
-			goto done;
-		}
-
 		if (!got_repo_check_packidx_bloom_filter(repo,
-		    path_packidx, id)) {
-			free(path_packidx);
+		    pe->path, id))
 			continue; /* object will not be found in this index */
-		}
 
 		for (i = 0; i < repo->pack_cache_size; i++) {
 			if (repo->packidx_cache[i] == NULL)
@@ -1168,26 +1145,19 @@ got_repo_search_packidx(struct got_packidx **packidx, 
 				break;
 			}
 		}
-		if (is_cached) {
-			free(path_packidx);
+		if (is_cached)
 			continue; /* already searched */
-		}
 
 		err = got_packidx_open(packidx, got_repo_get_fd(repo),
 		    path_packidx, 0);
-		if (err) {
-			free(path_packidx);
+		if (err)
 			goto done;
-		}
 
 		err = add_packidx_bloom_filter(repo, *packidx, path_packidx);
-		if (err) {
-			free(path_packidx);
+		if (err)
 			goto done;
-		}
 
 		err = cache_packidx(repo, *packidx, path_packidx);
-		free(path_packidx);
 		if (err)
 			goto done;
 
@@ -1200,8 +1170,6 @@ got_repo_search_packidx(struct got_packidx **packidx, 
 
 	err = got_error_no_obj(id);
 done:
-	if (packdir && closedir(packdir) != 0 && err == NULL)
-		err = got_error_from_errno("closedir");
 	return err;
 }
 
@@ -1485,46 +1453,18 @@ match_packed_object(struct got_object_id **unique_id,
     struct got_repository *repo, const char *id_str_prefix, int obj_type)
 {
 	const struct got_error *err = NULL;
-	DIR *packdir = NULL;
-	struct dirent *dent;
-	char *path_packidx;
 	struct got_object_id_queue matched_ids;
-	int packdir_fd;
+	struct got_pathlist_entry *pe;
 
 	STAILQ_INIT(&matched_ids);
 
-	packdir_fd = openat(got_repo_get_fd(repo),
-	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY | O_CLOEXEC);
-	if (packdir_fd == -1) {
-		if (errno != ENOENT) {
-			err = got_error_from_errno2("openat",
-			    GOT_OBJECTS_PACK_DIR);
-		}
-		goto done;
-	}
-
-	packdir = fdopendir(packdir_fd);
-	if (packdir == NULL) {
-		err = got_error_from_errno("fdopendir");
-		goto done;
-	}
-
-	while ((dent = readdir(packdir)) != NULL) {
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
+		const char *path_packidx = pe->path;
 		struct got_packidx *packidx;
 		struct got_object_qid *qid;
 
-		if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen))
-			continue;
-
-		if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR,
-		    dent->d_name) == -1) {
-			err = got_error_from_errno("strdup");
-			break;
-		}
-
 		err = got_packidx_open(&packidx, got_repo_get_fd(repo),
 		    path_packidx, 0);
-		free(path_packidx);
 		if (err)
 			break;
 
@@ -1564,8 +1504,6 @@ match_packed_object(struct got_object_id **unique_id,
 	}
 done:
 	got_object_id_queue_free(&matched_ids);
-	if (packdir && closedir(packdir) != 0 && err == NULL)
-		err = got_error_from_errno("closedir");
 	if (err) {
 		free(*unique_id);
 		*unique_id = NULL;