"GOT", but the "O" is a cute, smiling sun Index | Thread

From:
Stefan Sperling <stsp@stsp.name>
Subject:
fix pack cache size scaling for low ulimit -n
To:
gameoftrees@openbsd.org
Date:
Mon, 31 Oct 2022 18:35:55 +0100

Download raw body.

Landry noticed "too many open files" errors during 'got checkout' on arm64.

I eventually figured out that commit 0ae84acc1f0a0584e4f26ccbe029c895043b3abe
broke down-scaling of the pack file cache for low ulimit -n situations.
Since then, the pack_fd array has been fully populated with open file handles.
The current logic is such that we do not use all of those files if ulimit -n
is low. But the unused files should not even be opened in the first place.

The patch below fixes this issue, and adds a test case which triggers the
problem seen by Landry.

ok?

diff ad8ccd554bb28e347cf0e997429a408457280182 e746bab55d4e28eaac6cbcdd49c1878070815e55
commit - ad8ccd554bb28e347cf0e997429a408457280182
commit + e746bab55d4e28eaac6cbcdd49c1878070815e55
blob - 50237bbf7e4f90efc06e68c84cedd6c84631551e
blob + c172bdfa834cbcbf2bd89e3875832ed19ece76b0
--- lib/repository.c
+++ lib/repository.c
@@ -268,16 +268,16 @@ open_tempfiles(int **fds, size_t nfds)
 }
 
 static const struct got_error *
-open_tempfiles(int **fds, size_t nfds)
+open_tempfiles(int **fds, size_t array_size, size_t nfds)
 {
 	const struct got_error *err = NULL;
 	int i;
 
-	*fds = calloc(nfds, sizeof(**fds));
+	*fds = calloc(array_size, sizeof(**fds));
 	if (*fds == NULL)
 		return got_error_from_errno("calloc");
 
-	for (i = 0; i < nfds; i++)
+	for (i = 0; i < array_size; i++)
 		(*fds)[i] = -1;
 
 	for (i = 0; i < nfds; i++) {
@@ -293,10 +293,40 @@ const struct got_error *
 	return NULL;
 }
 
+static const struct got_error *
+get_pack_cache_size(int *pack_cache_size)
+{
+	struct rlimit rl;
+
+	if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
+		return got_error_from_errno("getrlimit");
+
+	*pack_cache_size = GOT_PACK_CACHE_SIZE;
+	if (*pack_cache_size > rl.rlim_cur / 8)
+		*pack_cache_size = rl.rlim_cur / 8;
+
+	return NULL;
+}
+
 const struct got_error *
 got_repo_pack_fds_open(int **pack_fds)
 {
-	return open_tempfiles(pack_fds, GOT_PACK_NUM_TEMPFILES);
+	const struct got_error *err;
+	int nfds;
+
+	err = get_pack_cache_size(&nfds);
+	if (err)
+		return err;
+
+	/*
+	 * We need one basefd and one accumfd per cached pack.
+	 * Our constants should be set up in a way such that
+	 * this error never triggers.
+	 */
+	if (nfds * 2 > GOT_PACK_NUM_TEMPFILES)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	return open_tempfiles(pack_fds, GOT_PACK_NUM_TEMPFILES, nfds * 2);
 }
 
 const struct got_error *
@@ -308,7 +338,8 @@ got_repo_temp_fds_open(int **temp_fds) 
 const struct got_error *
 got_repo_temp_fds_open(int **temp_fds) 
 {
-	return open_tempfiles(temp_fds, GOT_REPO_NUM_TEMPFILES);
+	return open_tempfiles(temp_fds, GOT_REPO_NUM_TEMPFILES,
+	    GOT_REPO_NUM_TEMPFILES);
 }
 
 void
@@ -638,13 +669,9 @@ got_repo_open(struct got_repository **repop, const cha
 	const struct got_error *err = NULL;
 	char *repo_path = NULL;
 	size_t i, j = 0;
-	struct rlimit rl;
 
 	*repop = NULL;
 
-	if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
-		return got_error_from_errno("getrlimit");
-
 	repo = calloc(1, sizeof(*repo));
 	if (repo == NULL)
 		return got_error_from_errno("calloc");
@@ -679,9 +706,9 @@ got_repo_open(struct got_repository **repop, const cha
 	if (err)
 		goto done;
 
-	repo->pack_cache_size = GOT_PACK_CACHE_SIZE;
-	if (repo->pack_cache_size > rl.rlim_cur / 8)
-		repo->pack_cache_size = rl.rlim_cur / 8;
+	err = get_pack_cache_size(&repo->pack_cache_size);
+	if (err)
+		goto done;
 	for (i = 0; i < nitems(repo->packs); i++) {
 		if (pack_fds != NULL && i < repo->pack_cache_size) {
 			repo->packs[i].basefd = pack_fds[j++];
blob - 3ceb6774f7bce8a411b960566268aea4c45554cc
blob + fa93789a82f9ac4e822dda641bca1e8841f33829
--- regress/cmdline/checkout.sh
+++ regress/cmdline/checkout.sh
@@ -889,6 +889,51 @@ test_parseargs "$@"
 	test_done "$testroot" 0
 }
 
+test_checkout_ulimit_n() {
+	local testroot=`test_init checkout_ulimit_n`
+
+	echo -n "Checked out refs/heads/master: " >> $testroot/stdout.expected
+	git_show_head $testroot/repo >> $testroot/stdout.expected
+	printf "\nNow shut up and hack\n" >> $testroot/stdout.expected
+
+	# Drastically reduce the number of files we are allowed to use.
+	# This tests our down-scaling of caches which store open file handles.
+	# Checkout should still work; if it does not, then either there is
+	# a bug or the fixed limit used by this test case is no longer valid
+	# and must be raised.
+	ulimit -n 20
+
+	got checkout -q $testroot/repo $testroot/wt > $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	echo "alpha" > $testroot/content.expected
+	echo "beta" >> $testroot/content.expected
+	echo "zeta" >> $testroot/content.expected
+	echo "delta" >> $testroot/content.expected
+	cat $testroot/wt/alpha $testroot/wt/beta $testroot/wt/epsilon/zeta \
+	    $testroot/wt/gamma/delta > $testroot/content
+
+	cmp -s $testroot/content.expected $testroot/content
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/content.expected $testroot/content
+	fi
+	test_done "$testroot" "$ret"
+}
+
+
 test_parseargs "$@"
 run_test test_checkout_basic
 run_test test_checkout_dir_exists
@@ -904,3 +949,4 @@ run_test test_checkout_umask
 run_test test_checkout_repo_with_unknown_extension
 run_test test_checkout_quiet
 run_test test_checkout_umask
+run_test test_checkout_ulimit_n