From: Stefan Sperling Subject: fix pack cache size scaling for low ulimit -n To: gameoftrees@openbsd.org Date: Mon, 31 Oct 2022 18:35:55 +0100 Landry noticed "too many open files" errors during 'got checkout' on arm64. I eventually figured out that commit 0ae84acc1f0a0584e4f26ccbe029c895043b3abe broke down-scaling of the pack file cache for low ulimit -n situations. Since then, the pack_fd array has been fully populated with open file handles. The current logic is such that we do not use all of those files if ulimit -n is low. But the unused files should not even be opened in the first place. The patch below fixes this issue, and adds a test case which triggers the problem seen by Landry. ok? diff ad8ccd554bb28e347cf0e997429a408457280182 e746bab55d4e28eaac6cbcdd49c1878070815e55 commit - ad8ccd554bb28e347cf0e997429a408457280182 commit + e746bab55d4e28eaac6cbcdd49c1878070815e55 blob - 50237bbf7e4f90efc06e68c84cedd6c84631551e blob + c172bdfa834cbcbf2bd89e3875832ed19ece76b0 --- lib/repository.c +++ lib/repository.c @@ -268,16 +268,16 @@ open_tempfiles(int **fds, size_t nfds) } static const struct got_error * -open_tempfiles(int **fds, size_t nfds) +open_tempfiles(int **fds, size_t array_size, size_t nfds) { const struct got_error *err = NULL; int i; - *fds = calloc(nfds, sizeof(**fds)); + *fds = calloc(array_size, sizeof(**fds)); if (*fds == NULL) return got_error_from_errno("calloc"); - for (i = 0; i < nfds; i++) + for (i = 0; i < array_size; i++) (*fds)[i] = -1; for (i = 0; i < nfds; i++) { @@ -293,10 +293,40 @@ const struct got_error * return NULL; } +static const struct got_error * +get_pack_cache_size(int *pack_cache_size) +{ + struct rlimit rl; + + if (getrlimit(RLIMIT_NOFILE, &rl) == -1) + return got_error_from_errno("getrlimit"); + + *pack_cache_size = GOT_PACK_CACHE_SIZE; + if (*pack_cache_size > rl.rlim_cur / 8) + *pack_cache_size = rl.rlim_cur / 8; + + return NULL; +} + const struct got_error * got_repo_pack_fds_open(int **pack_fds) { - return open_tempfiles(pack_fds, GOT_PACK_NUM_TEMPFILES); + const struct got_error *err; + int nfds; + + err = get_pack_cache_size(&nfds); + if (err) + return err; + + /* + * We need one basefd and one accumfd per cached pack. + * Our constants should be set up in a way such that + * this error never triggers. + */ + if (nfds * 2 > GOT_PACK_NUM_TEMPFILES) + return got_error(GOT_ERR_NO_SPACE); + + return open_tempfiles(pack_fds, GOT_PACK_NUM_TEMPFILES, nfds * 2); } const struct got_error * @@ -308,7 +338,8 @@ got_repo_temp_fds_open(int **temp_fds) const struct got_error * got_repo_temp_fds_open(int **temp_fds) { - return open_tempfiles(temp_fds, GOT_REPO_NUM_TEMPFILES); + return open_tempfiles(temp_fds, GOT_REPO_NUM_TEMPFILES, + GOT_REPO_NUM_TEMPFILES); } void @@ -638,13 +669,9 @@ got_repo_open(struct got_repository **repop, const cha const struct got_error *err = NULL; char *repo_path = NULL; size_t i, j = 0; - struct rlimit rl; *repop = NULL; - if (getrlimit(RLIMIT_NOFILE, &rl) == -1) - return got_error_from_errno("getrlimit"); - repo = calloc(1, sizeof(*repo)); if (repo == NULL) return got_error_from_errno("calloc"); @@ -679,9 +706,9 @@ got_repo_open(struct got_repository **repop, const cha if (err) goto done; - repo->pack_cache_size = GOT_PACK_CACHE_SIZE; - if (repo->pack_cache_size > rl.rlim_cur / 8) - repo->pack_cache_size = rl.rlim_cur / 8; + err = get_pack_cache_size(&repo->pack_cache_size); + if (err) + goto done; for (i = 0; i < nitems(repo->packs); i++) { if (pack_fds != NULL && i < repo->pack_cache_size) { repo->packs[i].basefd = pack_fds[j++]; blob - 3ceb6774f7bce8a411b960566268aea4c45554cc blob + fa93789a82f9ac4e822dda641bca1e8841f33829 --- regress/cmdline/checkout.sh +++ regress/cmdline/checkout.sh @@ -889,6 +889,51 @@ test_parseargs "$@" test_done "$testroot" 0 } +test_checkout_ulimit_n() { + local testroot=`test_init checkout_ulimit_n` + + echo -n "Checked out refs/heads/master: " >> $testroot/stdout.expected + git_show_head $testroot/repo >> $testroot/stdout.expected + printf "\nNow shut up and hack\n" >> $testroot/stdout.expected + + # Drastically reduce the number of files we are allowed to use. + # This tests our down-scaling of caches which store open file handles. + # Checkout should still work; if it does not, then either there is + # a bug or the fixed limit used by this test case is no longer valid + # and must be raised. + ulimit -n 20 + + got checkout -q $testroot/repo $testroot/wt > $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + test_done "$testroot" "$ret" + return 1 + fi + + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + echo "alpha" > $testroot/content.expected + echo "beta" >> $testroot/content.expected + echo "zeta" >> $testroot/content.expected + echo "delta" >> $testroot/content.expected + cat $testroot/wt/alpha $testroot/wt/beta $testroot/wt/epsilon/zeta \ + $testroot/wt/gamma/delta > $testroot/content + + cmp -s $testroot/content.expected $testroot/content + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/content.expected $testroot/content + fi + test_done "$testroot" "$ret" +} + + test_parseargs "$@" run_test test_checkout_basic run_test test_checkout_dir_exists @@ -904,3 +949,4 @@ run_test test_checkout_umask run_test test_checkout_repo_with_unknown_extension run_test test_checkout_quiet run_test test_checkout_umask +run_test test_checkout_ulimit_n