"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Mark Jamsek <mark@jamsek.com>
Subject:
Re: fix 'got import' output sorting issue
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Sun, 05 Jan 2025 17:14:06 +1100

Download raw body.

Thread
Stefan Sperling <stsp@stsp.name> wrote:
> While running the -portable regression tests on Debian I noticed
> failures related to path sorting in the output of 'got import':
> 
> @@ -1,5 +1,5 @@
> -A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
>  A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta
> +A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
>  A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha
>  A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta
> 
> We currently process directory entries in readdir() order, which is
> not guaranteed to match the order of got_path_cmp(). The patch below
> lets 'got import' traverse entries in got_path_cmp() order and produces
> the same test failures on OpenBSD. Adjust test expectations accordingly.
> 
> ok?

Yes, makes sense and reads good.

ok with one question: should we check the telldir(3) calls for failure?

POSIX.1-2024 specifies no errors are defined, but telldir(3) on OpenBSD
and Alpine specify that on failure, -1 is returned and errno is set;
and OpenBSD also states that telldir() can fail for any of the errors
specified for realloc(3).


> make 'got import' output independent of readdir(3) entry order
> 
> M  lib/repository.c           |  100+  25-
> M  regress/cmdline/import.sh  |    6+   6-
> 
> 2 files changed, 106 insertions(+), 31 deletions(-)
> 
> commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3
> commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7
> blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d
> blob + 169d68adf1edeb5a8126017d8c850731d2689ad6
> --- lib/repository.c
> +++ lib/repository.c
> @@ -2284,6 +2284,41 @@ done:
>  }
>  
>  static const struct got_error *
> +insert_dir_entry(struct dirent *de, long pos,
> +    struct got_pathlist_head *dirents)
> +{
> +	const struct got_error *err = NULL;
> +	struct got_pathlist_entry *new_pe;
> +	char *d_name;
> +	long *ppos;
> +
> +	d_name = strdup(de->d_name);
> +	if (d_name == NULL)
> +		return got_error_from_errno("strdup");
> +
> +	/* Record the offset to this dirent in struct DIR as path data. */
> +	ppos = malloc(sizeof(*ppos));
> +	if (ppos == NULL) {
> +		err = got_error_from_errno("malloc");
> +		goto done;
> +	}
> +	*ppos = pos;
> +
> +	err = got_pathlist_insert(&new_pe, dirents, d_name, ppos);
> +	if (err)
> +		goto done;
> +
> +	if (new_pe == NULL)
> +		err = got_error(GOT_ERR_TREE_DUP_ENTRY);
> +done:
> +	if (err) {
> +		free(d_name);
> +		free(ppos);
> +	}
> +	return err;
> +}
> +
> +static const struct got_error *
>  insert_tree_entry(struct got_tree_entry *new_te,
>      struct got_pathlist_head *paths)
>  {
> @@ -2342,6 +2377,37 @@ done:
>  }
>  
>  static const struct got_error *
> +match_ignores(int *ignore, char *d_name, int type,
> +    struct got_pathlist_head *ignores)
> +{
> +	struct got_pathlist_entry *pe;
> +
> +	*ignore = 0;
> +
> +	RB_FOREACH(pe, got_pathlist_head, ignores) {
> +		if (type == DT_DIR && pe->path_len > 0 &&
> +		    pe->path[pe->path_len - 1] == '/') {
> +			char stripped[PATH_MAX];
> +
> +			if (strlcpy(stripped, pe->path,
> +			    sizeof(stripped)) >= sizeof(stripped))
> +				return got_error(GOT_ERR_NO_SPACE);
> +
> +			got_path_strip_trailing_slashes(stripped);
> +			if (fnmatch(stripped, d_name, 0) == 0) {
> +				*ignore = 1;
> +				break;
> +			}
> +		} else if (fnmatch(pe->path, d_name, 0) == 0) {
> +			*ignore = 1;
> +			break;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +static const struct got_error *
>  write_tree(struct got_object_id **new_tree_id, const char *path_dir,
>      struct got_pathlist_head *ignores, struct got_repository *repo,
>      got_repo_import_cb progress_cb, void *progress_arg)
> @@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c
>  	struct dirent *de;
>  	int nentries;
>  	struct got_tree_entry *new_te = NULL;
> -	struct got_pathlist_head paths;
> +	struct got_pathlist_head paths, dirents;
>  	struct got_pathlist_entry *pe;
> +	long pos;
>  
>  	*new_tree_id = NULL;
>  
>  	RB_INIT(&paths);
> +	RB_INIT(&dirents);
>  
>  	dir = opendir(path_dir);
>  	if (dir == NULL) {
> @@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c
>  		goto done;
>  	}
>  
> -	nentries = 0;
> +	/*
> +	 * Sort dirents into a pathlist. Otherwise subdir-iteration order
> +	 * depends on the readdir() function, which varies between different
> +	 * operating systems, making regression testing more difficult.
> +	 * This smells of TOCTOU, but if dirents appear, disappear, or change
> +	 * during import then we have a racy situation no matter what.
> +	 */
> +	pos = telldir(dir);
>  	while ((de = readdir(dir)) != NULL) {
> +		if (strcmp(de->d_name, ".") != 0 &&
> +		    strcmp(de->d_name, "..") != 0) {
> +			err = insert_dir_entry(de, pos, &dirents);
> +			if (err)
> +				goto done;
> +		}
> +		pos = telldir(dir);
> +	}
> +
> +	nentries = 0;
> +	RB_FOREACH(pe, got_pathlist_head, &dirents) {
>  		int ignore = 0;
>  		int type;
> +		long *pos = pe->data;
>  
> -		if (strcmp(de->d_name, ".") == 0 ||
> -		    strcmp(de->d_name, "..") == 0)
> -			continue;
> +		seekdir(dir, *pos);
> +		de = readdir(dir);
> +		if (de == NULL) { /* should not happen */
> +			err = got_error_fmt(GOT_ERR_EOF,
> +			    "unexpected EOF on directory '%s' while seeking "
> +			    "to entry '%s' at offset '%ld'\n", path_dir,
> +			    pe->path, *pos);
> +			goto done;
> +		}
>  
>  		err = got_path_dirent_type(&type, path_dir, de);
>  		if (err)
>  			goto done;
>  
> -		RB_FOREACH(pe, got_pathlist_head, ignores) {
> -			if (type == DT_DIR && pe->path_len > 0 &&
> -			    pe->path[pe->path_len - 1] == '/') {
> -				char stripped[PATH_MAX];
> -
> -				if (strlcpy(stripped, pe->path,
> -				    sizeof(stripped)) >= sizeof(stripped)) {
> -					err = got_error(GOT_ERR_NO_SPACE);
> -					goto done;
> -				}
> -				got_path_strip_trailing_slashes(stripped);
> -				if (fnmatch(stripped, de->d_name, 0) == 0) {
> -					ignore = 1;
> -					break;
> -				}
> -			} else if (fnmatch(pe->path, de->d_name, 0) == 0) {
> -				ignore = 1;
> -				break;
> -			}
> -		}
> +		err = match_ignores(&ignore, de->d_name, type, ignores);
>  		if (ignore)
>  			continue;
>  
> @@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c
>  done:
>  	if (dir)
>  		closedir(dir);
> +	got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL);
>  	got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE);
>  	return err;
>  }
> blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8
> blob + 3baa6e847545365463b84422ebbc3e2212274bdc
> --- regress/cmdline/import.sh
> +++ regress/cmdline/import.sh
> @@ -40,8 +40,8 @@ test_import_basic() {
>  	fi
>  
>  	local head_commit=`git_show_head $testroot/repo`
> -	echo "A  $testroot/tree/gamma/delta" > $testroot/stdout.expected
> -	echo "A  $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
> +	echo "A  $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
> +	echo "A  $testroot/tree/gamma/delta" >> $testroot/stdout.expected
>  	echo "A  $testroot/tree/alpha" >> $testroot/stdout.expected
>  	echo "A  $testroot/tree/beta" >> $testroot/stdout.expected
>  	echo "Created branch refs/heads/main with commit $head_commit" \
> @@ -165,8 +165,8 @@ test_import_specified_head() {
>  	fi
>  
>  	local head_commit=`git_show_head $testroot/repo`
> -	echo "A  $testroot/tree/gamma/delta" > $testroot/stdout.expected
> -	echo "A  $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
> +	echo "A  $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
> +	echo "A  $testroot/tree/gamma/delta" >> $testroot/stdout.expected
>  	echo "A  $testroot/tree/alpha" >> $testroot/stdout.expected
>  	echo "A  $testroot/tree/beta" >> $testroot/stdout.expected
>  	echo "Created branch refs/heads/$headref with commit $head_commit" \
> @@ -324,8 +324,8 @@ test_import_detached_head() {
>  
>  	local main_commit=`git -C $testroot/repo show-ref main | \
>  	    cut -d ' ' -f 1`
> -	echo "A  $testroot/import/gamma/delta" > $testroot/stdout.expected
> -	echo "A  $testroot/import/epsilon/zeta" >> $testroot/stdout.expected
> +	echo "A  $testroot/import/epsilon/zeta" > $testroot/stdout.expected
> +	echo "A  $testroot/import/gamma/delta" >> $testroot/stdout.expected
>  	echo "A  $testroot/import/alpha" >> $testroot/stdout.expected
>  	echo "A  $testroot/import/beta" >> $testroot/stdout.expected
>  	echo "Created branch refs/heads/main with commit $main_commit" \


-- 
Mark Jamsek <https://bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80  E6E2 2930 DC66 86EE CF68