"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
fix 'got import' output sorting issue
To:
gameoftrees@openbsd.org
Date:
Fri, 3 Jan 2025 16:11:56 +0100

Download raw body.

Thread
While running the -portable regression tests on Debian I noticed
failures related to path sorting in the output of 'got import':

@@ -1,5 +1,5 @@
-A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
 A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta
+A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
 A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha
 A  /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta

We currently process directory entries in readdir() order, which is
not guaranteed to match the order of got_path_cmp(). The patch below
lets 'got import' traverse entries in got_path_cmp() order and produces
the same test failures on OpenBSD. Adjust test expectations accordingly.

ok?

make 'got import' output independent of readdir(3) entry order

M  lib/repository.c           |  100+  25-
M  regress/cmdline/import.sh  |    6+   6-

2 files changed, 106 insertions(+), 31 deletions(-)

commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3
commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7
blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d
blob + 169d68adf1edeb5a8126017d8c850731d2689ad6
--- lib/repository.c
+++ lib/repository.c
@@ -2284,6 +2284,41 @@ done:
 }
 
 static const struct got_error *
+insert_dir_entry(struct dirent *de, long pos,
+    struct got_pathlist_head *dirents)
+{
+	const struct got_error *err = NULL;
+	struct got_pathlist_entry *new_pe;
+	char *d_name;
+	long *ppos;
+
+	d_name = strdup(de->d_name);
+	if (d_name == NULL)
+		return got_error_from_errno("strdup");
+
+	/* Record the offset to this dirent in struct DIR as path data. */
+	ppos = malloc(sizeof(*ppos));
+	if (ppos == NULL) {
+		err = got_error_from_errno("malloc");
+		goto done;
+	}
+	*ppos = pos;
+
+	err = got_pathlist_insert(&new_pe, dirents, d_name, ppos);
+	if (err)
+		goto done;
+
+	if (new_pe == NULL)
+		err = got_error(GOT_ERR_TREE_DUP_ENTRY);
+done:
+	if (err) {
+		free(d_name);
+		free(ppos);
+	}
+	return err;
+}
+
+static const struct got_error *
 insert_tree_entry(struct got_tree_entry *new_te,
     struct got_pathlist_head *paths)
 {
@@ -2342,6 +2377,37 @@ done:
 }
 
 static const struct got_error *
+match_ignores(int *ignore, char *d_name, int type,
+    struct got_pathlist_head *ignores)
+{
+	struct got_pathlist_entry *pe;
+
+	*ignore = 0;
+
+	RB_FOREACH(pe, got_pathlist_head, ignores) {
+		if (type == DT_DIR && pe->path_len > 0 &&
+		    pe->path[pe->path_len - 1] == '/') {
+			char stripped[PATH_MAX];
+
+			if (strlcpy(stripped, pe->path,
+			    sizeof(stripped)) >= sizeof(stripped))
+				return got_error(GOT_ERR_NO_SPACE);
+
+			got_path_strip_trailing_slashes(stripped);
+			if (fnmatch(stripped, d_name, 0) == 0) {
+				*ignore = 1;
+				break;
+			}
+		} else if (fnmatch(pe->path, d_name, 0) == 0) {
+			*ignore = 1;
+			break;
+		}
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
 write_tree(struct got_object_id **new_tree_id, const char *path_dir,
     struct got_pathlist_head *ignores, struct got_repository *repo,
     got_repo_import_cb progress_cb, void *progress_arg)
@@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c
 	struct dirent *de;
 	int nentries;
 	struct got_tree_entry *new_te = NULL;
-	struct got_pathlist_head paths;
+	struct got_pathlist_head paths, dirents;
 	struct got_pathlist_entry *pe;
+	long pos;
 
 	*new_tree_id = NULL;
 
 	RB_INIT(&paths);
+	RB_INIT(&dirents);
 
 	dir = opendir(path_dir);
 	if (dir == NULL) {
@@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c
 		goto done;
 	}
 
-	nentries = 0;
+	/*
+	 * Sort dirents into a pathlist. Otherwise subdir-iteration order
+	 * depends on the readdir() function, which varies between different
+	 * operating systems, making regression testing more difficult.
+	 * This smells of TOCTOU, but if dirents appear, disappear, or change
+	 * during import then we have a racy situation no matter what.
+	 */
+	pos = telldir(dir);
 	while ((de = readdir(dir)) != NULL) {
+		if (strcmp(de->d_name, ".") != 0 &&
+		    strcmp(de->d_name, "..") != 0) {
+			err = insert_dir_entry(de, pos, &dirents);
+			if (err)
+				goto done;
+		}
+		pos = telldir(dir);
+	}
+
+	nentries = 0;
+	RB_FOREACH(pe, got_pathlist_head, &dirents) {
 		int ignore = 0;
 		int type;
+		long *pos = pe->data;
 
-		if (strcmp(de->d_name, ".") == 0 ||
-		    strcmp(de->d_name, "..") == 0)
-			continue;
+		seekdir(dir, *pos);
+		de = readdir(dir);
+		if (de == NULL) { /* should not happen */
+			err = got_error_fmt(GOT_ERR_EOF,
+			    "unexpected EOF on directory '%s' while seeking "
+			    "to entry '%s' at offset '%ld'\n", path_dir,
+			    pe->path, *pos);
+			goto done;
+		}
 
 		err = got_path_dirent_type(&type, path_dir, de);
 		if (err)
 			goto done;
 
-		RB_FOREACH(pe, got_pathlist_head, ignores) {
-			if (type == DT_DIR && pe->path_len > 0 &&
-			    pe->path[pe->path_len - 1] == '/') {
-				char stripped[PATH_MAX];
-
-				if (strlcpy(stripped, pe->path,
-				    sizeof(stripped)) >= sizeof(stripped)) {
-					err = got_error(GOT_ERR_NO_SPACE);
-					goto done;
-				}
-				got_path_strip_trailing_slashes(stripped);
-				if (fnmatch(stripped, de->d_name, 0) == 0) {
-					ignore = 1;
-					break;
-				}
-			} else if (fnmatch(pe->path, de->d_name, 0) == 0) {
-				ignore = 1;
-				break;
-			}
-		}
+		err = match_ignores(&ignore, de->d_name, type, ignores);
 		if (ignore)
 			continue;
 
@@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c
 done:
 	if (dir)
 		closedir(dir);
+	got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL);
 	got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE);
 	return err;
 }
blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8
blob + 3baa6e847545365463b84422ebbc3e2212274bdc
--- regress/cmdline/import.sh
+++ regress/cmdline/import.sh
@@ -40,8 +40,8 @@ test_import_basic() {
 	fi
 
 	local head_commit=`git_show_head $testroot/repo`
-	echo "A  $testroot/tree/gamma/delta" > $testroot/stdout.expected
-	echo "A  $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
+	echo "A  $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
+	echo "A  $testroot/tree/gamma/delta" >> $testroot/stdout.expected
 	echo "A  $testroot/tree/alpha" >> $testroot/stdout.expected
 	echo "A  $testroot/tree/beta" >> $testroot/stdout.expected
 	echo "Created branch refs/heads/main with commit $head_commit" \
@@ -165,8 +165,8 @@ test_import_specified_head() {
 	fi
 
 	local head_commit=`git_show_head $testroot/repo`
-	echo "A  $testroot/tree/gamma/delta" > $testroot/stdout.expected
-	echo "A  $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
+	echo "A  $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
+	echo "A  $testroot/tree/gamma/delta" >> $testroot/stdout.expected
 	echo "A  $testroot/tree/alpha" >> $testroot/stdout.expected
 	echo "A  $testroot/tree/beta" >> $testroot/stdout.expected
 	echo "Created branch refs/heads/$headref with commit $head_commit" \
@@ -324,8 +324,8 @@ test_import_detached_head() {
 
 	local main_commit=`git -C $testroot/repo show-ref main | \
 	    cut -d ' ' -f 1`
-	echo "A  $testroot/import/gamma/delta" > $testroot/stdout.expected
-	echo "A  $testroot/import/epsilon/zeta" >> $testroot/stdout.expected
+	echo "A  $testroot/import/epsilon/zeta" > $testroot/stdout.expected
+	echo "A  $testroot/import/gamma/delta" >> $testroot/stdout.expected
 	echo "A  $testroot/import/alpha" >> $testroot/stdout.expected
 	echo "A  $testroot/import/beta" >> $testroot/stdout.expected
 	echo "Created branch refs/heads/main with commit $main_commit" \