From: Stefan Sperling Subject: fix 'got import' output sorting issue To: gameoftrees@openbsd.org Date: Fri, 3 Jan 2025 16:11:56 +0100 While running the -portable regression tests on Debian I noticed failures related to path sorting in the output of 'got import': @@ -1,5 +1,5 @@ -A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta A /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta +A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta A /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha A /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta We currently process directory entries in readdir() order, which is not guaranteed to match the order of got_path_cmp(). The patch below lets 'got import' traverse entries in got_path_cmp() order and produces the same test failures on OpenBSD. Adjust test expectations accordingly. ok? make 'got import' output independent of readdir(3) entry order M lib/repository.c | 100+ 25- M regress/cmdline/import.sh | 6+ 6- 2 files changed, 106 insertions(+), 31 deletions(-) commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3 commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7 blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d blob + 169d68adf1edeb5a8126017d8c850731d2689ad6 --- lib/repository.c +++ lib/repository.c @@ -2284,6 +2284,41 @@ done: } static const struct got_error * +insert_dir_entry(struct dirent *de, long pos, + struct got_pathlist_head *dirents) +{ + const struct got_error *err = NULL; + struct got_pathlist_entry *new_pe; + char *d_name; + long *ppos; + + d_name = strdup(de->d_name); + if (d_name == NULL) + return got_error_from_errno("strdup"); + + /* Record the offset to this dirent in struct DIR as path data. */ + ppos = malloc(sizeof(*ppos)); + if (ppos == NULL) { + err = got_error_from_errno("malloc"); + goto done; + } + *ppos = pos; + + err = got_pathlist_insert(&new_pe, dirents, d_name, ppos); + if (err) + goto done; + + if (new_pe == NULL) + err = got_error(GOT_ERR_TREE_DUP_ENTRY); +done: + if (err) { + free(d_name); + free(ppos); + } + return err; +} + +static const struct got_error * insert_tree_entry(struct got_tree_entry *new_te, struct got_pathlist_head *paths) { @@ -2342,6 +2377,37 @@ done: } static const struct got_error * +match_ignores(int *ignore, char *d_name, int type, + struct got_pathlist_head *ignores) +{ + struct got_pathlist_entry *pe; + + *ignore = 0; + + RB_FOREACH(pe, got_pathlist_head, ignores) { + if (type == DT_DIR && pe->path_len > 0 && + pe->path[pe->path_len - 1] == '/') { + char stripped[PATH_MAX]; + + if (strlcpy(stripped, pe->path, + sizeof(stripped)) >= sizeof(stripped)) + return got_error(GOT_ERR_NO_SPACE); + + got_path_strip_trailing_slashes(stripped); + if (fnmatch(stripped, d_name, 0) == 0) { + *ignore = 1; + break; + } + } else if (fnmatch(pe->path, d_name, 0) == 0) { + *ignore = 1; + break; + } + } + + return NULL; +} + +static const struct got_error * write_tree(struct got_object_id **new_tree_id, const char *path_dir, struct got_pathlist_head *ignores, struct got_repository *repo, got_repo_import_cb progress_cb, void *progress_arg) @@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c struct dirent *de; int nentries; struct got_tree_entry *new_te = NULL; - struct got_pathlist_head paths; + struct got_pathlist_head paths, dirents; struct got_pathlist_entry *pe; + long pos; *new_tree_id = NULL; RB_INIT(&paths); + RB_INIT(&dirents); dir = opendir(path_dir); if (dir == NULL) { @@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c goto done; } - nentries = 0; + /* + * Sort dirents into a pathlist. Otherwise subdir-iteration order + * depends on the readdir() function, which varies between different + * operating systems, making regression testing more difficult. + * This smells of TOCTOU, but if dirents appear, disappear, or change + * during import then we have a racy situation no matter what. + */ + pos = telldir(dir); while ((de = readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") != 0 && + strcmp(de->d_name, "..") != 0) { + err = insert_dir_entry(de, pos, &dirents); + if (err) + goto done; + } + pos = telldir(dir); + } + + nentries = 0; + RB_FOREACH(pe, got_pathlist_head, &dirents) { int ignore = 0; int type; + long *pos = pe->data; - if (strcmp(de->d_name, ".") == 0 || - strcmp(de->d_name, "..") == 0) - continue; + seekdir(dir, *pos); + de = readdir(dir); + if (de == NULL) { /* should not happen */ + err = got_error_fmt(GOT_ERR_EOF, + "unexpected EOF on directory '%s' while seeking " + "to entry '%s' at offset '%ld'\n", path_dir, + pe->path, *pos); + goto done; + } err = got_path_dirent_type(&type, path_dir, de); if (err) goto done; - RB_FOREACH(pe, got_pathlist_head, ignores) { - if (type == DT_DIR && pe->path_len > 0 && - pe->path[pe->path_len - 1] == '/') { - char stripped[PATH_MAX]; - - if (strlcpy(stripped, pe->path, - sizeof(stripped)) >= sizeof(stripped)) { - err = got_error(GOT_ERR_NO_SPACE); - goto done; - } - got_path_strip_trailing_slashes(stripped); - if (fnmatch(stripped, de->d_name, 0) == 0) { - ignore = 1; - break; - } - } else if (fnmatch(pe->path, de->d_name, 0) == 0) { - ignore = 1; - break; - } - } + err = match_ignores(&ignore, de->d_name, type, ignores); if (ignore) continue; @@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c done: if (dir) closedir(dir); + got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL); got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE); return err; } blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8 blob + 3baa6e847545365463b84422ebbc3e2212274bdc --- regress/cmdline/import.sh +++ regress/cmdline/import.sh @@ -40,8 +40,8 @@ test_import_basic() { fi local head_commit=`git_show_head $testroot/repo` - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected echo "A $testroot/tree/alpha" >> $testroot/stdout.expected echo "A $testroot/tree/beta" >> $testroot/stdout.expected echo "Created branch refs/heads/main with commit $head_commit" \ @@ -165,8 +165,8 @@ test_import_specified_head() { fi local head_commit=`git_show_head $testroot/repo` - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected echo "A $testroot/tree/alpha" >> $testroot/stdout.expected echo "A $testroot/tree/beta" >> $testroot/stdout.expected echo "Created branch refs/heads/$headref with commit $head_commit" \ @@ -324,8 +324,8 @@ test_import_detached_head() { local main_commit=`git -C $testroot/repo show-ref main | \ cut -d ' ' -f 1` - echo "A $testroot/import/gamma/delta" > $testroot/stdout.expected - echo "A $testroot/import/epsilon/zeta" >> $testroot/stdout.expected + echo "A $testroot/import/epsilon/zeta" > $testroot/stdout.expected + echo "A $testroot/import/gamma/delta" >> $testroot/stdout.expected echo "A $testroot/import/alpha" >> $testroot/stdout.expected echo "A $testroot/import/beta" >> $testroot/stdout.expected echo "Created branch refs/heads/main with commit $main_commit" \