Download raw body.
fix 'got import' output sorting issue
Stefan Sperling <stsp@stsp.name> wrote:
> While running the -portable regression tests on Debian I noticed
> failures related to path sorting in the output of 'got import':
>
> @@ -1,5 +1,5 @@
> -A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
> A /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta
> +A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
> A /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha
> A /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta
>
> We currently process directory entries in readdir() order, which is
> not guaranteed to match the order of got_path_cmp(). The patch below
> lets 'got import' traverse entries in got_path_cmp() order and produces
> the same test failures on OpenBSD. Adjust test expectations accordingly.
>
> ok?
Yes, makes sense and reads good.
ok with one question: should we check the telldir(3) calls for failure?
POSIX.1-2024 specifies no errors are defined, but telldir(3) on OpenBSD
and Alpine specify that on failure, -1 is returned and errno is set;
and OpenBSD also states that telldir() can fail for any of the errors
specified for realloc(3).
> make 'got import' output independent of readdir(3) entry order
>
> M lib/repository.c | 100+ 25-
> M regress/cmdline/import.sh | 6+ 6-
>
> 2 files changed, 106 insertions(+), 31 deletions(-)
>
> commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3
> commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7
> blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d
> blob + 169d68adf1edeb5a8126017d8c850731d2689ad6
> --- lib/repository.c
> +++ lib/repository.c
> @@ -2284,6 +2284,41 @@ done:
> }
>
> static const struct got_error *
> +insert_dir_entry(struct dirent *de, long pos,
> + struct got_pathlist_head *dirents)
> +{
> + const struct got_error *err = NULL;
> + struct got_pathlist_entry *new_pe;
> + char *d_name;
> + long *ppos;
> +
> + d_name = strdup(de->d_name);
> + if (d_name == NULL)
> + return got_error_from_errno("strdup");
> +
> + /* Record the offset to this dirent in struct DIR as path data. */
> + ppos = malloc(sizeof(*ppos));
> + if (ppos == NULL) {
> + err = got_error_from_errno("malloc");
> + goto done;
> + }
> + *ppos = pos;
> +
> + err = got_pathlist_insert(&new_pe, dirents, d_name, ppos);
> + if (err)
> + goto done;
> +
> + if (new_pe == NULL)
> + err = got_error(GOT_ERR_TREE_DUP_ENTRY);
> +done:
> + if (err) {
> + free(d_name);
> + free(ppos);
> + }
> + return err;
> +}
> +
> +static const struct got_error *
> insert_tree_entry(struct got_tree_entry *new_te,
> struct got_pathlist_head *paths)
> {
> @@ -2342,6 +2377,37 @@ done:
> }
>
> static const struct got_error *
> +match_ignores(int *ignore, char *d_name, int type,
> + struct got_pathlist_head *ignores)
> +{
> + struct got_pathlist_entry *pe;
> +
> + *ignore = 0;
> +
> + RB_FOREACH(pe, got_pathlist_head, ignores) {
> + if (type == DT_DIR && pe->path_len > 0 &&
> + pe->path[pe->path_len - 1] == '/') {
> + char stripped[PATH_MAX];
> +
> + if (strlcpy(stripped, pe->path,
> + sizeof(stripped)) >= sizeof(stripped))
> + return got_error(GOT_ERR_NO_SPACE);
> +
> + got_path_strip_trailing_slashes(stripped);
> + if (fnmatch(stripped, d_name, 0) == 0) {
> + *ignore = 1;
> + break;
> + }
> + } else if (fnmatch(pe->path, d_name, 0) == 0) {
> + *ignore = 1;
> + break;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static const struct got_error *
> write_tree(struct got_object_id **new_tree_id, const char *path_dir,
> struct got_pathlist_head *ignores, struct got_repository *repo,
> got_repo_import_cb progress_cb, void *progress_arg)
> @@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c
> struct dirent *de;
> int nentries;
> struct got_tree_entry *new_te = NULL;
> - struct got_pathlist_head paths;
> + struct got_pathlist_head paths, dirents;
> struct got_pathlist_entry *pe;
> + long pos;
>
> *new_tree_id = NULL;
>
> RB_INIT(&paths);
> + RB_INIT(&dirents);
>
> dir = opendir(path_dir);
> if (dir == NULL) {
> @@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c
> goto done;
> }
>
> - nentries = 0;
> + /*
> + * Sort dirents into a pathlist. Otherwise subdir-iteration order
> + * depends on the readdir() function, which varies between different
> + * operating systems, making regression testing more difficult.
> + * This smells of TOCTOU, but if dirents appear, disappear, or change
> + * during import then we have a racy situation no matter what.
> + */
> + pos = telldir(dir);
> while ((de = readdir(dir)) != NULL) {
> + if (strcmp(de->d_name, ".") != 0 &&
> + strcmp(de->d_name, "..") != 0) {
> + err = insert_dir_entry(de, pos, &dirents);
> + if (err)
> + goto done;
> + }
> + pos = telldir(dir);
> + }
> +
> + nentries = 0;
> + RB_FOREACH(pe, got_pathlist_head, &dirents) {
> int ignore = 0;
> int type;
> + long *pos = pe->data;
>
> - if (strcmp(de->d_name, ".") == 0 ||
> - strcmp(de->d_name, "..") == 0)
> - continue;
> + seekdir(dir, *pos);
> + de = readdir(dir);
> + if (de == NULL) { /* should not happen */
> + err = got_error_fmt(GOT_ERR_EOF,
> + "unexpected EOF on directory '%s' while seeking "
> + "to entry '%s' at offset '%ld'\n", path_dir,
> + pe->path, *pos);
> + goto done;
> + }
>
> err = got_path_dirent_type(&type, path_dir, de);
> if (err)
> goto done;
>
> - RB_FOREACH(pe, got_pathlist_head, ignores) {
> - if (type == DT_DIR && pe->path_len > 0 &&
> - pe->path[pe->path_len - 1] == '/') {
> - char stripped[PATH_MAX];
> -
> - if (strlcpy(stripped, pe->path,
> - sizeof(stripped)) >= sizeof(stripped)) {
> - err = got_error(GOT_ERR_NO_SPACE);
> - goto done;
> - }
> - got_path_strip_trailing_slashes(stripped);
> - if (fnmatch(stripped, de->d_name, 0) == 0) {
> - ignore = 1;
> - break;
> - }
> - } else if (fnmatch(pe->path, de->d_name, 0) == 0) {
> - ignore = 1;
> - break;
> - }
> - }
> + err = match_ignores(&ignore, de->d_name, type, ignores);
> if (ignore)
> continue;
>
> @@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c
> done:
> if (dir)
> closedir(dir);
> + got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL);
> got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE);
> return err;
> }
> blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8
> blob + 3baa6e847545365463b84422ebbc3e2212274bdc
> --- regress/cmdline/import.sh
> +++ regress/cmdline/import.sh
> @@ -40,8 +40,8 @@ test_import_basic() {
> fi
>
> local head_commit=`git_show_head $testroot/repo`
> - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected
> - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
> + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
> + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected
> echo "A $testroot/tree/alpha" >> $testroot/stdout.expected
> echo "A $testroot/tree/beta" >> $testroot/stdout.expected
> echo "Created branch refs/heads/main with commit $head_commit" \
> @@ -165,8 +165,8 @@ test_import_specified_head() {
> fi
>
> local head_commit=`git_show_head $testroot/repo`
> - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected
> - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
> + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
> + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected
> echo "A $testroot/tree/alpha" >> $testroot/stdout.expected
> echo "A $testroot/tree/beta" >> $testroot/stdout.expected
> echo "Created branch refs/heads/$headref with commit $head_commit" \
> @@ -324,8 +324,8 @@ test_import_detached_head() {
>
> local main_commit=`git -C $testroot/repo show-ref main | \
> cut -d ' ' -f 1`
> - echo "A $testroot/import/gamma/delta" > $testroot/stdout.expected
> - echo "A $testroot/import/epsilon/zeta" >> $testroot/stdout.expected
> + echo "A $testroot/import/epsilon/zeta" > $testroot/stdout.expected
> + echo "A $testroot/import/gamma/delta" >> $testroot/stdout.expected
> echo "A $testroot/import/alpha" >> $testroot/stdout.expected
> echo "A $testroot/import/beta" >> $testroot/stdout.expected
> echo "Created branch refs/heads/main with commit $main_commit" \
--
Mark Jamsek <https://bsdbox.org>
GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68
fix 'got import' output sorting issue