Download raw body.
fix 'got import' output sorting issue
Stefan Sperling <stsp@stsp.name> wrote: > While running the -portable regression tests on Debian I noticed > failures related to path sorting in the output of 'got import': > > @@ -1,5 +1,5 @@ > -A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta > A /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta > +A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta > A /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha > A /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta > > We currently process directory entries in readdir() order, which is > not guaranteed to match the order of got_path_cmp(). The patch below > lets 'got import' traverse entries in got_path_cmp() order and produces > the same test failures on OpenBSD. Adjust test expectations accordingly. > > ok? Yes, makes sense and reads good. ok with one question: should we check the telldir(3) calls for failure? POSIX.1-2024 specifies no errors are defined, but telldir(3) on OpenBSD and Alpine specify that on failure, -1 is returned and errno is set; and OpenBSD also states that telldir() can fail for any of the errors specified for realloc(3). > make 'got import' output independent of readdir(3) entry order > > M lib/repository.c | 100+ 25- > M regress/cmdline/import.sh | 6+ 6- > > 2 files changed, 106 insertions(+), 31 deletions(-) > > commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3 > commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7 > blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d > blob + 169d68adf1edeb5a8126017d8c850731d2689ad6 > --- lib/repository.c > +++ lib/repository.c > @@ -2284,6 +2284,41 @@ done: > } > > static const struct got_error * > +insert_dir_entry(struct dirent *de, long pos, > + struct got_pathlist_head *dirents) > +{ > + const struct got_error *err = NULL; > + struct got_pathlist_entry *new_pe; > + char *d_name; > + long *ppos; > + > + d_name = strdup(de->d_name); > + if (d_name == NULL) > + return got_error_from_errno("strdup"); > + > + /* Record the offset to this dirent in struct DIR as path data. */ > + ppos = malloc(sizeof(*ppos)); > + if (ppos == NULL) { > + err = got_error_from_errno("malloc"); > + goto done; > + } > + *ppos = pos; > + > + err = got_pathlist_insert(&new_pe, dirents, d_name, ppos); > + if (err) > + goto done; > + > + if (new_pe == NULL) > + err = got_error(GOT_ERR_TREE_DUP_ENTRY); > +done: > + if (err) { > + free(d_name); > + free(ppos); > + } > + return err; > +} > + > +static const struct got_error * > insert_tree_entry(struct got_tree_entry *new_te, > struct got_pathlist_head *paths) > { > @@ -2342,6 +2377,37 @@ done: > } > > static const struct got_error * > +match_ignores(int *ignore, char *d_name, int type, > + struct got_pathlist_head *ignores) > +{ > + struct got_pathlist_entry *pe; > + > + *ignore = 0; > + > + RB_FOREACH(pe, got_pathlist_head, ignores) { > + if (type == DT_DIR && pe->path_len > 0 && > + pe->path[pe->path_len - 1] == '/') { > + char stripped[PATH_MAX]; > + > + if (strlcpy(stripped, pe->path, > + sizeof(stripped)) >= sizeof(stripped)) > + return got_error(GOT_ERR_NO_SPACE); > + > + got_path_strip_trailing_slashes(stripped); > + if (fnmatch(stripped, d_name, 0) == 0) { > + *ignore = 1; > + break; > + } > + } else if (fnmatch(pe->path, d_name, 0) == 0) { > + *ignore = 1; > + break; > + } > + } > + > + return NULL; > +} > + > +static const struct got_error * > write_tree(struct got_object_id **new_tree_id, const char *path_dir, > struct got_pathlist_head *ignores, struct got_repository *repo, > got_repo_import_cb progress_cb, void *progress_arg) > @@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c > struct dirent *de; > int nentries; > struct got_tree_entry *new_te = NULL; > - struct got_pathlist_head paths; > + struct got_pathlist_head paths, dirents; > struct got_pathlist_entry *pe; > + long pos; > > *new_tree_id = NULL; > > RB_INIT(&paths); > + RB_INIT(&dirents); > > dir = opendir(path_dir); > if (dir == NULL) { > @@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c > goto done; > } > > - nentries = 0; > + /* > + * Sort dirents into a pathlist. Otherwise subdir-iteration order > + * depends on the readdir() function, which varies between different > + * operating systems, making regression testing more difficult. > + * This smells of TOCTOU, but if dirents appear, disappear, or change > + * during import then we have a racy situation no matter what. > + */ > + pos = telldir(dir); > while ((de = readdir(dir)) != NULL) { > + if (strcmp(de->d_name, ".") != 0 && > + strcmp(de->d_name, "..") != 0) { > + err = insert_dir_entry(de, pos, &dirents); > + if (err) > + goto done; > + } > + pos = telldir(dir); > + } > + > + nentries = 0; > + RB_FOREACH(pe, got_pathlist_head, &dirents) { > int ignore = 0; > int type; > + long *pos = pe->data; > > - if (strcmp(de->d_name, ".") == 0 || > - strcmp(de->d_name, "..") == 0) > - continue; > + seekdir(dir, *pos); > + de = readdir(dir); > + if (de == NULL) { /* should not happen */ > + err = got_error_fmt(GOT_ERR_EOF, > + "unexpected EOF on directory '%s' while seeking " > + "to entry '%s' at offset '%ld'\n", path_dir, > + pe->path, *pos); > + goto done; > + } > > err = got_path_dirent_type(&type, path_dir, de); > if (err) > goto done; > > - RB_FOREACH(pe, got_pathlist_head, ignores) { > - if (type == DT_DIR && pe->path_len > 0 && > - pe->path[pe->path_len - 1] == '/') { > - char stripped[PATH_MAX]; > - > - if (strlcpy(stripped, pe->path, > - sizeof(stripped)) >= sizeof(stripped)) { > - err = got_error(GOT_ERR_NO_SPACE); > - goto done; > - } > - got_path_strip_trailing_slashes(stripped); > - if (fnmatch(stripped, de->d_name, 0) == 0) { > - ignore = 1; > - break; > - } > - } else if (fnmatch(pe->path, de->d_name, 0) == 0) { > - ignore = 1; > - break; > - } > - } > + err = match_ignores(&ignore, de->d_name, type, ignores); > if (ignore) > continue; > > @@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c > done: > if (dir) > closedir(dir); > + got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL); > got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE); > return err; > } > blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8 > blob + 3baa6e847545365463b84422ebbc3e2212274bdc > --- regress/cmdline/import.sh > +++ regress/cmdline/import.sh > @@ -40,8 +40,8 @@ test_import_basic() { > fi > > local head_commit=`git_show_head $testroot/repo` > - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected > - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected > + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected > + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected > echo "A $testroot/tree/alpha" >> $testroot/stdout.expected > echo "A $testroot/tree/beta" >> $testroot/stdout.expected > echo "Created branch refs/heads/main with commit $head_commit" \ > @@ -165,8 +165,8 @@ test_import_specified_head() { > fi > > local head_commit=`git_show_head $testroot/repo` > - echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected > - echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected > + echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected > + echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected > echo "A $testroot/tree/alpha" >> $testroot/stdout.expected > echo "A $testroot/tree/beta" >> $testroot/stdout.expected > echo "Created branch refs/heads/$headref with commit $head_commit" \ > @@ -324,8 +324,8 @@ test_import_detached_head() { > > local main_commit=`git -C $testroot/repo show-ref main | \ > cut -d ' ' -f 1` > - echo "A $testroot/import/gamma/delta" > $testroot/stdout.expected > - echo "A $testroot/import/epsilon/zeta" >> $testroot/stdout.expected > + echo "A $testroot/import/epsilon/zeta" > $testroot/stdout.expected > + echo "A $testroot/import/gamma/delta" >> $testroot/stdout.expected > echo "A $testroot/import/alpha" >> $testroot/stdout.expected > echo "A $testroot/import/beta" >> $testroot/stdout.expected > echo "Created branch refs/heads/main with commit $main_commit" \ -- Mark Jamsek <https://bsdbox.org> GPG: F2FF 13DE 6A06 C471 CA80 E6E2 2930 DC66 86EE CF68
fix 'got import' output sorting issue