Download raw body.
fix 'got import' output sorting issue
While running the -portable regression tests on Debian I noticed
failures related to path sorting in the output of 'got import':
@@ -1,5 +1,5 @@
-A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
A /tmp/got-test-import_basic-QbjC1yFQpw/tree/epsilon/zeta
+A /tmp/got-test-import_basic-QbjC1yFQpw/tree/gamma/delta
A /tmp/got-test-import_basic-QbjC1yFQpw/tree/alpha
A /tmp/got-test-import_basic-QbjC1yFQpw/tree/beta
We currently process directory entries in readdir() order, which is
not guaranteed to match the order of got_path_cmp(). The patch below
lets 'got import' traverse entries in got_path_cmp() order and produces
the same test failures on OpenBSD. Adjust test expectations accordingly.
ok?
make 'got import' output independent of readdir(3) entry order
M lib/repository.c | 100+ 25-
M regress/cmdline/import.sh | 6+ 6-
2 files changed, 106 insertions(+), 31 deletions(-)
commit - 3fd8274f36c449b4db01aa4f5e6ab9709facd8f3
commit + adb4fd4412553b1d3fc59eba2fc3343e5fa436d7
blob - f1c0ce0284eb71c8b01a826d9a452c4324bbb32d
blob + 169d68adf1edeb5a8126017d8c850731d2689ad6
--- lib/repository.c
+++ lib/repository.c
@@ -2284,6 +2284,41 @@ done:
}
static const struct got_error *
+insert_dir_entry(struct dirent *de, long pos,
+ struct got_pathlist_head *dirents)
+{
+ const struct got_error *err = NULL;
+ struct got_pathlist_entry *new_pe;
+ char *d_name;
+ long *ppos;
+
+ d_name = strdup(de->d_name);
+ if (d_name == NULL)
+ return got_error_from_errno("strdup");
+
+ /* Record the offset to this dirent in struct DIR as path data. */
+ ppos = malloc(sizeof(*ppos));
+ if (ppos == NULL) {
+ err = got_error_from_errno("malloc");
+ goto done;
+ }
+ *ppos = pos;
+
+ err = got_pathlist_insert(&new_pe, dirents, d_name, ppos);
+ if (err)
+ goto done;
+
+ if (new_pe == NULL)
+ err = got_error(GOT_ERR_TREE_DUP_ENTRY);
+done:
+ if (err) {
+ free(d_name);
+ free(ppos);
+ }
+ return err;
+}
+
+static const struct got_error *
insert_tree_entry(struct got_tree_entry *new_te,
struct got_pathlist_head *paths)
{
@@ -2342,6 +2377,37 @@ done:
}
static const struct got_error *
+match_ignores(int *ignore, char *d_name, int type,
+ struct got_pathlist_head *ignores)
+{
+ struct got_pathlist_entry *pe;
+
+ *ignore = 0;
+
+ RB_FOREACH(pe, got_pathlist_head, ignores) {
+ if (type == DT_DIR && pe->path_len > 0 &&
+ pe->path[pe->path_len - 1] == '/') {
+ char stripped[PATH_MAX];
+
+ if (strlcpy(stripped, pe->path,
+ sizeof(stripped)) >= sizeof(stripped))
+ return got_error(GOT_ERR_NO_SPACE);
+
+ got_path_strip_trailing_slashes(stripped);
+ if (fnmatch(stripped, d_name, 0) == 0) {
+ *ignore = 1;
+ break;
+ }
+ } else if (fnmatch(pe->path, d_name, 0) == 0) {
+ *ignore = 1;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static const struct got_error *
write_tree(struct got_object_id **new_tree_id, const char *path_dir,
struct got_pathlist_head *ignores, struct got_repository *repo,
got_repo_import_cb progress_cb, void *progress_arg)
@@ -2351,12 +2417,14 @@ write_tree(struct got_object_id **new_tree_id, const c
struct dirent *de;
int nentries;
struct got_tree_entry *new_te = NULL;
- struct got_pathlist_head paths;
+ struct got_pathlist_head paths, dirents;
struct got_pathlist_entry *pe;
+ long pos;
*new_tree_id = NULL;
RB_INIT(&paths);
+ RB_INIT(&dirents);
dir = opendir(path_dir);
if (dir == NULL) {
@@ -2364,39 +2432,45 @@ write_tree(struct got_object_id **new_tree_id, const c
goto done;
}
- nentries = 0;
+ /*
+ * Sort dirents into a pathlist. Otherwise subdir-iteration order
+ * depends on the readdir() function, which varies between different
+ * operating systems, making regression testing more difficult.
+ * This smells of TOCTOU, but if dirents appear, disappear, or change
+ * during import then we have a racy situation no matter what.
+ */
+ pos = telldir(dir);
while ((de = readdir(dir)) != NULL) {
+ if (strcmp(de->d_name, ".") != 0 &&
+ strcmp(de->d_name, "..") != 0) {
+ err = insert_dir_entry(de, pos, &dirents);
+ if (err)
+ goto done;
+ }
+ pos = telldir(dir);
+ }
+
+ nentries = 0;
+ RB_FOREACH(pe, got_pathlist_head, &dirents) {
int ignore = 0;
int type;
+ long *pos = pe->data;
- if (strcmp(de->d_name, ".") == 0 ||
- strcmp(de->d_name, "..") == 0)
- continue;
+ seekdir(dir, *pos);
+ de = readdir(dir);
+ if (de == NULL) { /* should not happen */
+ err = got_error_fmt(GOT_ERR_EOF,
+ "unexpected EOF on directory '%s' while seeking "
+ "to entry '%s' at offset '%ld'\n", path_dir,
+ pe->path, *pos);
+ goto done;
+ }
err = got_path_dirent_type(&type, path_dir, de);
if (err)
goto done;
- RB_FOREACH(pe, got_pathlist_head, ignores) {
- if (type == DT_DIR && pe->path_len > 0 &&
- pe->path[pe->path_len - 1] == '/') {
- char stripped[PATH_MAX];
-
- if (strlcpy(stripped, pe->path,
- sizeof(stripped)) >= sizeof(stripped)) {
- err = got_error(GOT_ERR_NO_SPACE);
- goto done;
- }
- got_path_strip_trailing_slashes(stripped);
- if (fnmatch(stripped, de->d_name, 0) == 0) {
- ignore = 1;
- break;
- }
- } else if (fnmatch(pe->path, de->d_name, 0) == 0) {
- ignore = 1;
- break;
- }
- }
+ err = match_ignores(&ignore, de->d_name, type, ignores);
if (ignore)
continue;
@@ -2447,6 +2521,7 @@ write_tree(struct got_object_id **new_tree_id, const c
done:
if (dir)
closedir(dir);
+ got_pathlist_free(&dirents, GOT_PATHLIST_FREE_ALL);
got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE);
return err;
}
blob - b1a6fa4f2a8b5e436b7e88a234aec2a2d44796b8
blob + 3baa6e847545365463b84422ebbc3e2212274bdc
--- regress/cmdline/import.sh
+++ regress/cmdline/import.sh
@@ -40,8 +40,8 @@ test_import_basic() {
fi
local head_commit=`git_show_head $testroot/repo`
- echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected
- echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
+ echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
+ echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected
echo "A $testroot/tree/alpha" >> $testroot/stdout.expected
echo "A $testroot/tree/beta" >> $testroot/stdout.expected
echo "Created branch refs/heads/main with commit $head_commit" \
@@ -165,8 +165,8 @@ test_import_specified_head() {
fi
local head_commit=`git_show_head $testroot/repo`
- echo "A $testroot/tree/gamma/delta" > $testroot/stdout.expected
- echo "A $testroot/tree/epsilon/zeta" >> $testroot/stdout.expected
+ echo "A $testroot/tree/epsilon/zeta" > $testroot/stdout.expected
+ echo "A $testroot/tree/gamma/delta" >> $testroot/stdout.expected
echo "A $testroot/tree/alpha" >> $testroot/stdout.expected
echo "A $testroot/tree/beta" >> $testroot/stdout.expected
echo "Created branch refs/heads/$headref with commit $head_commit" \
@@ -324,8 +324,8 @@ test_import_detached_head() {
local main_commit=`git -C $testroot/repo show-ref main | \
cut -d ' ' -f 1`
- echo "A $testroot/import/gamma/delta" > $testroot/stdout.expected
- echo "A $testroot/import/epsilon/zeta" >> $testroot/stdout.expected
+ echo "A $testroot/import/epsilon/zeta" > $testroot/stdout.expected
+ echo "A $testroot/import/gamma/delta" >> $testroot/stdout.expected
echo "A $testroot/import/alpha" >> $testroot/stdout.expected
echo "A $testroot/import/beta" >> $testroot/stdout.expected
echo "Created branch refs/heads/main with commit $main_commit" \
fix 'got import' output sorting issue