From: Stefan Sperling Subject: Re: capsicum work: add fd field to got_repository, change got_packidx To: Yang Zhong Cc: gameoftrees@openbsd.org Date: Mon, 14 Dec 2020 20:15:28 +0100 On Mon, Dec 14, 2020 at 10:31:58AM -0800, Yang Zhong wrote: > Here is more work on the Capsicum adaptation. Like the previous patch, > the attached diff includes two changes: one adds an fd field to the > got_repository struct, and the other makes use of it. Here, I changed > got_packidx_open() and related functions to use fds. > > A consequence of these changes is that the path_packidx field of the > got_packidx struct is now stored relative to the repo's pack directory. > The field is only used in a few places, and looking at the code it > seems like this change doesn't present any issues. I've also tested > these changes with 'make regress' and 'make regress GOT_TEST_PACK=1'. Looks good overall. I have mostly cosmetic suggestions. Comments inline: > diff --git a/include/got_repository.h b/include/got_repository.h > index 9e816e24..b070948c 100644 > --- a/include/got_repository.h > +++ b/include/got_repository.h > @@ -32,6 +32,9 @@ const char *got_repo_get_path(struct got_repository *); > */ > const char *got_repo_get_path_git_dir(struct got_repository *); > > +/* Obtain the file descriptor of the repository's .git directory. */ > +int got_repo_get_path_git_dir_fd(struct got_repository *); > + Perhaps try to find a slightly shorter name? Given that this has nothing to do with a path anymore, I guess we can drop _path_ from this? I would suggest: int got_repo_get_gitdir_fd(struct got_repository *); Or maybe even this, if you prefer: int got_repo_get_fd(struct got_repository *); > /* Obtain the commit author name if parsed from gitconfig, else NULL. */ > const char *got_repo_get_gitconfig_author_name(struct got_repository *); > > diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h > index c56e16c4..39aa4333 100644 > --- a/lib/got_lib_pack.h > +++ b/lib/got_lib_pack.h > @@ -168,7 +168,7 @@ struct got_packfile_obj_data { > > const struct got_error *got_packidx_init_hdr(struct got_packidx *, int); > const struct got_error *got_packidx_open(struct got_packidx **, > - const char *, int); > + int, const char *, int); > const struct got_error *got_packidx_close(struct got_packidx *); > int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *); > const struct got_error *got_packidx_match_id_str_prefix( > diff --git a/lib/got_lib_repository.h b/lib/got_lib_repository.h > index d2a6b78c..a60ae530 100644 > --- a/lib/got_lib_repository.h > +++ b/lib/got_lib_repository.h > @@ -34,6 +34,7 @@ > struct got_repository { > char *path; > char *path_git_dir; > + int path_git_dir_fd; This could be called; int gitdir_fd; I think this would align well with the name 'packdir_fd' which you have chosen to use elsewhere. Or we could align it with struct got_worktree, like this: int root_fd; Alternatively, I'd also be fine with just: int fd; If we go for this version, we could later on also rename 'root_fd' to just 'fd' in struct got_worktree. > /* The pack index cache speeds up search for packed objects. */ > struct got_packidx *packidx_cache[GOT_PACKIDX_CACHE_SIZE]; > diff --git a/lib/pack.c b/lib/pack.c > index 71b5e073..f2356c7f 100644 > --- a/lib/pack.c > +++ b/lib/pack.c > @@ -328,7 +328,8 @@ done: > } > > const struct got_error * > -got_packidx_open(struct got_packidx **packidx, const char *path, int verify) > +got_packidx_open(struct got_packidx **packidx, > + int packdir_fd, const char *path, int verify) > { Please also rename the 'path' parameter to 'relpath' to clearly convey the intention that this path is a relative one. > const struct got_error *err = NULL; > struct got_packidx *p; > @@ -340,9 +341,9 @@ got_packidx_open(struct got_packidx **packidx, const char *path, int verify) > if (p == NULL) > return got_error_from_errno("calloc"); > > - p->fd = open(path, O_RDONLY | O_NOFOLLOW); > + p->fd = openat(packdir_fd, path, O_RDONLY | O_NOFOLLOW); > if (p->fd == -1) { > - err = got_error_from_errno2("open", path); > + err = got_error_from_errno2("openat", path); > free(p); > return err; > } > diff --git a/lib/repository.c b/lib/repository.c > index 4c8c8ac6..df606169 100644 > --- a/lib/repository.c > +++ b/lib/repository.c > @@ -77,6 +77,12 @@ got_repo_get_path_git_dir(struct got_repository *repo) > return repo->path_git_dir; > } > > +int > +got_repo_get_path_git_dir_fd(struct got_repository *repo) > +{ > + return repo->path_git_dir_fd; > +} > + > const char * > got_repo_get_gitconfig_author_name(struct got_repository *repo) > { > @@ -333,6 +339,8 @@ open_repo(struct got_repository *repo, const char *path) > { > const struct got_error *err = NULL; > > + repo->path_git_dir_fd = -1; > + > /* bare git repository? */ > repo->path_git_dir = strdup(path); > if (repo->path_git_dir == NULL) > @@ -343,6 +351,11 @@ open_repo(struct got_repository *repo, const char *path) > err = got_error_from_errno("strdup"); > goto done; > } > + repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY); > + if (repo->path_git_dir_fd == -1) { > + err = got_error_from_errno("open"); > + goto done; > + } > return NULL; > } > > @@ -359,6 +372,11 @@ open_repo(struct got_repository *repo, const char *path) > err = got_error_from_errno("strdup"); > goto done; > } > + repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY); > + if (repo->path_git_dir_fd == -1) { > + err = got_error_from_errno("open"); > + goto done; > + } > return NULL; > } > > @@ -369,6 +387,10 @@ done: > repo->path = NULL; > free(repo->path_git_dir); > repo->path_git_dir = NULL; > + if (repo->path_git_dir_fd != -1) > + close(repo->path_git_dir_fd); > + repo->path_git_dir_fd = -1; > + > } > return err; > } > @@ -916,11 +938,11 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx, > struct got_repository *repo, struct got_object_id *id) > { > const struct got_error *err; > - char *path_packdir; > - DIR *packdir; > + DIR *packdir = NULL; > struct dirent *dent; > char *path_packidx; > size_t i; > + int packdir_fd; > > /* Search pack index cache. */ > for (i = 0; i < nitems(repo->packidx_cache); i++) { > @@ -944,16 +966,20 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx, > } > /* No luck. Search the filesystem. */ > > - path_packdir = got_repo_get_path_objects_pack(repo); > - if (path_packdir == NULL) > - return got_error_from_errno("got_repo_get_path_objects_pack"); > - > - packdir = opendir(path_packdir); > - if (packdir == NULL) { > + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo), > + GOT_OBJECTS_PACK_DIR, O_DIRECTORY); > + if (packdir_fd == -1) { > if (errno == ENOENT) > err = got_error_no_obj(id); > else > - err = got_error_from_errno2("opendir", path_packdir); > + err = got_error_from_errno2("openat", > + GOT_OBJECTS_PACK_DIR); It's slightly sad that we're losing precision in the error message here, because the path reported in the error message is no longer absolute. Couldn't we keep path_packdir around for this purpose, even if it's just used to generate an error message? > + goto done; > + } > + > + packdir = fdopendir(packdir_fd); > + if (packdir == NULL) { > + err = got_error_from_errno("fdopendir"); > goto done; > } > > @@ -963,9 +989,9 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx, > if (!is_packidx_filename(dent->d_name, dent->d_namlen)) > continue; > > - if (asprintf(&path_packidx, "%s/%s", path_packdir, > - dent->d_name) == -1) { > - err = got_error_from_errno("asprintf"); > + path_packidx = strdup(dent->d_name); Making a deep copy of path_packidx seems like needless work. Couldn't we directly pass dent->d_name to got_packidx_open below? > + if (path_packidx == NULL) { > + err = got_error_from_errno("strdup"); > goto done; > } > > @@ -983,7 +1009,7 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx, > continue; /* already searched */ > } > > - err = got_packidx_open(packidx, path_packidx, 0); > + err = got_packidx_open(packidx, packdir_fd, path_packidx, 0); > if (err) { > free(path_packidx); > goto done; > @@ -1003,7 +1029,6 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx, > > err = got_error_no_obj(id); > done: > - free(path_packdir); > if (packdir && closedir(packdir) != 0 && err == NULL) > err = got_error_from_errno("closedir"); > return err; > @@ -1032,13 +1057,14 @@ read_packfile_hdr(int fd, struct got_packidx *packidx) > } > > static const struct got_error * > -open_packfile(int *fd, const char *path_packfile, struct got_packidx *packidx) > +open_packfile(int *fd, int packdir_fd, > + const char *path_packfile, struct got_packidx *packidx) path_packfile is now a relative path. Since the function name implies that we're opening a packfile, just 'relpath' would work here, too. > { > const struct got_error *err = NULL; > > - *fd = open(path_packfile, O_RDONLY | O_NOFOLLOW); > + *fd = openat(packdir_fd, path_packfile, O_RDONLY | O_NOFOLLOW); > if (*fd == -1) > - return got_error_from_errno2("open", path_packfile); > + return got_error_from_errno2("openat", path_packfile); Here we lose precision in the error message, too. Not sure how to fix that. > if (packidx) { > err = read_packfile_hdr(*fd, packidx); > @@ -1059,6 +1085,7 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo, > struct got_pack *pack = NULL; > struct stat sb; > size_t i; > + int packdir_fd; > > if (packp) > *packp = NULL; > @@ -1088,7 +1115,10 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo, > goto done; > } > > - err = open_packfile(&pack->fd, path_packfile, packidx); > + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo), > + GOT_OBJECTS_PACK_DIR, O_DIRECTORY); Missing return value check! Please handle errors: if (packdir_fd == -1) ... > + > + err = open_packfile(&pack->fd, packdir_fd, path_packfile, packidx); > if (err) > goto done; > > @@ -1200,22 +1230,25 @@ match_packed_object(struct got_object_id **unique_id, > struct got_repository *repo, const char *id_str_prefix, int obj_type) > { > const struct got_error *err = NULL; > - char *path_packdir; > - DIR *packdir; > + DIR *packdir = NULL; > struct dirent *dent; > char *path_packidx; > struct got_object_id_queue matched_ids; > + int packdir_fd; > > SIMPLEQ_INIT(&matched_ids); > > - path_packdir = got_repo_get_path_objects_pack(repo); > - if (path_packdir == NULL) > - return got_error_from_errno("got_repo_get_path_objects_pack"); > + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo), > + GOT_OBJECTS_PACK_DIR, O_DIRECTORY); > + if (packdir_fd == -1) { > + if (errno != ENOENT) > + err = got_error_from_errno2("openat", GOT_OBJECTS_PACK_DIR); > + goto done; > + } > > - packdir = opendir(path_packdir); > + packdir = fdopendir(packdir_fd); > if (packdir == NULL) { > - if (errno != ENOENT) > - err = got_error_from_errno2("opendir", path_packdir); > + err = got_error_from_errno("fdopendir"); > goto done; > } > > @@ -1223,17 +1256,16 @@ match_packed_object(struct got_object_id **unique_id, > struct got_packidx *packidx; > struct got_object_qid *qid; > > - > if (!is_packidx_filename(dent->d_name, dent->d_namlen)) > continue; > > - if (asprintf(&path_packidx, "%s/%s", path_packdir, > - dent->d_name) == -1) { > - err = got_error_from_errno("asprintf"); > + path_packidx = strdup(dent->d_name); Again, can we get away with passing dent->d_name directly? > + if (path_packidx == NULL) { > + err = got_error_from_errno("strdup"); > break; > } > > - err = got_packidx_open(&packidx, path_packidx, 0); > + err = got_packidx_open(&packidx, packdir_fd, path_packidx, 0); > free(path_packidx); > if (err) > break; > @@ -1274,7 +1306,6 @@ match_packed_object(struct got_object_id **unique_id, > } > done: > got_object_id_queue_free(&matched_ids); > - free(path_packdir); > if (packdir && closedir(packdir) != 0 && err == NULL) > err = got_error_from_errno("closedir"); > if (err) {