Download raw body.
capsicum work: add fd field to got_repository, change got_packidx
capsicum work: add fd field to got_repository, change got_packidx
On Mon, Dec 14, 2020 at 10:31:58AM -0800, Yang Zhong wrote:
> Here is more work on the Capsicum adaptation. Like the previous patch,
> the attached diff includes two changes: one adds an fd field to the
> got_repository struct, and the other makes use of it. Here, I changed
> got_packidx_open() and related functions to use fds.
>
> A consequence of these changes is that the path_packidx field of the
> got_packidx struct is now stored relative to the repo's pack directory.
> The field is only used in a few places, and looking at the code it
> seems like this change doesn't present any issues. I've also tested
> these changes with 'make regress' and 'make regress GOT_TEST_PACK=1'.
Looks good overall. I have mostly cosmetic suggestions.
Comments inline:
> diff --git a/include/got_repository.h b/include/got_repository.h
> index 9e816e24..b070948c 100644
> --- a/include/got_repository.h
> +++ b/include/got_repository.h
> @@ -32,6 +32,9 @@ const char *got_repo_get_path(struct got_repository *);
> */
> const char *got_repo_get_path_git_dir(struct got_repository *);
>
> +/* Obtain the file descriptor of the repository's .git directory. */
> +int got_repo_get_path_git_dir_fd(struct got_repository *);
> +
Perhaps try to find a slightly shorter name?
Given that this has nothing to do with a path anymore, I guess we can
drop _path_ from this?
I would suggest:
int got_repo_get_gitdir_fd(struct got_repository *);
Or maybe even this, if you prefer:
int got_repo_get_fd(struct got_repository *);
> /* Obtain the commit author name if parsed from gitconfig, else NULL. */
> const char *got_repo_get_gitconfig_author_name(struct got_repository *);
>
> diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
> index c56e16c4..39aa4333 100644
> --- a/lib/got_lib_pack.h
> +++ b/lib/got_lib_pack.h
> @@ -168,7 +168,7 @@ struct got_packfile_obj_data {
>
> const struct got_error *got_packidx_init_hdr(struct got_packidx *, int);
> const struct got_error *got_packidx_open(struct got_packidx **,
> - const char *, int);
> + int, const char *, int);
> const struct got_error *got_packidx_close(struct got_packidx *);
> int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *);
> const struct got_error *got_packidx_match_id_str_prefix(
> diff --git a/lib/got_lib_repository.h b/lib/got_lib_repository.h
> index d2a6b78c..a60ae530 100644
> --- a/lib/got_lib_repository.h
> +++ b/lib/got_lib_repository.h
> @@ -34,6 +34,7 @@
> struct got_repository {
> char *path;
> char *path_git_dir;
> + int path_git_dir_fd;
This could be called;
int gitdir_fd;
I think this would align well with the name 'packdir_fd' which you have
chosen to use elsewhere.
Or we could align it with struct got_worktree, like this:
int root_fd;
Alternatively, I'd also be fine with just:
int fd;
If we go for this version, we could later on also rename 'root_fd' to
just 'fd' in struct got_worktree.
> /* The pack index cache speeds up search for packed objects. */
> struct got_packidx *packidx_cache[GOT_PACKIDX_CACHE_SIZE];
> diff --git a/lib/pack.c b/lib/pack.c
> index 71b5e073..f2356c7f 100644
> --- a/lib/pack.c
> +++ b/lib/pack.c
> @@ -328,7 +328,8 @@ done:
> }
>
> const struct got_error *
> -got_packidx_open(struct got_packidx **packidx, const char *path, int verify)
> +got_packidx_open(struct got_packidx **packidx,
> + int packdir_fd, const char *path, int verify)
> {
Please also rename the 'path' parameter to 'relpath' to clearly convey
the intention that this path is a relative one.
> const struct got_error *err = NULL;
> struct got_packidx *p;
> @@ -340,9 +341,9 @@ got_packidx_open(struct got_packidx **packidx, const char *path, int verify)
> if (p == NULL)
> return got_error_from_errno("calloc");
>
> - p->fd = open(path, O_RDONLY | O_NOFOLLOW);
> + p->fd = openat(packdir_fd, path, O_RDONLY | O_NOFOLLOW);
> if (p->fd == -1) {
> - err = got_error_from_errno2("open", path);
> + err = got_error_from_errno2("openat", path);
> free(p);
> return err;
> }
> diff --git a/lib/repository.c b/lib/repository.c
> index 4c8c8ac6..df606169 100644
> --- a/lib/repository.c
> +++ b/lib/repository.c
> @@ -77,6 +77,12 @@ got_repo_get_path_git_dir(struct got_repository *repo)
> return repo->path_git_dir;
> }
>
> +int
> +got_repo_get_path_git_dir_fd(struct got_repository *repo)
> +{
> + return repo->path_git_dir_fd;
> +}
> +
> const char *
> got_repo_get_gitconfig_author_name(struct got_repository *repo)
> {
> @@ -333,6 +339,8 @@ open_repo(struct got_repository *repo, const char *path)
> {
> const struct got_error *err = NULL;
>
> + repo->path_git_dir_fd = -1;
> +
> /* bare git repository? */
> repo->path_git_dir = strdup(path);
> if (repo->path_git_dir == NULL)
> @@ -343,6 +351,11 @@ open_repo(struct got_repository *repo, const char *path)
> err = got_error_from_errno("strdup");
> goto done;
> }
> + repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY);
> + if (repo->path_git_dir_fd == -1) {
> + err = got_error_from_errno("open");
> + goto done;
> + }
> return NULL;
> }
>
> @@ -359,6 +372,11 @@ open_repo(struct got_repository *repo, const char *path)
> err = got_error_from_errno("strdup");
> goto done;
> }
> + repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY);
> + if (repo->path_git_dir_fd == -1) {
> + err = got_error_from_errno("open");
> + goto done;
> + }
> return NULL;
> }
>
> @@ -369,6 +387,10 @@ done:
> repo->path = NULL;
> free(repo->path_git_dir);
> repo->path_git_dir = NULL;
> + if (repo->path_git_dir_fd != -1)
> + close(repo->path_git_dir_fd);
> + repo->path_git_dir_fd = -1;
> +
> }
> return err;
> }
> @@ -916,11 +938,11 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
> struct got_repository *repo, struct got_object_id *id)
> {
> const struct got_error *err;
> - char *path_packdir;
> - DIR *packdir;
> + DIR *packdir = NULL;
> struct dirent *dent;
> char *path_packidx;
> size_t i;
> + int packdir_fd;
>
> /* Search pack index cache. */
> for (i = 0; i < nitems(repo->packidx_cache); i++) {
> @@ -944,16 +966,20 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
> }
> /* No luck. Search the filesystem. */
>
> - path_packdir = got_repo_get_path_objects_pack(repo);
> - if (path_packdir == NULL)
> - return got_error_from_errno("got_repo_get_path_objects_pack");
> -
> - packdir = opendir(path_packdir);
> - if (packdir == NULL) {
> + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> + GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
> + if (packdir_fd == -1) {
> if (errno == ENOENT)
> err = got_error_no_obj(id);
> else
> - err = got_error_from_errno2("opendir", path_packdir);
> + err = got_error_from_errno2("openat",
> + GOT_OBJECTS_PACK_DIR);
It's slightly sad that we're losing precision in the error message here,
because the path reported in the error message is no longer absolute.
Couldn't we keep path_packdir around for this purpose, even if it's just
used to generate an error message?
> + goto done;
> + }
> +
> + packdir = fdopendir(packdir_fd);
> + if (packdir == NULL) {
> + err = got_error_from_errno("fdopendir");
> goto done;
> }
>
> @@ -963,9 +989,9 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
> if (!is_packidx_filename(dent->d_name, dent->d_namlen))
> continue;
>
> - if (asprintf(&path_packidx, "%s/%s", path_packdir,
> - dent->d_name) == -1) {
> - err = got_error_from_errno("asprintf");
> + path_packidx = strdup(dent->d_name);
Making a deep copy of path_packidx seems like needless work.
Couldn't we directly pass dent->d_name to got_packidx_open below?
> + if (path_packidx == NULL) {
> + err = got_error_from_errno("strdup");
> goto done;
> }
>
> @@ -983,7 +1009,7 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
> continue; /* already searched */
> }
>
> - err = got_packidx_open(packidx, path_packidx, 0);
> + err = got_packidx_open(packidx, packdir_fd, path_packidx, 0);
> if (err) {
> free(path_packidx);
> goto done;
> @@ -1003,7 +1029,6 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>
> err = got_error_no_obj(id);
> done:
> - free(path_packdir);
> if (packdir && closedir(packdir) != 0 && err == NULL)
> err = got_error_from_errno("closedir");
> return err;
> @@ -1032,13 +1057,14 @@ read_packfile_hdr(int fd, struct got_packidx *packidx)
> }
>
> static const struct got_error *
> -open_packfile(int *fd, const char *path_packfile, struct got_packidx *packidx)
> +open_packfile(int *fd, int packdir_fd,
> + const char *path_packfile, struct got_packidx *packidx)
path_packfile is now a relative path. Since the function name implies that
we're opening a packfile, just 'relpath' would work here, too.
> {
> const struct got_error *err = NULL;
>
> - *fd = open(path_packfile, O_RDONLY | O_NOFOLLOW);
> + *fd = openat(packdir_fd, path_packfile, O_RDONLY | O_NOFOLLOW);
> if (*fd == -1)
> - return got_error_from_errno2("open", path_packfile);
> + return got_error_from_errno2("openat", path_packfile);
Here we lose precision in the error message, too. Not sure how to fix that.
> if (packidx) {
> err = read_packfile_hdr(*fd, packidx);
> @@ -1059,6 +1085,7 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo,
> struct got_pack *pack = NULL;
> struct stat sb;
> size_t i;
> + int packdir_fd;
>
> if (packp)
> *packp = NULL;
> @@ -1088,7 +1115,10 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo,
> goto done;
> }
>
> - err = open_packfile(&pack->fd, path_packfile, packidx);
> + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> + GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
Missing return value check! Please handle errors:
if (packdir_fd == -1) ...
> +
> + err = open_packfile(&pack->fd, packdir_fd, path_packfile, packidx);
> if (err)
> goto done;
>
> @@ -1200,22 +1230,25 @@ match_packed_object(struct got_object_id **unique_id,
> struct got_repository *repo, const char *id_str_prefix, int obj_type)
> {
> const struct got_error *err = NULL;
> - char *path_packdir;
> - DIR *packdir;
> + DIR *packdir = NULL;
> struct dirent *dent;
> char *path_packidx;
> struct got_object_id_queue matched_ids;
> + int packdir_fd;
>
> SIMPLEQ_INIT(&matched_ids);
>
> - path_packdir = got_repo_get_path_objects_pack(repo);
> - if (path_packdir == NULL)
> - return got_error_from_errno("got_repo_get_path_objects_pack");
> + packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> + GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
> + if (packdir_fd == -1) {
> + if (errno != ENOENT)
> + err = got_error_from_errno2("openat", GOT_OBJECTS_PACK_DIR);
> + goto done;
> + }
>
> - packdir = opendir(path_packdir);
> + packdir = fdopendir(packdir_fd);
> if (packdir == NULL) {
> - if (errno != ENOENT)
> - err = got_error_from_errno2("opendir", path_packdir);
> + err = got_error_from_errno("fdopendir");
> goto done;
> }
>
> @@ -1223,17 +1256,16 @@ match_packed_object(struct got_object_id **unique_id,
> struct got_packidx *packidx;
> struct got_object_qid *qid;
>
> -
> if (!is_packidx_filename(dent->d_name, dent->d_namlen))
> continue;
>
> - if (asprintf(&path_packidx, "%s/%s", path_packdir,
> - dent->d_name) == -1) {
> - err = got_error_from_errno("asprintf");
> + path_packidx = strdup(dent->d_name);
Again, can we get away with passing dent->d_name directly?
> + if (path_packidx == NULL) {
> + err = got_error_from_errno("strdup");
> break;
> }
>
> - err = got_packidx_open(&packidx, path_packidx, 0);
> + err = got_packidx_open(&packidx, packdir_fd, path_packidx, 0);
> free(path_packidx);
> if (err)
> break;
> @@ -1274,7 +1306,6 @@ match_packed_object(struct got_object_id **unique_id,
> }
> done:
> got_object_id_queue_free(&matched_ids);
> - free(path_packdir);
> if (packdir && closedir(packdir) != 0 && err == NULL)
> err = got_error_from_errno("closedir");
> if (err) {
capsicum work: add fd field to got_repository, change got_packidx
capsicum work: add fd field to got_repository, change got_packidx