"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: capsicum work: add fd field to got_repository, change got_packidx
To:
Yang Zhong <yzhong@freebsdfoundation.org>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 14 Dec 2020 20:15:28 +0100

Download raw body.

Thread
On Mon, Dec 14, 2020 at 10:31:58AM -0800, Yang Zhong wrote:
> Here is more work on the Capsicum adaptation. Like the previous patch,
> the attached diff includes two changes: one adds an fd field to the
> got_repository struct, and the other makes use of it. Here, I changed
> got_packidx_open() and related functions to use fds.
> 
> A consequence of these changes is that the path_packidx field of the
> got_packidx struct is now stored relative to the repo's pack directory.
> The field is only used in a few places, and looking at the code it
> seems like this change doesn't present any issues. I've also tested
> these changes with 'make regress' and 'make regress GOT_TEST_PACK=1'.

Looks good overall. I have mostly cosmetic suggestions.

Comments inline:

> diff --git a/include/got_repository.h b/include/got_repository.h
> index 9e816e24..b070948c 100644
> --- a/include/got_repository.h
> +++ b/include/got_repository.h
> @@ -32,6 +32,9 @@ const char *got_repo_get_path(struct got_repository *);
>   */
>  const char *got_repo_get_path_git_dir(struct got_repository *);
>  
> +/* Obtain the file descriptor of the repository's .git directory. */
> +int got_repo_get_path_git_dir_fd(struct got_repository *);
> +

Perhaps try to find a slightly shorter name?
Given that this has nothing to do with a path anymore, I guess we can
drop _path_ from this?

I would suggest:

  int got_repo_get_gitdir_fd(struct got_repository *);

Or maybe even this, if you prefer:

  int got_repo_get_fd(struct got_repository *);

>  /* Obtain the commit author name if parsed from gitconfig, else NULL. */
>  const char *got_repo_get_gitconfig_author_name(struct got_repository *);
>  
> diff --git a/lib/got_lib_pack.h b/lib/got_lib_pack.h
> index c56e16c4..39aa4333 100644
> --- a/lib/got_lib_pack.h
> +++ b/lib/got_lib_pack.h
> @@ -168,7 +168,7 @@ struct got_packfile_obj_data {
>  
>  const struct got_error *got_packidx_init_hdr(struct got_packidx *, int);
>  const struct got_error *got_packidx_open(struct got_packidx **,
> -    const char *, int);
> +    int, const char *, int);
>  const struct got_error *got_packidx_close(struct got_packidx *);
>  int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *);
>  const struct got_error *got_packidx_match_id_str_prefix(
> diff --git a/lib/got_lib_repository.h b/lib/got_lib_repository.h
> index d2a6b78c..a60ae530 100644
> --- a/lib/got_lib_repository.h
> +++ b/lib/got_lib_repository.h
> @@ -34,6 +34,7 @@
>  struct got_repository {
>  	char *path;
>  	char *path_git_dir;
> +	int path_git_dir_fd;

This could be called;

	int gitdir_fd;

I think this would align well with the name 'packdir_fd' which you have
chosen to use elsewhere.

Or we could align it with struct got_worktree, like this:

	int root_fd;

Alternatively, I'd also be fine with just:

	int fd;

If we go for this version, we could later on also rename 'root_fd' to
just 'fd' in struct got_worktree.

>  	/* The pack index cache speeds up search for packed objects. */
>  	struct got_packidx *packidx_cache[GOT_PACKIDX_CACHE_SIZE];
> diff --git a/lib/pack.c b/lib/pack.c
> index 71b5e073..f2356c7f 100644
> --- a/lib/pack.c
> +++ b/lib/pack.c
> @@ -328,7 +328,8 @@ done:
>  }
>  
>  const struct got_error *
> -got_packidx_open(struct got_packidx **packidx, const char *path, int verify)
> +got_packidx_open(struct got_packidx **packidx,
> +    int packdir_fd, const char *path, int verify)
>  {

Please also rename the 'path' parameter to 'relpath' to clearly convey
the intention that this path is a relative one.

>  	const struct got_error *err = NULL;
>  	struct got_packidx *p;
> @@ -340,9 +341,9 @@ got_packidx_open(struct got_packidx **packidx, const char *path, int verify)
>  	if (p == NULL)
>  		return got_error_from_errno("calloc");
>  
> -	p->fd = open(path, O_RDONLY | O_NOFOLLOW);
> +	p->fd = openat(packdir_fd, path, O_RDONLY | O_NOFOLLOW);
>  	if (p->fd == -1) {
> -		err = got_error_from_errno2("open", path);
> +		err = got_error_from_errno2("openat", path);
>  		free(p);
>  		return err;
>  	}
> diff --git a/lib/repository.c b/lib/repository.c
> index 4c8c8ac6..df606169 100644
> --- a/lib/repository.c
> +++ b/lib/repository.c
> @@ -77,6 +77,12 @@ got_repo_get_path_git_dir(struct got_repository *repo)
>  	return repo->path_git_dir;
>  }
>  
> +int
> +got_repo_get_path_git_dir_fd(struct got_repository *repo)
> +{
> +	return repo->path_git_dir_fd;
> +}
> +
>  const char *
>  got_repo_get_gitconfig_author_name(struct got_repository *repo)
>  {
> @@ -333,6 +339,8 @@ open_repo(struct got_repository *repo, const char *path)
>  {
>  	const struct got_error *err = NULL;
>  
> +	repo->path_git_dir_fd = -1;
> +
>  	/* bare git repository? */
>  	repo->path_git_dir = strdup(path);
>  	if (repo->path_git_dir == NULL)
> @@ -343,6 +351,11 @@ open_repo(struct got_repository *repo, const char *path)
>  			err = got_error_from_errno("strdup");
>  			goto done;
>  		}
> +		repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY);
> +		if (repo->path_git_dir_fd == -1) {
> +			err = got_error_from_errno("open");
> +			goto done;
> +		}
>  		return NULL;
>  	}
>  
> @@ -359,6 +372,11 @@ open_repo(struct got_repository *repo, const char *path)
>  			err = got_error_from_errno("strdup");
>  			goto done;
>  		}
> +		repo->path_git_dir_fd = open(repo->path_git_dir, O_DIRECTORY);
> +		if (repo->path_git_dir_fd == -1) {
> +			err = got_error_from_errno("open");
> +			goto done;
> +		}
>  		return NULL;
>  	}
>  
> @@ -369,6 +387,10 @@ done:
>  		repo->path = NULL;
>  		free(repo->path_git_dir);
>  		repo->path_git_dir = NULL;
> +		if (repo->path_git_dir_fd != -1)
> +			close(repo->path_git_dir_fd);
> +		repo->path_git_dir_fd = -1;
> +
>  	}
>  	return err;
>  }
> @@ -916,11 +938,11 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>      struct got_repository *repo, struct got_object_id *id)
>  {
>  	const struct got_error *err;
> -	char *path_packdir;
> -	DIR *packdir;
> +	DIR *packdir = NULL;
>  	struct dirent *dent;
>  	char *path_packidx;
>  	size_t i;
> +	int packdir_fd;
>  
>  	/* Search pack index cache. */
>  	for (i = 0; i < nitems(repo->packidx_cache); i++) {
> @@ -944,16 +966,20 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>  	}
>  	/* No luck. Search the filesystem. */
>  
> -	path_packdir = got_repo_get_path_objects_pack(repo);
> -	if (path_packdir == NULL)
> -		return got_error_from_errno("got_repo_get_path_objects_pack");
> -
> -	packdir = opendir(path_packdir);
> -	if (packdir == NULL) {
> +	packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> +	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
> +	if (packdir_fd == -1) {
>  		if (errno == ENOENT)
>  			err = got_error_no_obj(id);
>  		else
> -			err = got_error_from_errno2("opendir", path_packdir);
> +			err = got_error_from_errno2("openat",
> +			    GOT_OBJECTS_PACK_DIR);

It's slightly sad that we're losing precision in the error message here,
because the path reported in the error message is no longer absolute.
Couldn't we keep path_packdir around for this purpose, even if it's just
used to generate an error message?

> +		goto done;
> +	}
> +
> +	packdir = fdopendir(packdir_fd);
> +	if (packdir == NULL) {
> +		err = got_error_from_errno("fdopendir");
>  		goto done;
>  	}
>  
> @@ -963,9 +989,9 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>  		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
>  			continue;
>  
> -		if (asprintf(&path_packidx, "%s/%s", path_packdir,
> -		    dent->d_name) == -1) {
> -			err = got_error_from_errno("asprintf");
> +		path_packidx = strdup(dent->d_name);

Making a deep copy of path_packidx seems like needless work.
Couldn't we directly pass dent->d_name to got_packidx_open below?

> +		if (path_packidx == NULL) {
> +			err = got_error_from_errno("strdup");
>  			goto done;
>  		}
>  
> @@ -983,7 +1009,7 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>  			continue; /* already searched */
>  		}
>  
> -		err = got_packidx_open(packidx, path_packidx, 0);
> +		err = got_packidx_open(packidx, packdir_fd, path_packidx, 0);
>  		if (err) {
>  			free(path_packidx);
>  			goto done;
> @@ -1003,7 +1029,6 @@ got_repo_search_packidx(struct got_packidx **packidx, int *idx,
>  
>  	err = got_error_no_obj(id);
>  done:
> -	free(path_packdir);
>  	if (packdir && closedir(packdir) != 0 && err == NULL)
>  		err = got_error_from_errno("closedir");
>  	return err;
> @@ -1032,13 +1057,14 @@ read_packfile_hdr(int fd, struct got_packidx *packidx)
>  }
>  
>  static const struct got_error *
> -open_packfile(int *fd, const char *path_packfile, struct got_packidx *packidx)
> +open_packfile(int *fd, int packdir_fd,
> +    const char *path_packfile, struct got_packidx *packidx)

path_packfile is now a relative path. Since the function name implies that
we're opening a packfile, just 'relpath' would work here, too.

>  {
>  	const struct got_error *err = NULL;
>  
> -	*fd = open(path_packfile, O_RDONLY | O_NOFOLLOW);
> +	*fd = openat(packdir_fd, path_packfile, O_RDONLY | O_NOFOLLOW);
>  	if (*fd == -1)
> -		return got_error_from_errno2("open", path_packfile);
> +		return got_error_from_errno2("openat", path_packfile);

Here we lose precision in the error message, too. Not sure how to fix that.
  
>  	if (packidx) {
>  		err = read_packfile_hdr(*fd, packidx);
> @@ -1059,6 +1085,7 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo,
>  	struct got_pack *pack = NULL;
>  	struct stat sb;
>  	size_t i;
> +	int packdir_fd;
>  
>  	if (packp)
>  		*packp = NULL;
> @@ -1088,7 +1115,10 @@ got_repo_cache_pack(struct got_pack **packp, struct got_repository *repo,
>  		goto done;
>  	}
>  
> -	err = open_packfile(&pack->fd, path_packfile, packidx);
> +	packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> +	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY);

Missing return value check! Please handle errors:

	if (packdir_fd == -1) ...

> +
> +	err = open_packfile(&pack->fd, packdir_fd, path_packfile, packidx);
>  	if (err)
>  		goto done;
>  
> @@ -1200,22 +1230,25 @@ match_packed_object(struct got_object_id **unique_id,
>      struct got_repository *repo, const char *id_str_prefix, int obj_type)
>  {
>  	const struct got_error *err = NULL;
> -	char *path_packdir;
> -	DIR *packdir;
> +	DIR *packdir = NULL;
>  	struct dirent *dent;
>  	char *path_packidx;
>  	struct got_object_id_queue matched_ids;
> +	int packdir_fd;
>  
>  	SIMPLEQ_INIT(&matched_ids);
>  
> -	path_packdir = got_repo_get_path_objects_pack(repo);
> -	if (path_packdir == NULL)
> -		return got_error_from_errno("got_repo_get_path_objects_pack");
> +	packdir_fd = openat(got_repo_get_path_git_dir_fd(repo),
> +	    GOT_OBJECTS_PACK_DIR, O_DIRECTORY);
> +	if (packdir_fd == -1) {
> +		if (errno != ENOENT)
> +			err = got_error_from_errno2("openat", GOT_OBJECTS_PACK_DIR);
> +		goto done;
> +	}
>  
> -	packdir = opendir(path_packdir);
> +	packdir = fdopendir(packdir_fd);
>  	if (packdir == NULL) {
> -		if (errno != ENOENT)
> -			err = got_error_from_errno2("opendir", path_packdir);
> +		err = got_error_from_errno("fdopendir");
>  		goto done;
>  	}
>  
> @@ -1223,17 +1256,16 @@ match_packed_object(struct got_object_id **unique_id,
>  		struct got_packidx *packidx;
>  		struct got_object_qid *qid;
>  
> -
>  		if (!is_packidx_filename(dent->d_name, dent->d_namlen))
>  			continue;
>  
> -		if (asprintf(&path_packidx, "%s/%s", path_packdir,
> -		    dent->d_name) == -1) {
> -			err = got_error_from_errno("asprintf");
> +		path_packidx = strdup(dent->d_name);

Again, can we get away with passing dent->d_name directly?

> +		if (path_packidx == NULL) {
> +			err = got_error_from_errno("strdup");
>  			break;
>  		}
>  
> -		err = got_packidx_open(&packidx, path_packidx, 0);
> +		err = got_packidx_open(&packidx, packdir_fd, path_packidx, 0);
>  		free(path_packidx);
>  		if (err)
>  			break;
> @@ -1274,7 +1306,6 @@ match_packed_object(struct got_object_id **unique_id,
>  	}
>  done:
>  	got_object_id_queue_free(&matched_ids);
> -	free(path_packdir);
>  	if (packdir && closedir(packdir) != 0 && err == NULL)
>  		err = got_error_from_errno("closedir");
>  	if (err) {