"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Benjamin Stürz <benni+got@stuerz.xyz>
Subject:
Re: got-archive(1) (now with a patch)
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Thu, 28 Dec 2023 18:17:05 +0100

Download raw body.

Thread
On 12/28/23 12:51, Benjamin Stürz wrote:
> I'll take a look into the code of got, and see if I can do something,
> if no one is already working on it.
Here's a patch implementing a WIP archive command.
There are still a few things to do,
such as adding options and a section in the man page,
but I think it's ready for testing.
Most of the code is copied from the checkout command;
the rest is either written by me or stolen from a man page.

Example usage:
$ got archive /var/git/test.git test-1.0.tgz v1.0

> diff 76a9a38e0c199dfd2ee26f23792b56e2b77952c2 HEAD
> commit - 76a9a38e0c199dfd2ee26f23792b56e2b77952c2
> commit + 6d7e4afd293aecad7a28644c2f34dbdaf5c2ba41
> blob - 488dc79c339ccc7d9f24681ea94bbf19ab7b4309
> blob + 1870a2ae353cd51815b1e845e676c2789c82f223
> --- got/got.c
> +++ got/got.c
> @@ -28,6 +28,7 @@
>  #include <limits.h>
>  #include <locale.h>
>  #include <ctype.h>
> +#include <fts.h>
>  #include <sha1.h>
>  #include <sha2.h>
>  #include <signal.h>
> @@ -41,6 +42,7 @@
>  #include <regex.h>
>  #include <getopt.h>
>  #include <util.h>
> +#include <zlib.h>
>  
>  #include "got_version.h"
>  #include "got_error.h"
> @@ -121,6 +123,7 @@ __dead static void	usage_stage(void);
>  __dead static void	usage_unstage(void);
>  __dead static void	usage_cat(void);
>  __dead static void	usage_info(void);
> +__dead static void	usage_archive(void);
>  
>  static const struct got_error*		cmd_import(int, char *[]);
>  static const struct got_error*		cmd_clone(int, char *[]);
> @@ -151,6 +154,7 @@ static const struct got_error*		cmd_stage(int, char *[
>  static const struct got_error*		cmd_unstage(int, char *[]);
>  static const struct got_error*		cmd_cat(int, char *[]);
>  static const struct got_error*		cmd_info(int, char *[]);
> +static const struct got_error*		cmd_archive(int, char *[]);
>  
>  static const struct got_cmd got_commands[] = {
>  	{ "import",	cmd_import,	usage_import,	"im" },
> @@ -182,6 +186,7 @@ static const struct got_cmd got_commands[] = {
>  	{ "unstage",	cmd_unstage,	usage_unstage,	"ug" },
>  	{ "cat",	cmd_cat,	usage_cat,	"" },
>  	{ "info",	cmd_info,	usage_info,	"" },
> +	{ "archive",	cmd_archive,	usage_archive,	"ar" },
>  };
>  
>  static void
> @@ -14490,3 +14495,417 @@ done:
>  	free(uuidstr);
>  	return error;
>  }
> +
> +/*
> + * Print the synopsis of the archive command to standard error and
> + * terminate the process with exit status 1.
> + */
> +__dead static void
> +usage_archive(void)
> +{
> +	const char *progname = getprogname();
> +
> +	fprintf(stderr, "usage: %s archive repository-path output-file ref\n",
> +	    progname);
> +	exit(1);
> +}
> +
> +// TODO: I'm not sure if this is the right place for the helper functions.
> +
> +/*
> + * Get the "stem" of a file name: the last component of path with its
> + * extension removed. A trailing ".tar.gz" is removed as a unit; otherwise
> + * everything from the last '.' onwards is removed, so that dots which are
> + * part of a version number are kept. A dot at the very start of the file
> + * name is not treated as an extension.
> + * E.g. filestem("/tmp/test.tgz") -> "test"
> + *      filestem("test-1.0.tar.gz") -> "test-1.0"
> + *
> + * The result is newly allocated and must be freed by the caller.
> + * Returns NULL, with errno preserved from the failing call, if memory
> + * allocation or basename(3) fails.
> + */
> +static char *
> +filestem(const char *path)
> +{
> +	static const char targz[] = ".tar.gz";
> +	const size_t targz_len = sizeof(targz) - 1;
> +	char *copy, *name, *dot, *stem = NULL;
> +	size_t stem_len;
> +	int saved_errno;
> +
> +	/* basename(3) is allowed to modify its argument; work on a copy. */
> +	copy = strdup(path);
> +	if (copy == NULL)
> +		return NULL;
> +
> +	name = basename(copy);
> +	if (name != NULL) {
> +		stem_len = strlen(name);
> +		if (stem_len > targz_len &&
> +		    strcmp(name + stem_len - targz_len, targz) == 0)
> +			stem_len -= targz_len;
> +		else if ((dot = strrchr(name, '.')) != NULL && dot != name)
> +			stem_len = dot - name;
> +
> +		/*
> +		 * Copy the stem out instead of editing it in place: the
> +		 * string returned by basename(3) need not point into our
> +		 * copy and may be longer than it (e.g. "." for "").
> +		 */
> +		stem = strndup(name, stem_len);
> +	}
> +
> +	saved_errno = errno;
> +	free(copy);
> +	errno = saved_errno;
> +	return stem;
> +}
> +
> +/*
> + * Recursively remove the file hierarchy rooted at path. The traversal
> + * is physical (symbolic links are not followed) and does not cross
> + * mount points.
> + *
> + * Removal is best-effort: the traversal continues past entries which
> + * cannot be removed. The first such failure is returned once the
> + * traversal is complete; any further failures are reported with warn(3).
> + * Returns NULL if every entry was removed.
> + */
> +static const struct got_error *
> +rm_rf(char *path)
> +{
> +	const struct got_error *err = NULL;
> +	FTSENT *ent;
> +	FTS *fts;
> +	char *argv[2] = { path, NULL };
> +
> +	fts = fts_open(argv, FTS_PHYSICAL | FTS_XDEV | FTS_NOSTAT
> +	    | FTS_NOCHDIR, NULL);
> +	if (fts == NULL)
> +		return got_error_from_errno2("fts_open", path);
> +
> +	while ((ent = fts_read(fts)) != NULL) {
> +		/*
> +		 * fts_info is an enumeration, not a bit mask, so compare
> +		 * it for equality. Directories are skipped on the pre-order
> +		 * visit and removed on the post-order visit (FTS_DP), once
> +		 * their contents are gone.
> +		 */
> +		if (ent->fts_info == FTS_D)
> +			continue;
> +		if (remove(ent->fts_path) != 0) {
> +			if (err == NULL)
> +				err = got_error_from_errno2("remove",
> +				    ent->fts_path);
> +			else
> +				warn("remove('%s')", ent->fts_path);
> +		}
> +	}
> +
> +	fts_close(fts);
> +	return err;
> +}
> +
> +/*
> + * Write len bytes from buf to the gzip stream out.
> + * path names the output file and is only used for error reporting.
> + */
> +static const struct got_error *
> +archive_gzwrite(gzFile out, const void *buf, size_t len, const char *path)
> +{
> +	if (len == 0)
> +		return NULL;
> +	/* gzwrite() returns the number of bytes consumed, or 0 on error. */
> +	if (gzwrite(out, buf, len) != (int)len)
> +		return got_error_from_errno2("gzwrite", path);
> +	return NULL;
> +}
> +
> +/*
> + * got archive repository-path output-file ref
> + *
> + * Check out the commit identified by ref from the repository into a
> + * temporary work tree created below /tmp, then write the checked-out
> + * files to output-file as a gzip-compressed ustar archive. Member names
> + * in the archive are prefixed with the stem of output-file (see
> + * filestem()). The work tree's meta-data directory (".got") is not
> + * archived. The temporary directory is removed again before returning.
> + */
> +static const struct got_error *
> +cmd_archive(int argc, char *argv[])
> +{
> +	// TODO:
> +	// - go through all this code and check if it actually is safe.
> +	// - more testing
> +	// - flags: -v, -q, -n name
> +
> +	// See tar(5)
> +	struct header_posix_ustar {
> +		char name[100];
> +		char mode[8];
> +		char uid[8];
> +		char gid[8];
> +		char size[12];
> +		char mtime[12];
> +		char checksum[8];
> +		char typeflag[1];
> +		char linkname[100];
> +		char magic[6];
> +		char version[2];
> +		char uname[32];
> +		char gname[32];
> +		char devmajor[8];
> +		char devminor[8];
> +		char prefix[155];
> +		char pad[12];
> +	};
> +	const struct got_error *error = NULL, *close_err;
> +	struct got_repository *repo = NULL;
> +	struct got_reference *head_ref = NULL, *ref = NULL;
> +	struct got_worktree *worktree = NULL;
> +	struct got_reflist_head refs;
> +	struct got_object_id *commit_id = NULL;
> +	struct got_pathlist_head paths;
> +	struct got_checkout_progress_arg cpa;
> +	const char *branch_name = GOT_REF_HEAD;
> +	char template[] = "/tmp/got.XXXXXXXXXX";
> +	char buffer[1024];
> +	char *name = NULL, *repo_path = NULL, *output_path = NULL;
> +	char *commit_id_str = NULL, *worktree_ppath = NULL;
> +	char *keyword_idstr = NULL, *worktree_path = NULL;
> +	gzFile output_file = NULL;
> +	size_t len_worktree_ppath, len_name;
> +	int *pack_fds = NULL;
> +	int ch;
> +	char *fts_argv[2];
> +	FTS *fts = NULL;
> +	FTSENT *ent;
> +
> +	TAILQ_INIT(&paths);
> +	TAILQ_INIT(&refs);
> +
> +#ifndef PROFILE
> +	if (pledge("stdio rpath wpath cpath fattr flock proc exec sendfd"
> +	    " unveil", NULL) == -1)
> +		err(1, "pledge");
> +#endif
> +
> +	while ((ch = getopt(argc, argv, "n:")) != -1) {
> +		switch (ch) {
> +		default:
> +			usage_archive();
> +			/* NOTREACHED */
> +		}
> +	}
> +
> +	argc -= optind;
> +	argv += optind;
> +
> +	if (argc != 3)
> +		usage_archive();
> +
> +	repo_path = realpath(argv[0], NULL);
> +	if (repo_path == NULL)
> +		return got_error_from_errno2("realpath", argv[0]);
> +	got_path_strip_trailing_slashes(repo_path);
> +
> +	output_path = argv[1];
> +	name = filestem(output_path);
> +	if (name == NULL) {
> +		error = got_error_from_errno("filename");
> +		goto done;
> +	}
> +
> +	output_file = gzopen(output_path, "w");
> +	if (output_file == NULL) {
> +		error = got_error_from_errno2("gzopen", output_path);
> +		goto done;
> +	}
> +
> +	commit_id_str = strdup(argv[2]);
> +	if (commit_id_str == NULL) {
> +		error = got_error_from_errno("strdup");
> +		goto done;
> +	}
> +
> +	/* mkdtemp(3) fills in the template and returns a pointer to it. */
> +	worktree_ppath = mkdtemp(template);
> +	if (worktree_ppath == NULL) {
> +		error = got_error_from_errno("mkdtemp");
> +		goto done;
> +	}
> +
> +	if (asprintf(&worktree_path, "%s/%s", worktree_ppath, name) == -1) {
> +		error = got_error_from_errno("asprintf");
> +		/* The pointer is undefined on failure; do not free it. */
> +		worktree_path = NULL;
> +		goto done;
> +	}
> +
> +	if (got_path_is_child(worktree_path, repo_path, strlen(repo_path)) ||
> +	    got_path_is_child(repo_path, worktree_path,
> +	    strlen(worktree_path))) {
> +		error = got_error_fmt(GOT_ERR_BAD_PATH,
> +		    "work tree and repository paths may not overlap: %s",
> +		    worktree_path);
> +		goto done;
> +	}
> +
> +	error = got_repo_pack_fds_open(&pack_fds);
> +	if (error != NULL)
> +		goto done;
> +
> +	error = got_repo_open(&repo, repo_path, NULL, pack_fds);
> +	if (error != NULL)
> +		goto done;
> +
> +	/* Pre-create work tree path for unveil(2) */
> +	error = got_path_mkdir(worktree_path);
> +	if (error) {
> +		if (!(error->code == GOT_ERR_ERRNO && errno == EISDIR) &&
> +		    !(error->code == GOT_ERR_ERRNO && errno == EEXIST))
> +			goto done;
> +		if (!got_path_dir_is_empty(worktree_path)) {
> +			error = got_error_path(worktree_path,
> +			    GOT_ERR_DIR_NOT_EMPTY);
> +			goto done;
> +		}
> +	}
> +
> +	error = apply_unveil(got_repo_get_path(repo), 0, worktree_path);
> +	if (error)
> +		goto done;
> +
> +	error = got_ref_open(&head_ref, repo, branch_name, 0);
> +	if (error != NULL)
> +		goto done;
> +
> +	error = got_worktree_init(worktree_path, head_ref, "",
> +	    GOT_WORKTREE_GOT_DIR, repo);
> +	if (error != NULL && !(error->code == GOT_ERR_ERRNO && errno == EEXIST))
> +		goto done;
> +
> +	error = got_worktree_open(&worktree, worktree_path,
> +	    GOT_WORKTREE_GOT_DIR);
> +	if (error != NULL)
> +		goto done;
> +
> +	error = got_ref_list(&refs, repo, NULL, got_ref_cmp_by_name, NULL);
> +	if (error != NULL)
> +		goto done;
> +
> +	error = got_keyword_to_idstr(&keyword_idstr, commit_id_str, repo,
> +	    worktree);
> +	if (error != NULL)
> +		goto done;
> +
> +	if (keyword_idstr != NULL) {
> +		free(commit_id_str);
> +		commit_id_str = keyword_idstr;
> +	}
> +
> +	error = got_repo_match_object_id(&commit_id, NULL, commit_id_str,
> +	    GOT_OBJ_TYPE_COMMIT, &refs, repo);
> +	if (error != NULL)
> +		goto done;
> +
> +	error = check_linear_ancestry(commit_id,
> +	    got_worktree_get_base_commit_id(worktree), 0, repo);
> +	if (error != NULL) {
> +		if (error->code == GOT_ERR_ANCESTRY) {
> +			error = checkout_ancestry_error(
> +			    head_ref, commit_id_str);
> +		}
> +		goto done;
> +	}
> +
> +	error = check_same_branch(commit_id, head_ref, repo);
> +	if (error != NULL) {
> +		if (error->code == GOT_ERR_ANCESTRY) {
> +			error = checkout_ancestry_error(
> +			    head_ref, commit_id_str);
> +		}
> +		goto done;
> +	}
> +
> +	error = got_worktree_set_base_commit_id(worktree, repo, commit_id);
> +	if (error != NULL)
> +		goto done;
> +
> +	/* Expand potentially abbreviated commit ID string. */
> +	free(commit_id_str);
> +	commit_id_str = NULL;	/* avoid a double free at done: */
> +	error = got_object_id_str(&commit_id_str, commit_id);
> +	if (error)
> +		goto done;
> +
> +	error = got_pathlist_append(&paths, "", NULL);
> +	if (error)
> +		goto done;
> +	cpa.worktree_path = worktree_path;
> +	cpa.had_base_commit_ref_error = 0;
> +	cpa.verbosity = -1;
> +	error = got_worktree_checkout_files(worktree, &paths, repo,
> +	    checkout_progress, &cpa, check_cancelled, NULL);
> +	if (error != NULL)
> +		goto done;
> +
> +	if (got_ref_is_symbolic(head_ref)) {
> +		error = got_ref_resolve_symbolic(&ref, repo, head_ref);
> +		if (error)
> +			goto done;
> +	}
> +	if (cpa.had_base_commit_ref_error)
> +		show_worktree_base_ref_warning();
> +
> +	fts_argv[0] = worktree_path;
> +	fts_argv[1] = NULL;
> +	fts = fts_open(fts_argv, FTS_PHYSICAL | FTS_NOCHDIR | FTS_NOSTAT, NULL);
> +	if (fts == NULL) {
> +		error = got_error_from_errno2("fts_open", worktree_path);
> +		goto done;
> +	}
> +
> +	len_worktree_ppath = strlen(worktree_ppath);
> +	len_name = strlen(name);
> +
> +	// Skip first entry (the worktree itself).
> +	ent = fts_read(fts);
> +	if (ent == NULL) {
> +		error = got_error_from_errno2("fts_read", worktree_path);
> +		goto done;
> +	}
> +	while ((ent = fts_read(fts)) != NULL) {
> +		const char *path, *tarpath, *relpath;
> +		struct header_posix_ustar hdr;
> +		unsigned long cksum;
> +		size_t n, total = 0;
> +		struct stat st;
> +		off_t sz;
> +		FILE *f;
> +		int reg;
> +
> +		/*
> +		 * Directories were already archived on their pre-order
> +		 * visit. fts_info is an enumeration, not a bit mask, so
> +		 * it must be compared for equality.
> +		 */
> +		if (ent->fts_info == FTS_DP)
> +			continue;
> +
> +		/*
> +		 * path    = <tmpdir>/<name>/<relpath>
> +		 * tarpath =          <name>/<relpath>
> +		 */
> +		path = ent->fts_path;
> +		tarpath = path + len_worktree_ppath + 1;
> +		relpath = tarpath + len_name + 1;
> +		if (strcmp(relpath, ".got") == 0
> +		    || strncmp(relpath, ".got/", 5) == 0)
> +			continue;
> +
> +		/* Refuse to write a silently truncated member name. */
> +		memset(&hdr, 0, sizeof (hdr));
> +		if (strlcpy(hdr.name, tarpath, sizeof (hdr.name)) >=
> +		    sizeof (hdr.name)) {
> +			error = got_error_fmt(GOT_ERR_BAD_PATH,
> +			    "path too long for ustar header: %s", tarpath);
> +			goto done;
> +		}
> +
> +		f = fopen(ent->fts_path, "r");
> +		if (f == NULL) {
> +			error = got_error_from_errno2("open", path);
> +			goto done;
> +		}
> +
> +		if (fstat(fileno(f), &st) != 0) {
> +			error = got_error_from_errno2("fstat", path);
> +			fclose(f);
> +			goto done;
> +		}
> +
> +		reg = S_ISREG(st.st_mode);
> +
> +		if (reg) {
> +			hdr.typeflag[0] = '0';
> +		} else if (S_ISLNK(st.st_mode)) {
> +			hdr.typeflag[0] = '2';
> +		} else if (S_ISCHR(st.st_mode)) {
> +			hdr.typeflag[0] = '3';
> +		} else if (S_ISBLK(st.st_mode)) {
> +			hdr.typeflag[0] = '4';
> +		} else if (S_ISDIR(st.st_mode)) {
> +			hdr.typeflag[0] = '5';
> +		} else if (S_ISFIFO(st.st_mode)) {
> +			hdr.typeflag[0] = '6';
> +		} else {
> +			// TODO: Socket?
> +			fprintf(stderr, "got: cannot archive %s\n", path);
> +			fclose(f);
> +			continue;
> +		}
> +
> +		/* Numeric header fields are NUL-terminated octal strings. */
> +		snprintf(hdr.mode, sizeof(hdr.mode), "%o",
> +		    (unsigned int)st.st_mode);
> +		snprintf(hdr.uid, sizeof(hdr.uid), "%o",
> +		    (unsigned int)st.st_uid);
> +		snprintf(hdr.gid, sizeof(hdr.gid), "%o",
> +		    (unsigned int)st.st_gid);
> +		/* Only regular files carry data blocks. */
> +		sz = reg ? st.st_size : 0;
> +		snprintf(hdr.size, sizeof(hdr.size), "%llo",
> +		    (unsigned long long)sz);
> +		snprintf(hdr.mtime, sizeof(hdr.mtime), "%llo",
> +		    (unsigned long long)st.st_mtime);
> +		memcpy(hdr.magic, "ustar", 6);
> +		memcpy(hdr.version, "00", 2);
> +
> +		/*
> +		 * The checksum is the sum of all header bytes, computed
> +		 * with the checksum field itself filled with spaces.
> +		 */
> +		memset(hdr.checksum, ' ', sizeof(hdr.checksum));
> +		cksum = 0;
> +		for (size_t i = 0; i < sizeof(hdr); ++i)
> +			cksum += ((const unsigned char *)&hdr)[i];
> +
> +		snprintf(hdr.checksum, sizeof(hdr.checksum) - 1, "%lo", cksum);
> +
> +		error = archive_gzwrite(output_file, &hdr, sizeof(hdr),
> +		    output_path);
> +
> +		if (error == NULL && reg) {
> +			while ((n = fread(buffer, 1, sizeof(buffer), f)) != 0) {
> +				error = archive_gzwrite(output_file, buffer,
> +				    n, output_path);
> +				if (error)
> +					break;
> +				total += n;
> +			}
> +			if (error == NULL && ferror(f))
> +				error = got_error_from_errno2("fread", path);
> +
> +			/*
> +			 * Pad the data with zeros up to the next 512-byte
> +			 * block boundary. Nothing is added when the data
> +			 * already ends on a boundary.
> +			 */
> +			n = total % 512;
> +			if (error == NULL && n != 0) {
> +				n = 512 - n;
> +				memset(buffer, 0, n);
> +				error = archive_gzwrite(output_file, buffer,
> +				    n, output_path);
> +			}
> +		}
> +
> +		fclose(f);
> +		if (error)
> +			goto done;
> +	}
> +
> +	/* The archive ends with two 512-byte blocks of zeros. */
> +	memset(buffer, 0, 1024);
> +	error = archive_gzwrite(output_file, buffer, 1024, output_path);
> +
> +done:
> +	if (fts != NULL)
> +		fts_close(fts);
> +	/* gzclose() flushes pending output, so its result matters. */
> +	if (output_file != NULL && gzclose(output_file) != Z_OK &&
> +	    error == NULL)
> +		error = got_error_from_errno2("gzclose", output_path);
> +	if (pack_fds) {
> +		const struct got_error *pack_err =
> +		    got_repo_pack_fds_close(pack_fds);
> +		if (error == NULL)
> +			error = pack_err;
> +	}
> +	if (head_ref)
> +		got_ref_close(head_ref);
> +	if (ref)
> +		got_ref_close(ref);
> +	if (repo) {
> +		close_err = got_repo_close(repo);
> +		if (error == NULL)
> +			error = close_err;
> +	}
> +	if (worktree != NULL) {
> +		close_err = got_worktree_close(worktree);
> +		if (error == NULL)
> +			error = close_err;
> +	}
> +	if (worktree_ppath != NULL) {
> +		close_err = rm_rf(worktree_ppath);
> +		if (error == NULL)
> +			error = close_err;
> +	}
> +	got_ref_list_free(&refs);
> +	got_pathlist_free(&paths, GOT_PATHLIST_FREE_NONE);
> +	free(name);
> +	free(repo_path);
> +	free(worktree_path);
> +	free(commit_id_str);
> +	free(commit_id);
> +	return error;
> +}