"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: gotadmin init -t sha256 ?
To:
Stefan Sperling <stsp@stsp.name>
Cc:
gameoftrees@openbsd.org
Date:
Wed, 17 Jul 2024 20:07:53 +0200

Download raw body.

Thread
On 2024/07/16 22:42:58 +0200, Stefan Sperling <stsp@stsp.name> wrote:
> On Tue, Jul 16, 2024 at 02:29:29PM +0200, Omar Polo wrote:
> > Here's two more sha256-related diffs that are small enough to fit one
> > mail.
> > 
> > The first is to enable the creation of sha256 repos via `init -t sha256'.
> > Opinions about this?  We now have enough features to work (albeit locally
> > only) on sha256 repos, so we might as well allow people to create them.
> > 
> > (GOT_HASH_SHA1 is hardcoded only when creating repos via got/cvg clone
> > since the network protocol doesn't support sha256 -- we'll need to
> > implement the v2 of it.  Actually maybe I should add a warning against
> > fetch/send in sha256 repos.)
> > 
> > The second one is to show the object-format in the gotadmin info output,
> > which I believe is handy.
> > 
> > Ideas/better verbiage/oks/etc? :)
> 
> > +.It Fl t Ar format
> > +Select the hashing function to use.
> 
> My only concern is the mixed bag of terms, all of which try to convey
> the same thing: object format, type, hashing function, hash algorithm.
> 
> Can we keep this simpler somehow?
> 
> Which terms to use is arbitrary but we should pick a subset and then
> stick to that.

That's a very good point.  I'm not sure where I picked
'got_hash_algorithm' from, I guess it just made sense.  Git calls it the
'object-format', which is both more accessible (users don't need to know
what a 'hash' is) and more confusing (why is a hash a format?)

I'm not sure which is more correct between "hash function" and "hash
algorithm", maybe function?  But also hash algorithm doesn't sound bad.

> In the code we use 'algo' everywhere. I suppose we should converge
> towards similar terminology in the UI:
> 
> .It Fl A Ar hashing-algorithm
> Configure the repository's
> .Ar hashing-algorithm
> used for the computation of Git object IDs.
> Possible values are
> .Cm sha1
> .Pq the default
> or
> .Cm sha256 .

I like it much more than my text.  I liked -t because it is lowercase :P
but it doesn't matter much.  It's just a one-off command.

Here's an updated diff; I've also used 'hashing algorithm' in gotadmin
info.  (I still intend to commit that bit in a separate commit.)

diff refs/heads/main a70fa94c5dce2546419c0522ebe3f46e7d8981f6
commit - a5a5a156a91a17c59934179af8eca099a95b6c2d
commit + a70fa94c5dce2546419c0522ebe3f46e7d8981f6
blob - 0efd11eb1ea05278d857c70ca545f17de8f87588
blob + f2dbcb38ea508b565dbea5ebfbccbf5530868b1c
--- cvg/cvg.c
+++ cvg/cvg.c
@@ -1675,7 +1675,7 @@ cmd_clone(int argc, char *argv[])
 		goto done;
 
 	if (!list_refs_only) {
-		error = got_repo_init(repo_path, NULL);
+		error = got_repo_init(repo_path, NULL, GOT_HASH_SHA1);
 		if (error)
 			goto done;
 		error = got_repo_pack_fds_open(&pack_fds);
blob - 82cc6a0781f740c40e728b1018f50ffc72607f9b
blob + 8d7484c497a231b5351f541295c4fa4d6609a5e0
--- got/got.1
+++ got/got.1
@@ -75,7 +75,7 @@ The commands for
 .Nm
 are as follows:
 .Bl -tag -width checkout
-.It Cm init Oo Fl b Ar branch Oc Ar repository-path
+.It Cm init Oo Fl A Ar hashing-algorithm Oc Oo Fl b Ar branch Oc Ar repository-path
 Create a new empty repository at the specified
 .Ar repository-path .
 .Pp
@@ -108,6 +108,15 @@ The options for
 .Cm got init
 are as follows:
 .Bl -tag -width Ds
+.It Fl A Ar hashing-algorithm
+Configure the repository's
+.Ar hashing-algorithm
+used for the computation of Git object IDs.
+Possible values are
+.Cm sha1
+.Pq the default
+or
+.Cm sha256 .
 .It Fl b Ar branch
 Make the repository's HEAD reference point to the specified
 .Ar branch
blob - 3f403ec8e8aec0b1e344d07f677b0cd872cdaa28
blob + eedba272792e095dfe5a62504a47e88c579c69c8
--- got/got.c
+++ got/got.c
@@ -357,7 +357,8 @@ apply_unveil(const char *repo_path, int repo_read_only
 __dead static void
 usage_init(void)
 {
-	fprintf(stderr, "usage: %s init [-b branch] repository-path\n",
+	fprintf(stderr, "usage: %s init [-A hashing-algorithm] [-b branch]"
+	    " repository-path\n",
 	    getprogname());
 	exit(1);
 }
@@ -368,10 +369,20 @@ cmd_init(int argc, char *argv[])
 	const struct got_error *error = NULL;
 	const char *head_name = NULL;
 	char *repo_path = NULL;
+	enum got_hash_algorithm algo = GOT_HASH_SHA1;
 	int ch;
 
-	while ((ch = getopt(argc, argv, "b:")) != -1) {
+	while ((ch = getopt(argc, argv, "A:b:")) != -1) {
 		switch (ch) {
+		case 'A':
+			if (!strcmp(optarg, "sha1"))
+				algo = GOT_HASH_SHA1;
+			else if (!strcmp(optarg, "sha256"))
+				algo = GOT_HASH_SHA256;
+			else
+				return got_error_path(optarg,
+				    GOT_ERR_OBJECT_FORMAT);
+			break;
 		case 'b':
 			head_name = optarg;
 			break;
@@ -406,7 +417,7 @@ cmd_init(int argc, char *argv[])
 	if (error)
 		goto done;
 
-	error = got_repo_init(repo_path, head_name);
+	error = got_repo_init(repo_path, head_name, algo);
 done:
 	free(repo_path);
 	return error;
@@ -1765,7 +1776,7 @@ cmd_clone(int argc, char *argv[])
 		err(1, "pledge");
 #endif
 	if (!list_refs_only) {
-		error = got_repo_init(repo_path, NULL);
+		error = got_repo_init(repo_path, NULL, GOT_HASH_SHA1);
 		if (error)
 			goto done;
 		error = got_repo_pack_fds_open(&pack_fds);
blob - 00bb07ed7939d88c75f1fb6e94a7b77a86d50730
blob + d9461631d623c70a1fa3f0a27dc61e1fbc5f011a
--- gotadmin/gotadmin.1
+++ gotadmin/gotadmin.1
@@ -53,7 +53,7 @@ The commands for
 .Nm
 are as follows:
 .Bl -tag -width checkout
-.It Cm init Oo Fl b Ar branch Oc Ar repository-path
+.It Cm init Oo Fl A Ar hashing-algorithm Oc Oo Fl b Ar branch Oc Ar repository-path
 Create a new empty repository at the specified
 .Ar repository-path .
 .Pp
@@ -86,6 +86,15 @@ The options for
 .Cm gotadmin init
 are as follows:
 .Bl -tag -width Ds
+.It Fl A Ar hashing-algorithm
+Configure the repository's
+.Ar hashing-algorithm
+used for the computation of Git object IDs.
+Possible values are
+.Cm sha1
+.Pq the default
+or
+.Cm sha256 .
 .It Fl b Ar branch
 Make the repository's HEAD reference point to the specified
 .Ar branch
blob - ac13af4d643c13b2ba5b2cdc4285db4bb2b2789c
blob + 6678a9dd097384ee9c7b2545f4a1e9211a83054d
--- gotadmin/gotadmin.c
+++ gotadmin/gotadmin.c
@@ -280,7 +280,8 @@ done:
 __dead static void
 usage_init(void)
 {
-	fprintf(stderr, "usage: %s init [-b branch] repository-path\n",
+	fprintf(stderr, "usage: %s init [-A hashing-algorithm] [-b branch]"
+	    " repository-path\n",
 	    getprogname());
 	exit(1);
 }
@@ -291,6 +292,7 @@ cmd_init(int argc, char *argv[])
 	const struct got_error *error = NULL;
 	const char *head_name = NULL;
 	char *repo_path = NULL;
+	enum got_hash_algorithm algo = GOT_HASH_SHA1;
 	int ch;
 
 #ifndef PROFILE
@@ -298,8 +300,17 @@ cmd_init(int argc, char *argv[])
 		err(1, "pledge");
 #endif
 
-	while ((ch = getopt(argc, argv, "b:")) != -1) {
+	while ((ch = getopt(argc, argv, "A:b:")) != -1) {
 		switch (ch) {
+		case 'A':
+			if (!strcmp(optarg, "sha1"))
+				algo = GOT_HASH_SHA1;
+			else if (!strcmp(optarg, "sha256"))
+				algo = GOT_HASH_SHA256;
+			else
+				return got_error_path(optarg,
+				    GOT_ERR_OBJECT_FORMAT);
+			break;
 		case 'b':
 			head_name = optarg;
 			break;
@@ -330,7 +341,7 @@ cmd_init(int argc, char *argv[])
 	if (error)
 		goto done;
 
-	error = got_repo_init(repo_path, head_name);
+	error = got_repo_init(repo_path, head_name, algo);
 done:
 	free(repo_path);
 	return error;
@@ -394,6 +405,9 @@ cmd_info(int argc, char *argv[])
 		goto done;
 
 	printf("repository: %s\n", got_repo_get_path_git_dir(repo));
+	printf("hashing algorithm: %s\n",
+	    got_repo_get_object_format(repo) == GOT_HASH_SHA1 ? "sha1"
+	    : "sha256");
 
 	gotconfig = got_repo_get_gotconfig(repo);
 	if (gotconfig) {
blob - c53bf9434590b858c1ce59018954e81ff934dfc1
blob + e4dd636f145daee8d00edb26040118dabe291813
--- include/got_repository.h
+++ include/got_repository.h
@@ -144,7 +144,8 @@ const struct got_error *got_repo_map_path(char **, str
  * Create a new repository with optional specified
  * HEAD ref in an empty directory at a specified path.
  */
-const struct got_error *got_repo_init(const char *, const char *);
+const struct got_error *got_repo_init(const char *, const char *,
+    enum got_hash_algorithm);
 
 /* Attempt to find a unique object ID for a given ID string prefix. */
 const struct got_error *got_repo_match_object_id_prefix(struct got_object_id **,
blob - d39172980ea798fa07c161a3cecc055dfc76136c
blob + f6c9b881e64ee0270d643a9dc809378060318b2b
--- lib/repository.c
+++ lib/repository.c
@@ -1723,7 +1723,8 @@ got_repo_unpin_pack(struct got_repository *repo)
 }
 
 const struct got_error *
-got_repo_init(const char *repo_path, const char *head_name)
+got_repo_init(const char *repo_path, const char *head_name,
+    enum got_hash_algorithm algo)
 {
 	const struct got_error *err = NULL;
 	const char *dirnames[] = {
@@ -1734,13 +1735,23 @@ got_repo_init(const char *repo_path, const char *head_
 	const char *description_str = "Unnamed repository; "
 	    "edit this file 'description' to name the repository.";
 	const char *headref = "ref: refs/heads/";
-	const char *gitconfig_str = "[core]\n"
+	const char *gitconfig_sha1 = "[core]\n"
 	    "\trepositoryformatversion = 0\n"
 	    "\tfilemode = true\n"
 	    "\tbare = true\n";
+	const char *gitconfig_sha256 = "[core]\n"
+	    "\trepositoryformatversion = 1\n"
+	    "\tfilemode = true\n"
+	    "\tbare = true\n"
+	    "[extensions]\n"
+	    "\tobjectformat = sha256\n";
+	const char *gitconfig = gitconfig_sha1;
 	char *headref_str, *path;
 	size_t i;
 
+	if (algo == GOT_HASH_SHA256)
+		gitconfig = gitconfig_sha256;
+
 	if (!got_path_dir_is_empty(repo_path))
 		return got_error(GOT_ERR_DIR_NOT_EMPTY);
 
@@ -1776,7 +1787,7 @@ got_repo_init(const char *repo_path, const char *head_
 
 	if (asprintf(&path, "%s/%s", repo_path, "config") == -1)
 		return got_error_from_errno("asprintf");
-	err = got_path_create_file(path, gitconfig_str);
+	err = got_path_create_file(path, gitconfig);
 	free(path);
 	if (err)
 		return err;