From: Stefan Sperling <stsp@stsp.name>
Subject: Re: introduce got_object_id_serialize
To: Omar Polo <op@omarpolo.com>
Cc: gameoftrees@openbsd.org
Date: Fri, 3 Feb 2023 21:11:37 +0100

On Fri, Feb 03, 2023 at 01:42:07PM +0100, Omar Polo wrote:
> On 2023/02/03 12:44:04 +0100, Stefan Sperling <stsp@stsp.name> wrote:
> > On Fri, Feb 03, 2023 at 11:52:20AM +0100, Omar Polo wrote:
> > > add a function that's analogous to got_object_id_str but writes to the
> > > given buffer.  at the moment is just a wrapper around got_sha1_digest_to_str
> > > but will gain some more logic in the future.
> > > 
> > > i've avoided all the calls to got_sha1_digest_to_str that were in the
> > > pack code, the network code and gotd.  There are a few calls to build
> > > a GOT_ERR_OBJ_CSUM error that i'm going to deal with in a follow-up
> > > diff.  The two calls in the patch code were kept too, will revisit
> > > them when we'll start to have some real initial sha256 support.
> > > 
> > > diffstat -s /home/op/w/got
> > >  M  include/got_object.h  |  7+  0-
> > 
> > Does the function need to be declared in exactly this headers?
> > 
> > Would lib/got_lib_object.h work, too? This would reduce visibility.
> > Code outside lib libexec (and now, gotd) isn't really supposed
> > to be using headers from lib/. For new functions it would be better
> > to start out with limited visibility if possible.
> 
> sure; i went with got_object.h only because it's where
> got_object_id_str was defined.
> 
> > >  M  lib/error.c           |  1+  1-
> > >  M  lib/object.c          |  1+  1-
> > >  M  lib/object_parse.c    |  7+  1-
> > > 
> > > 4 files changed, 16 insertions(+), 3 deletions(-)
> > > 
> > > diff -s /home/op/w/got
> > > commit - 09cbb981df84d9c12f0fa371cb6b85d3f1b615c0
> > > path + /home/op/w/got (staged changes)
> > > blob - 9d11c8f4f518f5f911295671d3aafcb494a7ec10
> > > blob + 3dea2df3656c85f425a01b9e66471da1fc10fdaf
> > > --- include/got_object.h
> > > +++ include/got_object.h
> > > @@ -82,6 +82,13 @@ const struct got_error *got_object_id_str(char **, str
> > >  const struct got_error *got_object_id_str(char **, struct got_object_id *);
> > >  
> > >  /*
> > > + * Write the string representation of an object ID in the given buffer.
> > > + * The output depends on the hash function used by the repository format
> > > + * (currently SHA1).
> > > + */
> > > +char *got_object_id_serialize(struct got_object_id *, char *, size_t);
> > 
> > "serialize" sounds as if it was creating a bytes string representation.
> > I would suggest using a different term.
> > For example: got_object_id_to_hex(struct got_object_id *, char *, size_t);
> 
> i didn't like serialize neither; thanks for the suggestion!
> 
> > There should be a constant which callers could use to create a
> > buffer of an appropriate size. For now, we could define it like:
> > 
> > #define GOT_OBJECT_ID_HEX_MAXLEN SHA1_DIGEST_LENGTH
> 
> eheh, this was my next step but why don't do it now?  I've also
> tweaked the callers of got_object_blob_id_str.
> 
> > and adjust this constant later as needed. The docs could say:
> > 
> >  * Write the string representation of an object ID in the given buffer.
> >  * This buffer must be at least GOT_OBJECT_ID_HEX_MAXLEN bytes in size.
> >  * The output depends on the hash function used by the repository format.
> > 
> >  And I would drop (currently SHA1). We don't need to document this
> >  because callers shouldn't have to care.
> 
> agreed!

Thanks, I like this much better. ok

> diff -s /home/op/w/got
> commit - 09cbb981df84d9c12f0fa371cb6b85d3f1b615c0
> path + /home/op/w/got (staged changes)
> blob - 9d11c8f4f518f5f911295671d3aafcb494a7ec10
> blob + 6a161bc20df5aaa7789395593ab2ffc6fbe4b1fb
> --- include/got_object.h
> +++ include/got_object.h
> @@ -14,6 +14,8 @@ struct got_object_id {
>   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>   */
>  
> +#define GOT_OBJECT_ID_HEX_MAXLEN SHA1_DIGEST_STRING_LENGTH
> +
>  struct got_object_id {
>  	u_int8_t sha1[SHA1_DIGEST_LENGTH];
>  };
> blob - 3be41277d7a85a185d21bb7faa43bebe3b8eae07
> blob + 26ad0c1921d76c50b48885cce2fa226b137793d7
> --- lib/diff.c
> +++ lib/diff.c
> @@ -136,8 +136,8 @@ diff_blobs(struct got_diff_line **lines, size_t *nline
>      enum got_diff_algorithm diff_algo)
>  {
>  	const struct got_error *err = NULL, *free_err;
> -	char hex1[SHA1_DIGEST_STRING_LENGTH];
> -	char hex2[SHA1_DIGEST_STRING_LENGTH];
> +	char hex1[GOT_OBJECT_ID_HEX_MAXLEN];
> +	char hex2[GOT_OBJECT_ID_HEX_MAXLEN];
>  	const char *idstr1 = NULL, *idstr2 = NULL;
>  	char *modestr1 = NULL, *modestr2 = NULL;
>  	off_t size1, size2;
> @@ -344,7 +344,7 @@ diff_blob_file(struct got_diffreg_result **resultp,
>      int force_text_diff, struct got_diffstat_cb_arg *diffstat, FILE *outfile)
>  {
>  	const struct got_error *err = NULL, *free_err;
> -	char hex1[SHA1_DIGEST_STRING_LENGTH];
> +	char hex1[GOT_OBJECT_ID_HEX_MAXLEN];
>  	const char *idstr1 = NULL;
>  	struct got_diffreg_result *result = NULL;
>  
> blob - 3c84ba7eeb9c88175a7652495f934263b6fd2328
> blob + 95d7ae9cb011916744442c02343dafa9e4901bb1
> --- lib/error.c
> +++ lib/error.c
> @@ -32,7 +32,7 @@
>  #include "got_lib_delta.h"
>  #include "got_lib_inflate.h"
>  #include "got_lib_object.h"
> -#include "got_lib_sha1.h"
> +#include "got_lib_object_parse.h"
>  
>  #ifndef nitems
>  #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
> @@ -367,11 +367,11 @@ got_error_no_obj(struct got_object_id *id)
>  const struct got_error *
>  got_error_no_obj(struct got_object_id *id)
>  {
> -	char msg[sizeof("object   not found") + SHA1_DIGEST_STRING_LENGTH];
> -	char id_str[SHA1_DIGEST_STRING_LENGTH];
> +	char id_str[GOT_OBJECT_ID_HEX_MAXLEN];
> +	char msg[sizeof("object   not found") + sizeof(id_str)];
>  	int ret;
>  
> -	if (!got_sha1_digest_to_str(id->sha1, id_str, sizeof(id_str)))
> +	if (!got_object_id_hex(id, id_str, sizeof(id_str)))
>  		return got_error(GOT_ERR_NO_OBJ);
>  
>  	ret = snprintf(msg, sizeof(msg), "object %s not found", id_str);
> blob - 37def8a54e88bcdfbc49e4cc2f7a41fd8175f3b5
> blob + 4b96e21f407d84ac399adbccb5d7e21cf86dad1f
> --- lib/got_lib_object_parse.h
> +++ lib/got_lib_object_parse.h
> @@ -16,6 +16,13 @@ const struct got_error *got_object_qid_alloc_partial(s
>  
>  struct got_pathlist_head;
>  
> +/*
> + * Write the string representation fo an object ID in the given buffer.
> + * This buffer must be at least GOT_OBJECT_ID_HEX_MAXLEN bytes in size.
> + * The output depneds on the hash function used by the repository format.
> + */
> +char *got_object_id_hex(struct got_object_id *, char *, size_t);
> +
>  const struct got_error *got_object_qid_alloc_partial(struct got_object_qid **);
>  struct got_commit_object *got_object_commit_alloc_partial(void);
>  struct got_tree_entry *got_alloc_tree_entry_partial(void);
> blob - 52e7c967c9cc041ec94d6baa8ab706eb7434b845
> blob + 57876d8fb763d7736c0cb4fbb6967718793d2cea
> --- lib/object.c
> +++ lib/object.c
> @@ -374,7 +374,7 @@ got_object_blob_id_str(struct got_blob_object *blob, c
>  char *
>  got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
>  {
> -	return got_sha1_digest_to_str(blob->id.sha1, buf, size);
> +	return got_object_id_hex(&blob->id, buf, size);
>  }
>  
>  size_t
> blob - c9a711f7c6066bf89187c9058634f85a5a61c2e1
> blob + b17d5a9b339c525ac55deed48e604e2b049d96ca
> --- lib/object_parse.c
> +++ lib/object_parse.c
> @@ -88,13 +88,13 @@ got_object_id_str(char **outbuf, struct got_object_id 
>  const struct got_error *
>  got_object_id_str(char **outbuf, struct got_object_id *id)
>  {
> -	static const size_t len = SHA1_DIGEST_STRING_LENGTH;
> +	static const size_t len = GOT_OBJECT_ID_HEX_MAXLEN;
>  
>  	*outbuf = malloc(len);
>  	if (*outbuf == NULL)
>  		return got_error_from_errno("malloc");
>  
> -	if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
> +	if (got_object_id_hex(id, *outbuf, len) == NULL) {
>  		free(*outbuf);
>  		*outbuf = NULL;
>  		return got_error(GOT_ERR_BAD_OBJ_ID_STR);
> @@ -103,6 +103,12 @@ void
>  	return NULL;
>  }
>  
> +char *
> +got_object_id_hex(struct got_object_id *id, char *buf, size_t len)
> +{
> +	return got_sha1_digest_to_str(id->sha1, buf, len);
> +}
> +
>  void
>  got_object_close(struct got_object *obj)
>  {
>