"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: provide got_object_blob_is_binary
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Tue, 3 Jan 2023 19:54:09 +0100

Download raw body.

Thread
On Tue, Jan 03, 2023 at 05:16:14PM +0100, Omar Polo wrote:
> I'd like to implement a page in gotwebd where a blob can be viewed
> inline, inside the html UI, with linkable lines and bells and
> whistles.
> 
> The blob apis are very block-oriented and don't expose (for good) the
> underlying FILE*.  To achieve what I'd like to do in gotwebd however I
> need to have
> 
>  1. a way to know if a blob is likely to be binary or not.  if it
>     contains binary data then we may not want to display it inline or
>     treat it differently.
> 
>  2. a getline-like function.
> 
> 
> Here I'm doing 1. and showing how it can be already be used to
> simplify the logic in gotwebd.  2. will be done in a follow-up diff.
> 
> The fact that it leaves the underlying stream after the header is done
> on purpose.  User of this function are likely to dump the file reading
> block after block or (in the future) line by line, skipping the
> initial header would be troublesome if not impossible.

Yes, this seems fine. ok.

> diff /home/op/w/got2
> commit - 71cd355cb2711ad528715bbbb5b41be7c26ace2a
> path + /home/op/w/got2
> blob - 87cec74f0c075ba839a114bdb6c50bdca8ae9160
> file + gotwebd/got_operations.c
> --- gotwebd/got_operations.c
> +++ gotwebd/got_operations.c
> @@ -52,13 +52,6 @@ static int
>  static const struct got_error *got_gotweb_blame_cb(void *, int, int,
>      struct got_commit_object *,struct got_object_id *);
>  
> -static int
> -isbinary(const uint8_t *buf, size_t n)
> -{
> -	return memchr(buf, '\0', n) != NULL;
> -}
> -
> -
>  static const struct got_error *
>  got_gotweb_flushfile(FILE *f, int fd)
>  {
> @@ -978,8 +971,8 @@ got_output_file_blob(struct request *c)
>  	struct got_reflist_head refs;
>  	struct got_blob_object *blob = NULL;
>  	char *path = NULL, *in_repo_path = NULL;
> -	int obj_type, set_mime = 0, fd = -1;
> -	size_t len, hdrlen;
> +	int bin, obj_type, fd = -1;
> +	size_t len;
>  	const uint8_t *buf;
>  
>  	TAILQ_INIT(&refs);
> @@ -1028,43 +1021,31 @@ got_output_file_blob(struct request *c)
>  	error = got_object_open_as_blob(&blob, repo, commit_id, BUF, fd);
>  	if (error)
>  		goto done;
> -	hdrlen = got_object_blob_get_hdrlen(blob);
> -	do {
> +
> +	error = got_object_blob_is_binary(&bin, blob);
> +	if (error)
> +		goto done;
> +
> +	if (bin)
> +		error = gotweb_render_content_type_file(c,
> +		    "application/octet-stream", qs->file, NULL);
> +	else
> +		error = gotweb_render_content_type(c, "text/plain");
> +
> +	if (error) {
> +		log_warnx("%s: %s", __func__, error->msg);
> +		goto done;
> +	}
> +
> +	for (;;) {
>  		error = got_object_blob_read_block(&len, blob);
>  		if (error)
>  			goto done;
> +		if (len == 0)
> +			break;
>  		buf = got_object_blob_get_read_buf(blob);
> -
> -		/*
> -		 * Skip blob object header first time around,
> -		 * which also contains a zero byte.
> -		 */
> -		buf += hdrlen;
> -		if (set_mime == 0) {
> -			if (isbinary(buf, len - hdrlen)) {
> -				error = gotweb_render_content_type_file(c,
> -				    "application/octet-stream",
> -				    qs->file, NULL);
> -				if (error) {
> -					log_warnx("%s: %s", __func__,
> -					    error->msg);
> -					goto done;
> -				}
> -			} else {
> -				error = gotweb_render_content_type(c,
> -				  "text/plain");
> -				if (error) {
> -					log_warnx("%s: %s", __func__,
> -					    error->msg);
> -					goto done;
> -				}
> -			}
> -		}
> -		set_mime = 1;
> -		fcgi_gen_binary_response(c, buf, len - hdrlen);
> -
> -		hdrlen = 0;
> -	} while (len != 0);
> +		fcgi_gen_binary_response(c, buf, len);
> +	}
>  done:
>  	if (commit)
>  		got_object_commit_close(commit);
> blob - 19f043e1c43b4b21c3b19d764f34a7575df5b7a7
> file + include/got_object.h
> --- include/got_object.h
> +++ include/got_object.h
> @@ -291,6 +291,13 @@ void got_object_blob_rewind(struct got_blob_object *);
>  void got_object_blob_rewind(struct got_blob_object *);
>  
>  /*
> + * Heuristic to check whether the blob contains binary data.  Rewinds
> + * the blob's data stream back after the header.
> + */
> +const struct got_error *got_object_blob_is_binary(int *,
> +    struct got_blob_object *);
> +
> +/*
>   * Read the entire content of a blob and write it to the specified file.
>   * Flush and rewind the file as well. Indicate the amount of bytes
>   * written in the size_t output argument, and the number of lines in the
> blob - 7bc7eb3dcdcec57157f35f8b6c676e1979ba2b76
> file + lib/object.c
> --- lib/object.c
> +++ lib/object.c
> @@ -402,6 +402,29 @@ got_object_blob_dump_to_file(off_t *filesize, int *nli
>  }
>  
>  const struct got_error *
> +got_object_blob_is_binary(int *binary, struct got_blob_object *blob)
> +{
> +	const struct got_error *err;
> +	size_t hdrlen, len;
> +
> +	*binary = 0;
> +	hdrlen = got_object_blob_get_hdrlen(blob);
> +
> +	if (fseeko(blob->f, hdrlen, SEEK_SET) == -1)
> +		return got_error_from_errno("fseeko");
> +
> +	err = got_object_blob_read_block(&len, blob);
> +	if (err)
> +		return err;
> +
> +	*binary = memchr(blob->read_buf, '\0', len) != NULL;
> +
> +	if (fseeko(blob->f, hdrlen, SEEK_SET) == -1)
> +		return got_error_from_errno("fseeko");
> +	return NULL;
> +}
> +
> +const struct got_error *
>  got_object_blob_dump_to_file(off_t *filesize, int *nlines,
>      off_t **line_offsets, FILE *outfile, struct got_blob_object *blob)
>  {
> 
>