"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
provide got_object_blob_is_binary
To:
gameoftrees@openbsd.org
Date:
Tue, 03 Jan 2023 17:16:14 +0100

Download raw body.

Thread
I'd like to implement a page in gotwebd where a blob can be viewed
inline, inside the html UI, with linkable lines and bells and
whistles.

The blob apis are very block-oriented and don't expose (for good) the
underlying FILE*.  To achieve what I'd like to do in gotwebd however I
need to have

 1. a way to know if a blob is likely to be binary or not.  if it
    contains binary data then we may not want to display it inline or
    treat it differently.

 2. a getline-like function.


Here I'm doing 1. and showing how it can be already be used to
simplify the logic in gotwebd.  2. will be done in a follow-up diff.

The fact that it leaves the underlying stream after the header is done
on purpose.  User of this function are likely to dump the file reading
block after block or (in the future) line by line, skipping the
initial header would be troublesome if not impossible.

diff /home/op/w/got2
commit - 71cd355cb2711ad528715bbbb5b41be7c26ace2a
path + /home/op/w/got2
blob - 87cec74f0c075ba839a114bdb6c50bdca8ae9160
file + gotwebd/got_operations.c
--- gotwebd/got_operations.c
+++ gotwebd/got_operations.c
@@ -52,13 +52,6 @@ static int
 static const struct got_error *got_gotweb_blame_cb(void *, int, int,
     struct got_commit_object *,struct got_object_id *);
 
-static int
-isbinary(const uint8_t *buf, size_t n)
-{
-	return memchr(buf, '\0', n) != NULL;
-}
-
-
 static const struct got_error *
 got_gotweb_flushfile(FILE *f, int fd)
 {
@@ -978,8 +971,8 @@ got_output_file_blob(struct request *c)
 	struct got_reflist_head refs;
 	struct got_blob_object *blob = NULL;
 	char *path = NULL, *in_repo_path = NULL;
-	int obj_type, set_mime = 0, fd = -1;
-	size_t len, hdrlen;
+	int bin, obj_type, fd = -1;
+	size_t len;
 	const uint8_t *buf;
 
 	TAILQ_INIT(&refs);
@@ -1028,43 +1021,31 @@ got_output_file_blob(struct request *c)
 	error = got_object_open_as_blob(&blob, repo, commit_id, BUF, fd);
 	if (error)
 		goto done;
-	hdrlen = got_object_blob_get_hdrlen(blob);
-	do {
+
+	error = got_object_blob_is_binary(&bin, blob);
+	if (error)
+		goto done;
+
+	if (bin)
+		error = gotweb_render_content_type_file(c,
+		    "application/octet-stream", qs->file, NULL);
+	else
+		error = gotweb_render_content_type(c, "text/plain");
+
+	if (error) {
+		log_warnx("%s: %s", __func__, error->msg);
+		goto done;
+	}
+
+	for (;;) {
 		error = got_object_blob_read_block(&len, blob);
 		if (error)
 			goto done;
+		if (len == 0)
+			break;
 		buf = got_object_blob_get_read_buf(blob);
-
-		/*
-		 * Skip blob object header first time around,
-		 * which also contains a zero byte.
-		 */
-		buf += hdrlen;
-		if (set_mime == 0) {
-			if (isbinary(buf, len - hdrlen)) {
-				error = gotweb_render_content_type_file(c,
-				    "application/octet-stream",
-				    qs->file, NULL);
-				if (error) {
-					log_warnx("%s: %s", __func__,
-					    error->msg);
-					goto done;
-				}
-			} else {
-				error = gotweb_render_content_type(c,
-				  "text/plain");
-				if (error) {
-					log_warnx("%s: %s", __func__,
-					    error->msg);
-					goto done;
-				}
-			}
-		}
-		set_mime = 1;
-		fcgi_gen_binary_response(c, buf, len - hdrlen);
-
-		hdrlen = 0;
-	} while (len != 0);
+		fcgi_gen_binary_response(c, buf, len);
+	}
 done:
 	if (commit)
 		got_object_commit_close(commit);
blob - 19f043e1c43b4b21c3b19d764f34a7575df5b7a7
file + include/got_object.h
--- include/got_object.h
+++ include/got_object.h
@@ -291,6 +291,13 @@ void got_object_blob_rewind(struct got_blob_object *);
 void got_object_blob_rewind(struct got_blob_object *);
 
 /*
+ * Heuristic to check whether the blob contains binary data.  Rewinds
+ * the blob's data stream back after the header.
+ */
+const struct got_error *got_object_blob_is_binary(int *,
+    struct got_blob_object *);
+
+/*
  * Read the entire content of a blob and write it to the specified file.
  * Flush and rewind the file as well. Indicate the amount of bytes
  * written in the size_t output argument, and the number of lines in the
blob - 7bc7eb3dcdcec57157f35f8b6c676e1979ba2b76
file + lib/object.c
--- lib/object.c
+++ lib/object.c
@@ -402,6 +402,29 @@ got_object_blob_dump_to_file(off_t *filesize, int *nli
 }
 
 const struct got_error *
+got_object_blob_is_binary(int *binary, struct got_blob_object *blob)
+{
+	const struct got_error *err;
+	size_t hdrlen, len;
+
+	*binary = 0;
+	hdrlen = got_object_blob_get_hdrlen(blob);
+
+	if (fseeko(blob->f, hdrlen, SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+
+	err = got_object_blob_read_block(&len, blob);
+	if (err)
+		return err;
+
+	*binary = memchr(blob->read_buf, '\0', len) != NULL;
+
+	if (fseeko(blob->f, hdrlen, SEEK_SET) == -1)
+		return got_error_from_errno("fseeko");
+	return NULL;
+}
+
+const struct got_error *
 got_object_blob_dump_to_file(off_t *filesize, int *nlines,
     off_t **line_offsets, FILE *outfile, struct got_blob_object *blob)
 {