From: Stefan Sperling Subject: Re: provide got_object_blob_is_binary To: Omar Polo Cc: gameoftrees@openbsd.org Date: Tue, 3 Jan 2023 19:54:09 +0100 On Tue, Jan 03, 2023 at 05:16:14PM +0100, Omar Polo wrote: > I'd like to implement a page in gotwebd where a blob can be viewed > inline, inside the html UI, with linkable lines and bells and > whistles. > > The blob apis are very block-oriented and don't expose (for good) the > underlying FILE*. To achieve what I'd like to do in gotwebd however I > need to have > > 1. a way to know if a blob is likely to be binary or not. if it > contains binary data then we may not want to display it inline or > treat it differently. > > 2. a getline-like function. > > > Here I'm doing 1. and showing how it can be already be used to > simplify the logic in gotwebd. 2. will be done in a follow-up diff. > > The fact that it leaves the underlying stream after the header is done > on purpose. User of this function are likely to dump the file reading > block after block or (in the future) line by line, skipping the > initial header would be troublesome if not impossible. Yes, this seems fine. ok. > diff /home/op/w/got2 > commit - 71cd355cb2711ad528715bbbb5b41be7c26ace2a > path + /home/op/w/got2 > blob - 87cec74f0c075ba839a114bdb6c50bdca8ae9160 > file + gotwebd/got_operations.c > --- gotwebd/got_operations.c > +++ gotwebd/got_operations.c > @@ -52,13 +52,6 @@ static int > static const struct got_error *got_gotweb_blame_cb(void *, int, int, > struct got_commit_object *,struct got_object_id *); > > -static int > -isbinary(const uint8_t *buf, size_t n) > -{ > - return memchr(buf, '\0', n) != NULL; > -} > - > - > static const struct got_error * > got_gotweb_flushfile(FILE *f, int fd) > { > @@ -978,8 +971,8 @@ got_output_file_blob(struct request *c) > struct got_reflist_head refs; > struct got_blob_object *blob = NULL; > char *path = NULL, *in_repo_path = NULL; > - int obj_type, set_mime = 0, fd = -1; > - size_t len, hdrlen; > + int bin, obj_type, fd = -1; > + size_t len; > const uint8_t *buf; > > TAILQ_INIT(&refs); > @@ -1028,43 +1021,31 @@ got_output_file_blob(struct request *c) > error = got_object_open_as_blob(&blob, repo, commit_id, BUF, fd); > if (error) > goto done; > - hdrlen = got_object_blob_get_hdrlen(blob); > - do { > + > + error = got_object_blob_is_binary(&bin, blob); > + if (error) > + goto done; > + > + if (bin) > + error = gotweb_render_content_type_file(c, > + "application/octet-stream", qs->file, NULL); > + else > + error = gotweb_render_content_type(c, "text/plain"); > + > + if (error) { > + log_warnx("%s: %s", __func__, error->msg); > + goto done; > + } > + > + for (;;) { > error = got_object_blob_read_block(&len, blob); > if (error) > goto done; > + if (len == 0) > + break; > buf = got_object_blob_get_read_buf(blob); > - > - /* > - * Skip blob object header first time around, > - * which also contains a zero byte. > - */ > - buf += hdrlen; > - if (set_mime == 0) { > - if (isbinary(buf, len - hdrlen)) { > - error = gotweb_render_content_type_file(c, > - "application/octet-stream", > - qs->file, NULL); > - if (error) { > - log_warnx("%s: %s", __func__, > - error->msg); > - goto done; > - } > - } else { > - error = gotweb_render_content_type(c, > - "text/plain"); > - if (error) { > - log_warnx("%s: %s", __func__, > - error->msg); > - goto done; > - } > - } > - } > - set_mime = 1; > - fcgi_gen_binary_response(c, buf, len - hdrlen); > - > - hdrlen = 0; > - } while (len != 0); > + fcgi_gen_binary_response(c, buf, len); > + } > done: > if (commit) > got_object_commit_close(commit); > blob - 19f043e1c43b4b21c3b19d764f34a7575df5b7a7 > file + include/got_object.h > --- include/got_object.h > +++ include/got_object.h > @@ -291,6 +291,13 @@ void got_object_blob_rewind(struct got_blob_object *); > void got_object_blob_rewind(struct got_blob_object *); > > /* > + * Heuristic to check whether the blob contains binary data. Rewinds > + * the blob's data stream back after the header. > + */ > +const struct got_error *got_object_blob_is_binary(int *, > + struct got_blob_object *); > + > +/* > * Read the entire content of a blob and write it to the specified file. > * Flush and rewind the file as well. Indicate the amount of bytes > * written in the size_t output argument, and the number of lines in the > blob - 7bc7eb3dcdcec57157f35f8b6c676e1979ba2b76 > file + lib/object.c > --- lib/object.c > +++ lib/object.c > @@ -402,6 +402,29 @@ got_object_blob_dump_to_file(off_t *filesize, int *nli > } > > const struct got_error * > +got_object_blob_is_binary(int *binary, struct got_blob_object *blob) > +{ > + const struct got_error *err; > + size_t hdrlen, len; > + > + *binary = 0; > + hdrlen = got_object_blob_get_hdrlen(blob); > + > + if (fseeko(blob->f, hdrlen, SEEK_SET) == -1) > + return got_error_from_errno("fseeko"); > + > + err = got_object_blob_read_block(&len, blob); > + if (err) > + return err; > + > + *binary = memchr(blob->read_buf, '\0', len) != NULL; > + > + if (fseeko(blob->f, hdrlen, SEEK_SET) == -1) > + return got_error_from_errno("fseeko"); > + return NULL; > +} > + > +const struct got_error * > got_object_blob_dump_to_file(off_t *filesize, int *nlines, > off_t **line_offsets, FILE *outfile, struct got_blob_object *blob) > { > >