From: Omar Polo Subject: Re: got-fetch-http: bufio rewrite To: Tobias Heider Cc: gameoftrees@openbsd.org Date: Mon, 15 Apr 2024 23:32:49 +0200 On 2024/04/15 20:12:33 +0200, Tobias Heider wrote: > Here is the current state of my bufio rewrite of got-fetch-http. > This should work better for portable. it is, since there's no way to portably write a funopen() replacement. also, the stdio_tls_{read,write} function are a bit of a hack and it's hard to get a decent error reporting. > No idea if I'm doing this > right but it works. reads fine to me. Thanks also for improving on the bufio layer! > Had to add rpath to pledge but I am pretty > sure that was needed anyway. I believe it's needed for TLS since it has to load certs.pem. Maybe we can initialize libtls upfront to avoid having to keep rpath. > Not sure if we should move the synchronous bufio funcs to lib/bufio.c > instead. i wrote bufio.c in a different project and always used it with an event loop, that's why it lacks some sync functions. I'm not opposed to have them added to lib/bufio.c though, and actually would encourage you to move them there if you think they're useful. > There's still a lot of low hanging fruit to clean up. > Opinions? ok and continue in tree? some nits below which can also be fixed later in tree; it's ok op@ anyway :) (and sorry for the poor state of the code; it was just something more than a proof-of-code, but thanks for improving on it!) > --- libexec/got-fetch-http/got-fetch-http.c > +++ libexec/got-fetch-http/got-fetch-http.c > [...] > +static char * > +bufio_getdelim_sync(struct bufio *bio, const char *nl, size_t *len) (I think I've mis-designed the set of APIs by having buf_getdelim instead of bufio_getdelim) > +{ > + int r; > + > + do { > + r = bufio_read(bio); > + if (r == -1 && errno != EAGAIN) > + errx(1, "bufio_read: %s", bufio_io_err(bio)); > + } while (bio->wantev == BUFIO_WANT_READ); > + return buf_getdelim(&bio->rbuf, nl, len); nit: i'd first try to call buf_getdelim() since may have another line in the buffer already without having to read(). > +} > + > [...] > @@ -224,65 +168,70 @@ http_open(int https, const char *method, const char *h > "%s%s%s\r\n", > method, p, host, GOT_USERAGENT, > chdr ? chdr : "", ctype ? ctype : "", te); > + if (r == -1) > + err(1, "asprintf"); > free(p); > - if (r == -1) > - err(1, "asprintf"); > > if (verbose > 0) > fprintf(stderr, "%s: request: %s", getprogname(), req); > + > > - if (fwrite(req, 1, r, fp) != r) { > - free(req); > - fclose(fp); > - return NULL; > - } > + r = bufio_compose(bio, req, r); you could replace the previous asprintf with bufio_compose_fmt() > + if (r == -1) > + err(1, "bufio_compose_fmt"); > free(req); > > - return fp; > + do { > + r = bufio_write(bio); > + if (r == -1 && errno != EAGAIN) > + errx(1, "bufio_read: %s", bufio_io_err(bio)); > + } while (bio->wantev == BUFIO_WANT_WRITE); I believe tls_write() could fails with TLS_WANT_READ too, also I'd loop until bio->wbuf.len != 0 instead of looking at wantev, which is something to plug the bufio into an event loop (poll / libevent / ...) > + return 0; > } > > [...] > static void > -http_chunk(FILE *fp, const void *buf, size_t len) > +http_chunk(struct bufio *bio, const void *buf, size_t len) > { > - /* fprintf(stderr, "> %.*s", (int)len, (char *)buf); */ > + int r; > + bufio_compose_fmt(bio, "%zx\r\n", len); > + bufio_compose(bio, buf, len); > + bufio_compose(bio, "\r\n", 2); These calls should be error-checked, since hypothetically the first one could fail and the other succeed. Actually, we could make bufio errors sticky, so that it's possible to call multiple times bufio_compose* and check for error only at the end. > - fprintf(fp, "%zx\r\n", len); > - if (fwrite(buf, 1, len, fp) != len || > - fwrite("\r\n", 1, 2, fp) != 2) > - err(1, "%s fwrite", __func__); > + do { > + r = bufio_write(bio); > + if (r == -1 && errno != EAGAIN) > + errx(1, "bufio_read: %s", bufio_io_err(bio)); > + } while (bio->wantev == BUFIO_WANT_WRITE); same here wrt looping on wantev. > } > > static int > get_refs(int https, const char *host, const char *port, const char *path) > { > + struct bufio bio; > char buf[HTTP_BUFSIZ]; > const char *errstr, *sufx = "/info/refs"; > - FILE *fp; > size_t skip, chunksz = 0; > ssize_t r; > int chunked; > + int sock; > + int ret = -1; > > - fp = http_open(https, "GET", host, port, path, sufx, > - "service=git-upload-pack", NULL); > - if (fp == NULL) > + if ((sock = dial(https, host, port)) == -1) > return -1; > > - if (http_parse_reply(fp, &chunked, UPLOAD_PACK_ADV) == -1) { > - fclose(fp); > - return -1; > + bufio_init(&bio); > + bufio_set_fd(&bio, sock); > + if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) { I'd explicitly check for a -1 here > [...] > @@ -459,27 +397,41 @@ get_refs(int https, const char *host, const char *port > } > > fflush(stdout); > - fclose(fp); > - return 0; > + ret = 0; > +err: > + bufio_close(&bio); close should need a loop too since tls_close() will do some I/O; the other party may get an TLS error otherwise (harmless since we're done with the request but still.) > + bufio_free(&bio); > + return ret; > } > > static int > upload_request(int https, const char *host, const char *port, const char *path, > FILE *in) > { > + struct bufio bio; > const char *errstr; > char buf[HTTP_BUFSIZ]; > - FILE *fp; > ssize_t r; > size_t chunksz = 0; > long long t; > int chunked; > + int sock; > + int ret = -1; > > - fp = http_open(https, "POST", host, port, path, "/git-upload-pack", > - NULL, UPLOAD_PACK_REQ); > - if (fp == NULL) > + if ((sock = dial(https, host, port)) == -1) > return -1; > > + bufio_init(&bio); this can fail for ENOMEM; unlikely at this point but should still be checked. > + bufio_set_fd(&bio, sock); > + if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) { > + warnx("bufio_starttls"); > + goto err; > + } > + > + if (http_open(&bio, https, "POST", host, port, path, "/git-upload-pack", > + NULL, UPLOAD_PACK_REQ) == -1) > + goto err; > + > for (;;) { > r = fread(buf, 1, 4, in); > if (r != 4) > @@ -495,8 +447,8 @@ upload_request(int https, const char *host, const char > /* no idea why 0000 is not enough. */ > if (t == 0) { > const char *x = "00000009done\n"; > - http_chunk(fp, x, strlen(x)); > - http_chunk(fp, NULL, 0); > + http_chunk(&bio, x, strlen(x)); > + http_chunk(&bio, NULL, 0); > break; > } > > @@ -509,18 +461,16 @@ upload_request(int https, const char *host, const char > if (r != t - 4) > goto err; > > - http_chunk(fp, buf, t); > + http_chunk(&bio, buf, t); > } > > - if (http_parse_reply(fp, &chunked, UPLOAD_PACK_RES) == -1) > + if (http_parse_reply(&bio, &chunked, UPLOAD_PACK_RES) == -1) > goto err; > > for (;;) { > - r = http_read(fp, chunked, &chunksz, buf, sizeof(buf)); > - if (r == -1) { > - fclose(fp); > - return -1; > - } > + r = http_read(&bio, chunked, &chunksz, buf, sizeof(buf)); > + if (r == -1) > + goto err; > > if (r == 0) > break; > @@ -528,12 +478,11 @@ upload_request(int https, const char *host, const char > fwrite(buf, 1, r, stdout); > } > > - fclose(fp); > - return 0; > - > + ret = 0; > err: > - fclose(fp); > - return -1; > + bufio_close(&bio); another bufio_close() call that may need to be called again. > + bufio_free(&bio); > + return ret; > } > > static __dead void > @@ -558,7 +507,7 @@ main(int argc, char **argv) > #endif > > #if !DEBUG_HTTP || defined(PROFILE) > - if (pledge("stdio inet dns", NULL) == -1) > + if (pledge("stdio inet dns rpath", NULL) == -1) nit: i'd sort them as "stdio rpath inet dns" to respect the order in the manpage. > err(1, "pledge"); > #endif >