"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Omar Polo <op@omarpolo.com>
Subject:
Re: got-fetch-http: bufio rewrite
To:
Tobias Heider <tobias.heider@stusta.de>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 15 Apr 2024 23:32:49 +0200

Download raw body.

Thread
On 2024/04/15 20:12:33 +0200, Tobias Heider <tobias.heider@stusta.de> wrote:
> Here is the current state of my bufio rewrite of got-fetch-http.
> This should work better for portable.

it is, since there's no way to portably write a funopen() replacement.
also, the stdio_tls_{read,write} function are a bit of a hack and it's
hard to get a decent error reporting.

> No idea if I'm doing this
> right but it works.

reads fine to me.  Thanks also for improving on the bufio layer!

> Had to add rpath to pledge but I am pretty
> sure that was needed anyway. 

I believe it's needed for TLS since it has to load certs.pem.  Maybe we
can initialize libtls upfront to avoid having to keep rpath.

> Not sure if we should move the synchronous bufio funcs to lib/bufio.c
> instead.

i wrote bufio.c in a different project and always used it with an event
loop, that's why it lacks some sync functions.  I'm not opposed to have
them added to lib/bufio.c though, and actually would encourage you to
move them there if you think they're useful.

> There's still a lot of low hanging fruit to clean up.
> Opinions? ok and continue in tree?

some nits below which can also be fixed later in tree; it's ok op@
anyway :)

(and sorry for the poor state of the code; it was just something more
than a proof-of-code, but thanks for improving on it!)

> --- libexec/got-fetch-http/got-fetch-http.c
> +++ libexec/got-fetch-http/got-fetch-http.c
> [...]
> +static char *
> +bufio_getdelim_sync(struct bufio *bio, const char *nl, size_t *len)

(I think I've mis-designed the set of APIs by having buf_getdelim
instead of bufio_getdelim)

> +{
> +	int	r;
> +
> +	do {
> +		r = bufio_read(bio);
> +		if (r == -1 && errno != EAGAIN)
> +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> +	} while (bio->wantev == BUFIO_WANT_READ);
> +	return buf_getdelim(&bio->rbuf, nl, len);

nit: i'd first try to call buf_getdelim() since may have another line
in the buffer already without having to read().

> +}
> +
> [...]
> @@ -224,65 +168,70 @@ http_open(int https, const char *method, const char *h
>  	    "%s%s%s\r\n",
>  	    method, p, host, GOT_USERAGENT,
>  	    chdr ? chdr : "", ctype ? ctype : "", te);
> +	if (r == -1)
> +		err(1, "asprintf");
>  	free(p);
> -	if (r == -1)
> -		err(1, "asprintf");
>  
>  	if (verbose > 0)
>  		fprintf(stderr, "%s: request: %s", getprogname(), req);
> +	
>  
> -	if (fwrite(req, 1, r, fp) != r) {
> -		free(req);
> -		fclose(fp);
> -		return NULL;
> -	}
> +	r = bufio_compose(bio, req, r);

you could replace the previous asprintf with bufio_compose_fmt()

> +	if (r == -1)
> +		err(1, "bufio_compose_fmt");
>  	free(req);
>  
> -	return fp;
> +	do {
> +		r = bufio_write(bio);
> +		if (r == -1 && errno != EAGAIN)
> +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> +	} while (bio->wantev == BUFIO_WANT_WRITE);

I believe tls_write() could fails with TLS_WANT_READ too, also I'd loop
until bio->wbuf.len != 0 instead of looking at wantev, which is
something to plug the bufio into an event loop (poll / libevent / ...)

> +	return 0;
>  }
>  
> [...]
>  static void
> -http_chunk(FILE *fp, const void *buf, size_t len)
> +http_chunk(struct bufio *bio, const void *buf, size_t len)
>  {
> -	/* fprintf(stderr, "> %.*s", (int)len, (char *)buf); */
> +	int r;
> +	bufio_compose_fmt(bio, "%zx\r\n", len);
> +	bufio_compose(bio, buf, len);
> +	bufio_compose(bio, "\r\n", 2);

These calls should be error-checked, since hypothetically the first one
could fail and the other succeed.  Actually, we could make bufio errors
sticky, so that it's possible to call multiple times bufio_compose* and
check for error only at the end.

> -	fprintf(fp, "%zx\r\n", len);
> -	if (fwrite(buf, 1, len, fp) != len ||
> -	    fwrite("\r\n", 1, 2, fp) != 2)
> -		err(1, "%s fwrite", __func__);
> +	do {
> +		r = bufio_write(bio);
> +		if (r == -1 && errno != EAGAIN)
> +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> +	} while (bio->wantev == BUFIO_WANT_WRITE);

same here wrt looping on wantev.

>  }
>  
>  static int
>  get_refs(int https, const char *host, const char *port, const char *path)
>  {
> +	struct bufio	 bio;
>  	char		 buf[HTTP_BUFSIZ];
>  	const char	*errstr, *sufx = "/info/refs";
> -	FILE		*fp;
>  	size_t		 skip, chunksz = 0;
>  	ssize_t		 r;
>  	int		 chunked;
> +	int		 sock;
> +	int		 ret = -1;
>  
> -	fp = http_open(https, "GET", host, port, path, sufx,
> -	    "service=git-upload-pack", NULL);
> -	if (fp == NULL)
> +	if ((sock = dial(https, host, port)) == -1)
>  		return -1;
>  
> -	if (http_parse_reply(fp, &chunked, UPLOAD_PACK_ADV) == -1) {
> -		fclose(fp);
> -		return -1;
> +	bufio_init(&bio);
> +	bufio_set_fd(&bio, sock);
> +	if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) {

I'd explicitly check for a -1 here

> [...]
> @@ -459,27 +397,41 @@ get_refs(int https, const char *host, const char *port
>  	}
>  
>  	fflush(stdout);
> -	fclose(fp);
> -	return 0;
> +	ret = 0;
> +err:
> +	bufio_close(&bio);

close should need a loop too since tls_close() will do some I/O; the
other party may get an TLS error otherwise (harmless since we're done
with the request but still.)

> +	bufio_free(&bio);
> +	return ret;
>  }
>  
>  static int
>  upload_request(int https, const char *host, const char *port, const char *path,
>      FILE *in)
>  {
> +	struct bufio	 bio;
>  	const char	*errstr;
>  	char		 buf[HTTP_BUFSIZ];
> -	FILE		*fp;
>  	ssize_t		 r;
>  	size_t		 chunksz = 0;
>  	long long	 t;
>  	int		 chunked;
> +	int		 sock;
> +	int		 ret = -1;
>  
> -	fp = http_open(https, "POST", host, port, path, "/git-upload-pack",
> -	    NULL, UPLOAD_PACK_REQ);
> -	if (fp == NULL)
> +	if ((sock = dial(https, host, port)) == -1)
>  		return -1;
>  
> +	bufio_init(&bio);

this can fail for ENOMEM; unlikely at this point but should still be
checked.

> +	bufio_set_fd(&bio, sock);
> +	if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) {
> +		warnx("bufio_starttls");
> +		goto err;
> +	}
> +
> +	if (http_open(&bio, https, "POST", host, port, path, "/git-upload-pack",
> +	    NULL, UPLOAD_PACK_REQ) == -1)
> +		goto err;
> +
>  	for (;;) {
>  		r = fread(buf, 1, 4, in);
>  		if (r != 4)
> @@ -495,8 +447,8 @@ upload_request(int https, const char *host, const char
>  		/* no idea why 0000 is not enough. */
>  		if (t == 0) {
>  			const char *x = "00000009done\n";
> -			http_chunk(fp, x, strlen(x));
> -			http_chunk(fp, NULL, 0);
> +			http_chunk(&bio, x, strlen(x));
> +			http_chunk(&bio, NULL, 0);
>  			break;
>  		}
>  
> @@ -509,18 +461,16 @@ upload_request(int https, const char *host, const char
>  		if (r != t - 4)
>  			goto err;
>  
> -		http_chunk(fp, buf, t);
> +		http_chunk(&bio, buf, t);
>  	}
>  
> -	if (http_parse_reply(fp, &chunked, UPLOAD_PACK_RES) == -1)
> +	if (http_parse_reply(&bio, &chunked, UPLOAD_PACK_RES) == -1)
>  		goto err;
>  
>  	for (;;) {
> -		r = http_read(fp, chunked, &chunksz, buf, sizeof(buf));
> -		if (r == -1) {
> -			fclose(fp);
> -			return -1;
> -		}
> +		r = http_read(&bio, chunked, &chunksz, buf, sizeof(buf));
> +		if (r == -1)
> +			goto err;
>  
>  		if (r == 0)
>  			break;
> @@ -528,12 +478,11 @@ upload_request(int https, const char *host, const char
>  		fwrite(buf, 1, r, stdout);
>  	}
>  
> -	fclose(fp);
> -	return 0;
> -
> +	ret = 0;
>  err:
> -	fclose(fp);
> -	return -1;
> +	bufio_close(&bio);

another bufio_close() call that may need to be called again.

> +	bufio_free(&bio);
> +	return ret;
>  }
>  
>  static __dead void
> @@ -558,7 +507,7 @@ main(int argc, char **argv)
>  #endif
>  
>  #if !DEBUG_HTTP || defined(PROFILE)
> -	if (pledge("stdio inet dns", NULL) == -1)
> +	if (pledge("stdio inet dns rpath", NULL) == -1)

nit: i'd sort them as "stdio rpath inet dns" to respect the order in the
manpage.

>  		err(1, "pledge");
>  #endif
>