"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Tobias Heider <tobias.heider@stusta.de>
Subject:
Re: got-fetch-http: bufio rewrite
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Mon, 15 Apr 2024 23:40:28 +0200

Download raw body.

Thread
Thanks, replies inline.

On Mon, Apr 15, 2024 at 11:32:49PM +0200, Omar Polo wrote:
> On 2024/04/15 20:12:33 +0200, Tobias Heider <tobias.heider@stusta.de> wrote:
> > Here is the current state of my bufio rewrite of got-fetch-http.
> > This should work better for portable.
> 
> it is, since there's no way to portably write a funopen() replacement.
> also, the stdio_tls_{read,write} function are a bit of a hack and it's
> hard to get a decent error reporting.
> 
> > No idea if I'm doing this
> > right but it works.
> 
> reads fine to me.  Thanks also for improving on the bufio layer!
> 
> > Had to add rpath to pledge but I am pretty
> > sure that was needed anyway. 
> 
> I believe it's needed for TLS since it has to load certs.pem.  Maybe we
> can initialize libtls upfront to avoid having to keep rpath.

ktrace says that is right. I think we can improve more. I still think we could
open the session and then drop everything.

> 
> > Not sure if we should move the synchronous bufio funcs to lib/bufio.c
> > instead.
> 
> i wrote bufio.c in a different project and always used it with an event
> loop, that's why it lacks some sync functions.  I'm not opposed to have
> them added to lib/bufio.c though, and actually would encourage you to
> move them there if you think they're useful.

ack, thx

> 
> > There's still a lot of low hanging fruit to clean up.
> > Opinions? ok and continue in tree?
> 
> some nits below which can also be fixed later in tree; it's ok op@
> anyway :)
> 
> (and sorry for the poor state of the code; it was just something more
> than a proof-of-code, but thanks for improving on it!)
> 
> > --- libexec/got-fetch-http/got-fetch-http.c
> > +++ libexec/got-fetch-http/got-fetch-http.c
> > [...]
> > +static char *
> > +bufio_getdelim_sync(struct bufio *bio, const char *nl, size_t *len)
> 
> (I think I've mis-designed the set of APIs by having buf_getdelim
> instead of bufio_getdelim)
> 
> > +{
> > +	int	r;
> > +
> > +	do {
> > +		r = bufio_read(bio);
> > +		if (r == -1 && errno != EAGAIN)
> > +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> > +	} while (bio->wantev == BUFIO_WANT_READ);
> > +	return buf_getdelim(&bio->rbuf, nl, len);
> 
> nit: i'd first try to call buf_getdelim() since may have another line
> in the buffer already without having to read().

makes sense

> 
> > +}
> > +
> > [...]
> > @@ -224,65 +168,70 @@ http_open(int https, const char *method, const char *h
> >  	    "%s%s%s\r\n",
> >  	    method, p, host, GOT_USERAGENT,
> >  	    chdr ? chdr : "", ctype ? ctype : "", te);
> > +	if (r == -1)
> > +		err(1, "asprintf");
> >  	free(p);
> > -	if (r == -1)
> > -		err(1, "asprintf");
> >  
> >  	if (verbose > 0)
> >  		fprintf(stderr, "%s: request: %s", getprogname(), req);
> > +	
> >  
> > -	if (fwrite(req, 1, r, fp) != r) {
> > -		free(req);
> > -		fclose(fp);
> > -		return NULL;
> > -	}
> > +	r = bufio_compose(bio, req, r);
> 
> you could replace the previous asprintf with bufio_compose_fmt()

Had that in a previous version but this makes it easier to print in the
verbose case.

> 
> > +	if (r == -1)
> > +		err(1, "bufio_compose_fmt");
> >  	free(req);
> >  
> > -	return fp;
> > +	do {
> > +		r = bufio_write(bio);
> > +		if (r == -1 && errno != EAGAIN)
> > +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> > +	} while (bio->wantev == BUFIO_WANT_WRITE);
> 
> I believe tls_write() could fails with TLS_WANT_READ too, also I'd loop
> until bio->wbuf.len != 0 instead of looking at wantev, which is
> something to plug the bufio into an event loop (poll / libevent / ...)

will fix

> 
> > +	return 0;
> >  }
> >  
> > [...]
> >  static void
> > -http_chunk(FILE *fp, const void *buf, size_t len)
> > +http_chunk(struct bufio *bio, const void *buf, size_t len)
> >  {
> > -	/* fprintf(stderr, "> %.*s", (int)len, (char *)buf); */
> > +	int r;
> > +	bufio_compose_fmt(bio, "%zx\r\n", len);
> > +	bufio_compose(bio, buf, len);
> > +	bufio_compose(bio, "\r\n", 2);
> 
> These calls should be error-checked, since hypothetically the first one
> could fail and the other succeed.  Actually, we could make bufio errors
> sticky, so that it's possible to call multiple times bufio_compose* and
> check for error only at the end.
> 

will fix

> > -	fprintf(fp, "%zx\r\n", len);
> > -	if (fwrite(buf, 1, len, fp) != len ||
> > -	    fwrite("\r\n", 1, 2, fp) != 2)
> > -		err(1, "%s fwrite", __func__);
> > +	do {
> > +		r = bufio_write(bio);
> > +		if (r == -1 && errno != EAGAIN)
> > +			errx(1, "bufio_read: %s", bufio_io_err(bio));
> > +	} while (bio->wantev == BUFIO_WANT_WRITE);
> 
> same here wrt looping on wantev.

same

> 
> >  }
> >  
> >  static int
> >  get_refs(int https, const char *host, const char *port, const char *path)
> >  {
> > +	struct bufio	 bio;
> >  	char		 buf[HTTP_BUFSIZ];
> >  	const char	*errstr, *sufx = "/info/refs";
> > -	FILE		*fp;
> >  	size_t		 skip, chunksz = 0;
> >  	ssize_t		 r;
> >  	int		 chunked;
> > +	int		 sock;
> > +	int		 ret = -1;
> >  
> > -	fp = http_open(https, "GET", host, port, path, sufx,
> > -	    "service=git-upload-pack", NULL);
> > -	if (fp == NULL)
> > +	if ((sock = dial(https, host, port)) == -1)
> >  		return -1;
> >  
> > -	if (http_parse_reply(fp, &chunked, UPLOAD_PACK_ADV) == -1) {
> > -		fclose(fp);
> > -		return -1;
> > +	bufio_init(&bio);
> > +	bufio_set_fd(&bio, sock);
> > +	if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) {
> 
> I'd explicitly check for a -1 here

ack, not sure how I lost that when copying it from got-notify-http

> 
> > [...]
> > @@ -459,27 +397,41 @@ get_refs(int https, const char *host, const char *port
> >  	}
> >  
> >  	fflush(stdout);
> > -	fclose(fp);
> > -	return 0;
> > +	ret = 0;
> > +err:
> > +	bufio_close(&bio);
> 
> close should need a loop too since tls_close() will do some I/O; the
> other party may get an TLS error otherwise (harmless since we're done
> with the request but still.)

I will make a sync version then.

> 
> > +	bufio_free(&bio);
> > +	return ret;
> >  }
> >  
> >  static int
> >  upload_request(int https, const char *host, const char *port, const char *path,
> >      FILE *in)
> >  {
> > +	struct bufio	 bio;
> >  	const char	*errstr;
> >  	char		 buf[HTTP_BUFSIZ];
> > -	FILE		*fp;
> >  	ssize_t		 r;
> >  	size_t		 chunksz = 0;
> >  	long long	 t;
> >  	int		 chunked;
> > +	int		 sock;
> > +	int		 ret = -1;
> >  
> > -	fp = http_open(https, "POST", host, port, path, "/git-upload-pack",
> > -	    NULL, UPLOAD_PACK_REQ);
> > -	if (fp == NULL)
> > +	if ((sock = dial(https, host, port)) == -1)
> >  		return -1;
> >  
> > +	bufio_init(&bio);
> 
> this can fail for ENOMEM; unlikely at this point but should still be
> checked.

ack

> 
> > +	bufio_set_fd(&bio, sock);
> > +	if (https && bufio_starttls(&bio, host, 0, NULL, 0, NULL, 0)) {
> > +		warnx("bufio_starttls");
> > +		goto err;
> > +	}
> > +
> > +	if (http_open(&bio, https, "POST", host, port, path, "/git-upload-pack",
> > +	    NULL, UPLOAD_PACK_REQ) == -1)
> > +		goto err;
> > +
> >  	for (;;) {
> >  		r = fread(buf, 1, 4, in);
> >  		if (r != 4)
> > @@ -495,8 +447,8 @@ upload_request(int https, const char *host, const char
> >  		/* no idea why 0000 is not enough. */
> >  		if (t == 0) {
> >  			const char *x = "00000009done\n";
> > -			http_chunk(fp, x, strlen(x));
> > -			http_chunk(fp, NULL, 0);
> > +			http_chunk(&bio, x, strlen(x));
> > +			http_chunk(&bio, NULL, 0);
> >  			break;
> >  		}
> >  
> > @@ -509,18 +461,16 @@ upload_request(int https, const char *host, const char
> >  		if (r != t - 4)
> >  			goto err;
> >  
> > -		http_chunk(fp, buf, t);
> > +		http_chunk(&bio, buf, t);
> >  	}
> >  
> > -	if (http_parse_reply(fp, &chunked, UPLOAD_PACK_RES) == -1)
> > +	if (http_parse_reply(&bio, &chunked, UPLOAD_PACK_RES) == -1)
> >  		goto err;
> >  
> >  	for (;;) {
> > -		r = http_read(fp, chunked, &chunksz, buf, sizeof(buf));
> > -		if (r == -1) {
> > -			fclose(fp);
> > -			return -1;
> > -		}
> > +		r = http_read(&bio, chunked, &chunksz, buf, sizeof(buf));
> > +		if (r == -1)
> > +			goto err;
> >  
> >  		if (r == 0)
> >  			break;
> > @@ -528,12 +478,11 @@ upload_request(int https, const char *host, const char
> >  		fwrite(buf, 1, r, stdout);
> >  	}
> >  
> > -	fclose(fp);
> > -	return 0;
> > -
> > +	ret = 0;
> >  err:
> > -	fclose(fp);
> > -	return -1;
> > +	bufio_close(&bio);
> 
> another bufio_close() call that may need to be called again.

fixed by having a sync version

> 
> > +	bufio_free(&bio);
> > +	return ret;
> >  }
> >  
> >  static __dead void
> > @@ -558,7 +507,7 @@ main(int argc, char **argv)
> >  #endif
> >  
> >  #if !DEBUG_HTTP || defined(PROFILE)
> > -	if (pledge("stdio inet dns", NULL) == -1)
> > +	if (pledge("stdio inet dns rpath", NULL) == -1)
> 
> nit: i'd sort them as "stdio rpath inet dns" to respect the order in the
> manpage.

sure

> 
> >  		err(1, "pledge");
> >  #endif
> >  
> 
>