"GOT", but the "O" is a cute, smiling pufferfish. Index | Thread | Search

From:
Stefan Sperling <stsp@stsp.name>
Subject:
Re: bufferize smtp parsing in got-notify-email
To:
Omar Polo <op@omarpolo.com>
Cc:
gameoftrees@openbsd.org
Date:
Wed, 20 Mar 2024 17:56:14 +0100

Download raw body.

Thread
On Wed, Mar 20, 2024 at 01:13:28PM +0100, Omar Polo wrote:
> rea(2)ing one byte at a time is a big ugly.  This adds a buffer for the
> server replies and reworks read_smtp_code() to consume the data from
> said buffer first.  it also makes got_poll_read_full_timeout()
> redundant.
> 
> ok?
> 
> diff /home/op/w/got
> commit - 39910b637a9a53cc48b0c63766da691dec0af593
> path + /home/op/w/got
> blob - ef9b199c10bfcb34951de0d9a6e4c0915a91e274
> file + gotd/libexec/got-notify-email/got-notify-email.c
> --- gotd/libexec/got-notify-email/got-notify-email.c
> +++ gotd/libexec/got-notify-email/got-notify-email.c
> @@ -18,6 +18,7 @@
>  #include <sys/socket.h>
>  
>  #include <errno.h>
> +#include <poll.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -33,7 +34,11 @@
>  
>  #include "got_lib_poll.h"
>  
> +#define SMTP_LINE_MAX	65535

Is this limit part of the SMTP spec? Or where does it come from?

>  static int smtp_timeout = 60; /* in seconds */
> +static char smtp_buf[SMTP_LINE_MAX];
> +static size_t smtp_buflen;
>  
>  __dead static void
>  usage(void)
> @@ -111,43 +116,54 @@ static int
>  read_smtp_code(int s, const char *code)
>  {
>  	const struct got_error *error;
> -	char buf[4];
> -	size_t n;
> +	char	*endl;
> +	size_t	 linelen;
> +	ssize_t	 r;
>  
> -	error = got_poll_read_full_timeout(s, &n, buf, 3, 3, smtp_timeout);
> -	if (error)
> -		errx(1, "read: %s", error->msg);
> -	if (strncmp(buf, code, 3) != 0) {
> -		buf[3] = '\0';
> -		warnx("unexpected SMTP message code: %s", buf);
> -		return -1;
> -	}
> -
> -	return 0;
> -}
> -
> -static int
> -skip_to_crlf(int s)
> -{
> -	const struct got_error *error;
> -	char buf[1];
> -	size_t len;
> -
>  	for (;;) {
> -		error = got_poll_read_full_timeout(s, &len, buf, 1, 1,
> -		    smtp_timeout);
> +		endl = memmem(smtp_buf, smtp_buflen, "\r\n", 2);
> +		if (endl != NULL)
> +			break;
> +
> +		if (smtp_buflen == sizeof(smtp_buf))
> +			errx(1, "line too long");

It seems odd to be doing this length check after already using
smtp_buflen in the above memmem call. Wouldn't it make more sense
to run this check after smtp_buflen is incremented at the bottom of
the loop?

> +
> +		error = got_poll_fd(s, POLLIN, smtp_timeout);
>  		if (error)
> -			errx(1, "read: %s", error->msg);
> -		if (buf[0] == '\r') {
> -			error = got_poll_read_full(s, &len, buf, 1, 1);
> -			if (error)
> -				errx(1, "read: %s", error->msg);
> -			if (buf[0] == '\n')
> -				return 0;
> -		}
> +			errx(1, "poll: %s", error->msg);
> +
> +		r = read(s, smtp_buf + smtp_buflen,
> +		    sizeof(smtp_buf) - smtp_buflen);
> +		if (r == -1 && errno != EAGAIN)
> +			err(1, "read");
> +		if (r == 0)
> +			errx(1, "unexpected EOF");
> +		smtp_buflen += r;

If r == -1 && errno == EAGAIN we end up substracting -1 here.
Probably not intended? Guard by if (r > 0) ?

What should happen in case r == -1 && errno == EAGAIN?
Shouldn't we immediately jump back into got_poll_fd() again if this
happens, perhaps after sleeping for a bit?

It seems you have essentially reimplemented got_poll_read_full_timeout()
and added the EAGAIN check. Can we implement this change by keeping
got_poll_read_full_timeout() and extend that function with the EAGAIN
check if it is needed? I would like to avoid having variants of this
poll loop spread across the entire code base, since it is extra effort
to review such loops for correctness over and over again. Better to
have it in one place if possible.

>  	}
>  
> -	return -1;
> +	linelen = endl - smtp_buf;
> +	if (linelen < 3)
> +		errx(1, "invalid SMTP response");
> +
> +	if (strncmp(code, smtp_buf, 3) != 0) {
> +		smtp_buf[3] = '\0';
> +		warnx("unexpected SMTP message code: %s", smtp_buf);
> +		return -1;
> +	}
> +
> +	/*
> +	 * Normally we would get just one reply, but the regress doesn't
> +	 * use a real SMTP server and queues all the replies upfront.
> +	 */
> +	linelen += 2;
> +	if (smtp_buflen == linelen)

Maybe write >= here just to make sure we don't drop into the else
clause and do the memmove with bogus length values? I understand
this cannot happen if the buffer smtp response with "\r\n" (and
we wouldn't get here otherwise), but still.

> +		smtp_buflen = 0;
> +	else {
> +		memmove(smtp_buf, smtp_buf + linelen, smtp_buflen - linelen);
> +		smtp_buflen -= linelen;
> +	}
> +
> +	return 0;
>  }
>  
>  static int
> @@ -216,36 +232,26 @@ send_email(int s, const char *myfromaddr, const char *
>  
>  	if (read_smtp_code(s, "220"))
>  		errx(1, "unexpected SMTP greeting received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "HELO localhost\r\n"))
>  		errx(1, "could not send HELO");
>  	if (read_smtp_code(s, "250"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "MAIL FROM:<%s>\r\n", myfromaddr))
>  		errx(1, "could not send MAIL FROM");
>  	if (read_smtp_code(s, "250"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "RCPT TO:<%s>\r\n", recipient))
>  		errx(1, "could not send MAIL FROM");
>  	if (read_smtp_code(s, "250"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "DATA\r\n"))
>  		errx(1, "could not send MAIL FROM");
>  	if (read_smtp_code(s, "354"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "From: %s\r\n", fromaddr))
>  		errx(1, "could not send From header");
> @@ -279,16 +285,12 @@ send_email(int s, const char *myfromaddr, const char *
>  		errx(1, "could not send data terminator");
>  	if (read_smtp_code(s, "250"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	if (send_smtp_msg(s, "QUIT\r\n"))
>  		errx(1, "could not send QUIT");
>  
>  	if (read_smtp_code(s, "221"))
>  		errx(1, "unexpected SMTP response received");
> -	if (skip_to_crlf(s))
> -		errx(1, "invalid SMTP message received");
>  
>  	close(s);
>  	free(line);
> 
>