From: Stefan Sperling Subject: Re: bufferize smtp parsing in got-notify-email To: Omar Polo Cc: gameoftrees@openbsd.org Date: Wed, 20 Mar 2024 17:56:14 +0100 On Wed, Mar 20, 2024 at 01:13:28PM +0100, Omar Polo wrote: > rea(2)ing one byte at a time is a big ugly. This adds a buffer for the > server replies and reworks read_smtp_code() to consume the data from > said buffer first. it also makes got_poll_read_full_timeout() > redundant. > > ok? > > diff /home/op/w/got > commit - 39910b637a9a53cc48b0c63766da691dec0af593 > path + /home/op/w/got > blob - ef9b199c10bfcb34951de0d9a6e4c0915a91e274 > file + gotd/libexec/got-notify-email/got-notify-email.c > --- gotd/libexec/got-notify-email/got-notify-email.c > +++ gotd/libexec/got-notify-email/got-notify-email.c > @@ -18,6 +18,7 @@ > #include > > #include > +#include > #include > #include > #include > @@ -33,7 +34,11 @@ > > #include "got_lib_poll.h" > > +#define SMTP_LINE_MAX 65535 Is this limit part of the SMTP spec? Or where does it come from? > static int smtp_timeout = 60; /* in seconds */ > +static char smtp_buf[SMTP_LINE_MAX]; > +static size_t smtp_buflen; > > __dead static void > usage(void) > @@ -111,43 +116,54 @@ static int > read_smtp_code(int s, const char *code) > { > const struct got_error *error; > - char buf[4]; > - size_t n; > + char *endl; > + size_t linelen; > + ssize_t r; > > - error = got_poll_read_full_timeout(s, &n, buf, 3, 3, smtp_timeout); > - if (error) > - errx(1, "read: %s", error->msg); > - if (strncmp(buf, code, 3) != 0) { > - buf[3] = '\0'; > - warnx("unexpected SMTP message code: %s", buf); > - return -1; > - } > - > - return 0; > -} > - > -static int > -skip_to_crlf(int s) > -{ > - const struct got_error *error; > - char buf[1]; > - size_t len; > - > for (;;) { > - error = got_poll_read_full_timeout(s, &len, buf, 1, 1, > - smtp_timeout); > + endl = memmem(smtp_buf, smtp_buflen, "\r\n", 2); > + if (endl != NULL) > + break; > + > + if (smtp_buflen == sizeof(smtp_buf)) > + errx(1, "line too long"); It seems odd to be doing this length check after already using smtp_buflen in the above memmem call. Wouldn't it make more sense to run this check after smtp_buflen is incremented at the bottom of the loop? > + > + error = got_poll_fd(s, POLLIN, smtp_timeout); > if (error) > - errx(1, "read: %s", error->msg); > - if (buf[0] == '\r') { > - error = got_poll_read_full(s, &len, buf, 1, 1); > - if (error) > - errx(1, "read: %s", error->msg); > - if (buf[0] == '\n') > - return 0; > - } > + errx(1, "poll: %s", error->msg); > + > + r = read(s, smtp_buf + smtp_buflen, > + sizeof(smtp_buf) - smtp_buflen); > + if (r == -1 && errno != EAGAIN) > + err(1, "read"); > + if (r == 0) > + errx(1, "unexpected EOF"); > + smtp_buflen += r; If r == -1 && errno == EAGAIN we end up substracting -1 here. Probably not intended? Guard by if (r > 0) ? What should happen in case r == -1 && errno == EAGAIN? Shouldn't we immediately jump back into got_poll_fd() again if this happens, perhaps after sleeping for a bit? It seems you have essentially reimplemented got_poll_read_full_timeout() and added the EAGAIN check. Can we implement this change by keeping got_poll_read_full_timeout() and extend that function with the EAGAIN check if it is needed? I would like to avoid having variants of this poll loop spread across the entire code base, since it is extra effort to review such loops for correctness over and over again. Better to have it in one place if possible. > } > > - return -1; > + linelen = endl - smtp_buf; > + if (linelen < 3) > + errx(1, "invalid SMTP response"); > + > + if (strncmp(code, smtp_buf, 3) != 0) { > + smtp_buf[3] = '\0'; > + warnx("unexpected SMTP message code: %s", smtp_buf); > + return -1; > + } > + > + /* > + * Normally we would get just one reply, but the regress doesn't > + * use a real SMTP server and queues all the replies upfront. > + */ > + linelen += 2; > + if (smtp_buflen == linelen) Maybe write >= here just to make sure we don't drop into the else clause and do the memmove with bogus length values? I understand this cannot happen if the buffer smtp response with "\r\n" (and we wouldn't get here otherwise), but still. > + smtp_buflen = 0; > + else { > + memmove(smtp_buf, smtp_buf + linelen, smtp_buflen - linelen); > + smtp_buflen -= linelen; > + } > + > + return 0; > } > > static int > @@ -216,36 +232,26 @@ send_email(int s, const char *myfromaddr, const char * > > if (read_smtp_code(s, "220")) > errx(1, "unexpected SMTP greeting received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "HELO localhost\r\n")) > errx(1, "could not send HELO"); > if (read_smtp_code(s, "250")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "MAIL FROM:<%s>\r\n", myfromaddr)) > errx(1, "could not send MAIL FROM"); > if (read_smtp_code(s, "250")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "RCPT TO:<%s>\r\n", recipient)) > errx(1, "could not send MAIL FROM"); > if (read_smtp_code(s, "250")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "DATA\r\n")) > errx(1, "could not send MAIL FROM"); > if (read_smtp_code(s, "354")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "From: %s\r\n", fromaddr)) > errx(1, "could not send From header"); > @@ -279,16 +285,12 @@ send_email(int s, const char *myfromaddr, const char * > errx(1, "could not send data terminator"); > if (read_smtp_code(s, "250")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > if (send_smtp_msg(s, "QUIT\r\n")) > errx(1, "could not send QUIT"); > > if (read_smtp_code(s, "221")) > errx(1, "unexpected SMTP response received"); > - if (skip_to_crlf(s)) > - errx(1, "invalid SMTP message received"); > > close(s); > free(line); > >