Download raw body.
bufferize smtp parsing in got-notify-email
On 2024/03/20 17:56:14 +0100, Stefan Sperling <stsp@stsp.name> wrote: > On Wed, Mar 20, 2024 at 01:13:28PM +0100, Omar Polo wrote: > > rea(2)ing one byte at a time is a big ugly. This adds a buffer for the > > server replies and reworks read_smtp_code() to consume the data from > > said buffer first. it also makes got_poll_read_full_timeout() > > redundant. > > > > ok? > > > > diff /home/op/w/got > > commit - 39910b637a9a53cc48b0c63766da691dec0af593 > > path + /home/op/w/got > > blob - ef9b199c10bfcb34951de0d9a6e4c0915a91e274 > > file + gotd/libexec/got-notify-email/got-notify-email.c > > --- gotd/libexec/got-notify-email/got-notify-email.c > > +++ gotd/libexec/got-notify-email/got-notify-email.c > > @@ -18,6 +18,7 @@ > > #include <sys/socket.h> > > > > #include <errno.h> > > +#include <poll.h> > > #include <stdio.h> > > #include <stdlib.h> > > #include <string.h> > > @@ -33,7 +34,11 @@ > > > > #include "got_lib_poll.h" > > > > +#define SMTP_LINE_MAX 65535 > > Is this limit part of the SMTP spec? Or where does it come from? I've taken this from smtpd. right now I'm not finding the maximum value from a cursory read of RFC5321. > > static int smtp_timeout = 60; /* in seconds */ > > +static char smtp_buf[SMTP_LINE_MAX]; > > +static size_t smtp_buflen; > > > > __dead static void > > usage(void) > > @@ -111,43 +116,54 @@ static int > > read_smtp_code(int s, const char *code) > > { > > const struct got_error *error; > > - char buf[4]; > > - size_t n; > > + char *endl; > > + size_t linelen; > > + ssize_t r; > > > > - error = got_poll_read_full_timeout(s, &n, buf, 3, 3, smtp_timeout); > > - if (error) > > - errx(1, "read: %s", error->msg); > > - if (strncmp(buf, code, 3) != 0) { > > - buf[3] = '\0'; > > - warnx("unexpected SMTP message code: %s", buf); > > - return -1; > > - } > > - > > - return 0; > > -} > > - > > -static int > > -skip_to_crlf(int s) > > -{ > > - const struct got_error *error; > > - char buf[1]; > > - size_t len; > > - > > for (;;) { > > - error = got_poll_read_full_timeout(s, &len, buf, 1, 1, > > - smtp_timeout); > > + endl = memmem(smtp_buf, smtp_buflen, "\r\n", 2); > > + if (endl != NULL) > > + break; > > + > > + if (smtp_buflen == sizeof(smtp_buf)) > > + errx(1, "line too long"); > > It seems odd to be doing this length check after already using > smtp_buflen in the above memmem call. Wouldn't it make more sense > to run this check after smtp_buflen is incremented at the bottom of > the loop? I think this needs to stay here. Imagine the corner case of having the buffer almost full but no CRLF in it. We poll(), read some more data and fill the buffer: we should still check for a full line before failing. It may make more sense to read this loop starting with the got_poll_fd() call and then wrapping around until here. > > + > > + error = got_poll_fd(s, POLLIN, smtp_timeout); > > if (error) > > - errx(1, "read: %s", error->msg); > > - if (buf[0] == '\r') { > > - error = got_poll_read_full(s, &len, buf, 1, 1); > > - if (error) > > - errx(1, "read: %s", error->msg); > > - if (buf[0] == '\n') > > - return 0; > > - } > > + errx(1, "poll: %s", error->msg); > > + > > + r = read(s, smtp_buf + smtp_buflen, > > + sizeof(smtp_buf) - smtp_buflen); > > + if (r == -1 && errno != EAGAIN) > > + err(1, "read"); > > + if (r == 0) > > + errx(1, "unexpected EOF"); > > + smtp_buflen += r; > > If r == -1 && errno == EAGAIN we end up substracting -1 here. > Probably not intended? Guard by if (r > 0) ? good catch. > What should happen in case r == -1 && errno == EAGAIN? > Shouldn't we immediately jump back into got_poll_fd() again if this > happens, perhaps after sleeping for a bit? actually we can't get EAGAIN here since we just polled with POLLIN, which guarantees us that there's some data to be read without blocking, so I've removed the errno check. > It seems you have essentially reimplemented got_poll_read_full_timeout() > and added the EAGAIN check. Can we implement this change by keeping > got_poll_read_full_timeout() and extend that function with the EAGAIN > check if it is needed? it's not a read_full() since we keep reading only until a full line has been read, not until the buffer is completely filled. > I would like to avoid having variants of this > poll loop spread across the entire code base, since it is extra effort > to review such loops for correctness over and over again. Better to > have it in one place if possible. I agree, but if you're fine with it I'd wait before adding an interface like this to pollfd.c. I'd like to attempt to write the http notifier first and then see what kind of interface could work for both cases. > > } > > > > - return -1; > > + linelen = endl - smtp_buf; > > + if (linelen < 3) > > + errx(1, "invalid SMTP response"); > > + > > + if (strncmp(code, smtp_buf, 3) != 0) { > > + smtp_buf[3] = '\0'; > > + warnx("unexpected SMTP message code: %s", smtp_buf); > > + return -1; > > + } > > + > > + /* > > + * Normally we would get just one reply, but the regress doesn't > > + * use a real SMTP server and queues all the replies upfront. > > + */ > > + linelen += 2; > > + if (smtp_buflen == linelen) > > Maybe write >= here just to make sure we don't drop into the else > clause and do the memmove with bogus length values? I understand > this cannot happen if the buffer smtp response with "\r\n" (and > we wouldn't get here otherwise), but still. In the normal case, you'd be right, but we break the regress like this. A real smtp server will send just one reply, so at each call we only read one line and process it. However, the regress queues up all the replies, so on the first read() we fill the buffer with all the replies, then we just consume the buffer. The if is actually a micro-optimization, I could just remove it and do the memmove unconditionally. at worst, it'll be done with a length of zero. Also, if we'll switch to using EHLO instead of HELO (which is actually deprecated AFAICS) being able to process multiple lines of reply will come in handy. diff refs/heads/main refs/heads/wip commit - 39910b637a9a53cc48b0c63766da691dec0af593 commit + b0d34b790e2b65c81e669db054b57aef73f98251 blob - ef9b199c10bfcb34951de0d9a6e4c0915a91e274 blob + 473330c193539f05cbf6803db74608ea3ae9a0d9 --- gotd/libexec/got-notify-email/got-notify-email.c +++ gotd/libexec/got-notify-email/got-notify-email.c @@ -18,6 +18,7 @@ #include <sys/socket.h> #include <errno.h> +#include <poll.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -33,7 +34,11 @@ #include "got_lib_poll.h" +#define SMTP_LINE_MAX 65535 + static int smtp_timeout = 60; /* in seconds */ +static char smtp_buf[SMTP_LINE_MAX]; +static size_t smtp_buflen; __dead static void usage(void) @@ -111,43 +116,54 @@ static int read_smtp_code(int s, const char *code) { const struct got_error *error; - char buf[4]; - size_t n; + char *endl; + size_t linelen; + ssize_t r; - error = got_poll_read_full_timeout(s, &n, buf, 3, 3, smtp_timeout); - if (error) - errx(1, "read: %s", error->msg); - if (strncmp(buf, code, 3) != 0) { - buf[3] = '\0'; - warnx("unexpected SMTP message code: %s", buf); - return -1; - } - - return 0; -} - -static int -skip_to_crlf(int s) -{ - const struct got_error *error; - char buf[1]; - size_t len; - for (;;) { - error = got_poll_read_full_timeout(s, &len, buf, 1, 1, - smtp_timeout); + endl = memmem(smtp_buf, smtp_buflen, "\r\n", 2); + if (endl != NULL) + break; + + if (smtp_buflen == sizeof(smtp_buf)) + errx(1, "line too long"); + + error = got_poll_fd(s, POLLIN, smtp_timeout); if (error) - errx(1, "read: %s", error->msg); - if (buf[0] == '\r') { - error = got_poll_read_full(s, &len, buf, 1, 1); - if (error) - errx(1, "read: %s", error->msg); - if (buf[0] == '\n') - return 0; - } + errx(1, "poll: %s", error->msg); + + r = read(s, smtp_buf + smtp_buflen, + sizeof(smtp_buf) - smtp_buflen); + if (r == -1) + err(1, "read"); + if (r == 0) + errx(1, "unexpected EOF"); + smtp_buflen += r; } - return -1; + linelen = endl - smtp_buf; + if (linelen < 3) + errx(1, "invalid SMTP response"); + + if (strncmp(code, smtp_buf, 3) != 0) { + smtp_buf[3] = '\0'; + warnx("unexpected SMTP message code: %s", smtp_buf); + return -1; + } + + /* + * Normally we would get just one reply, but the regress doesn't + * use a real SMTP server and queues all the replies upfront. + */ + linelen += 2; + if (smtp_buflen == linelen) + smtp_buflen = 0; + else { + memmove(smtp_buf, smtp_buf + linelen, smtp_buflen - linelen); + smtp_buflen -= linelen; + } + + return 0; } static int @@ -216,36 +232,26 @@ send_email(int s, const char *myfromaddr, const char * if (read_smtp_code(s, "220")) errx(1, "unexpected SMTP greeting received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "HELO localhost\r\n")) errx(1, "could not send HELO"); if (read_smtp_code(s, "250")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "MAIL FROM:<%s>\r\n", myfromaddr)) errx(1, "could not send MAIL FROM"); if (read_smtp_code(s, "250")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "RCPT TO:<%s>\r\n", recipient)) errx(1, "could not send MAIL FROM"); if (read_smtp_code(s, "250")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "DATA\r\n")) errx(1, "could not send MAIL FROM"); if (read_smtp_code(s, "354")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "From: %s\r\n", fromaddr)) errx(1, "could not send From header"); @@ -279,16 +285,12 @@ send_email(int s, const char *myfromaddr, const char * errx(1, "could not send data terminator"); if (read_smtp_code(s, "250")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); if (send_smtp_msg(s, "QUIT\r\n")) errx(1, "could not send QUIT"); if (read_smtp_code(s, "221")) errx(1, "unexpected SMTP response received"); - if (skip_to_crlf(s)) - errx(1, "invalid SMTP message received"); close(s); free(line);
bufferize smtp parsing in got-notify-email