Hello again, It's been a few years since I first reported a problem with inc's handling of long lines (e.g. those that are clearly longer than RFC mandated lengths). Oddly enough, I have only ever seen it coming from one particular sender (and the problem still persists to this day). Every other email sender from whom I receive email has never presented a problem.
For historical context: https://lists.nongnu.org/archive/html/nmh-workers/2019-09/msg00007.html At any rate, I've been mulling over the source for the past few days and decided to take a stab at addressing the issue. Along the way I found 2 other bugs. Not only does inc fail with long lines that are greater than 32768 bytes, it also truncates lines that are less than that value and longer than 1024 bytes (not sure if this behavior is intentional, but it seems like an oversight to me). The other minor bug is that it appends an extra newline to the body after scanning the terminating dot (.) at the end of the transaction, which I decided was fairly innocuous so I didn't spend time on it. Ken made the suggestion that inc should just be able to handle longer lines. Turns out that was quite a challenge not being familiar with the code and it's possible that my modifications are more complicated than necessary, but attached is an attempt to make it work. As it turns out, inc's ability to POP email had no concept of "header" and "body", but merely "command" and "lines of content", so I sought to make it grok when the header ends and the body starts. After it finds the end of headers (a line with 0 bytes of data because CRNL are stripped from the line) it then stops requiring CRNL and reads in chunks of data instead, while still looking for CRNL for conversion. (I thought it should be strict with CRNL parsing during headers, but perhaps that's a flaw in this particular change I've made). When I got that working I then found that most of the message was still missing, and that's when I discovered that inc discarded any characters after 1024 bytes on a line. I've been running it for a couple of days and it seems to work as I haven't discovered any corrupted message or lost anything; it even preserves the huge lines that shouldn't be present. Can this be done better another way? Is it sufficient? The patch is against 1.7.1 since that is what is installed on my system. Thanks, Andy
Index: h/netsec.h ================================================================== --- h/netsec.h +++ h/netsec.h @@ -149,11 +149,11 @@ * * Returns pointer to string, or NULL on error. */ char *netsec_readline(netsec_context *ns_context, size_t *length, - char **errstr); + char **errstr, int strict); /* * Read bytes from the network. * * Arguments: Index: mts/smtp/smtp.c ================================================================== --- mts/smtp/smtp.c +++ mts/smtp/smtp.c @@ -783,11 +783,11 @@ sm_reply.text[0] = 0; rp = sm_reply.text; rc = sizeof(sm_reply.text) - 1; for (more = FALSE; (buffer = netsec_readline(nsc, &buflen, - &errstr)) != NULL ; ) { + &errstr, TRUE)) != NULL ; ) { if (doingEHLO && has_prefix(buffer, "250") && (buffer[3] == '-' || doingEHLO == 2) && buffer[4]) { @@ -993,11 +993,11 @@ * by base64 response data. */ netsec_set_snoop_callback(nsc, netsec_b64_snoop_decoder, &snoopoffset); snoopoffset = 4; - line = netsec_readline(nsc, &len, errstr); + line = netsec_readline(nsc, &len, errstr, TRUE); netsec_set_snoop_callback(nsc, NULL, NULL); if (line == NULL) return NOTOK; @@ -1049,11 +1049,11 @@ case NETSEC_SASL_FINISH: /* * Finish the protocol; we're looking for a 235 message. */ - line = netsec_readline(nsc, &len, errstr); + line = netsec_readline(nsc, &len, errstr, TRUE); if (line == NULL) return NOTOK; if (!has_prefix(line, "235 ")) { if (len > 4) Index: sbr/netsec.c ================================================================== --- sbr/netsec.c +++ sbr/netsec.c @@ -44,10 +44,12 @@ #endif /* TLS_SUPPORT */ /* I'm going to hardcode this for now; maybe make it adjustable later? */ #define NETSEC_BUFSIZE 65536 +int addnl; + /* * Our context structure, which holds all of the relevant information * about a connection. */ @@ -424,23 +426,25 @@ * - If your data may contain embedded NULs, this won't work. You should * be using netsec_read() in that case. */ char * -netsec_readline(netsec_context *nsc, size_t *len, char **errstr) +netsec_readline(netsec_context *nsc, size_t *len, char **errstr, int strict) { unsigned char *ptr = nsc->ns_inptr; size_t count = 0, offset; retry: /* * Search through our existing buffer for a LF */ + addnl = FALSE; - while (count < nsc->ns_inbuflen) { + while (count < nsc->ns_inbuflen && count < BUFSIZ - 2) { count++; if (*ptr++ == '\n') { + addnl = TRUE; char *sptr = (char *) nsc->ns_inptr; if (count > 1 && *(ptr - 2) == '\r') ptr--; *--ptr = '\0'; if (len) @@ -465,19 +469,35 @@ } return sptr; } } - /* - * Hm, we didn't find a \n. If we've already searched half of the input - * buffer, return an error. - */ - - if (count >= nsc->ns_inbufsize / 2) { - netsec_err(errstr, "Unable to find a line terminator after %d bytes", - count); - return NULL; + /* + * Hm, we didn't find a \n. Return the a half buffer if not strictly + * searching for \n. + * Otherwise, if we've already searched half of the input + * buffer, return an error. + */ + + if (strict == TRUE) { + if (count > nsc->ns_inbufsize / 2) { + netsec_err(errstr, + "Unable to find a line terminator after %d bytes", + count); + return NULL; + } + } else { + if (count) { + count = min(BUFSIZ / 2, count); + ptr = nsc->ns_inptr + count; + char *sptr = (char *) nsc->ns_inptr; + if (len) + *len = ptr - nsc->ns_inptr; + nsc->ns_inptr += count; + nsc->ns_inbuflen -= count; + return sptr; + } } /* * Okay, get some more network data. This may move inptr, so regenerate * our ptr value; Index: uip/inc.c ================================================================== --- uip/inc.c +++ uip/inc.c @@ -100,10 +100,11 @@ } *Maildir = NULL; static int num_maildir_entries = 0; static int snoop = 0; extern char response[]; +extern int addnl; static long start; static long stop; static FILE *pf = NULL; @@ -934,9 +935,13 @@ } static int pop_action (char *s) { - fprintf (pf, "%s\n", s); + if (addnl == TRUE) { + fprintf (pf, "%s\n", s); + } else { + fprintf (pf, "%s", s); + } stop += strlen (s) + 1; return 0; /* Is return value used? This was missing before 1999-07-15. */ } Index: uip/popsbr.c ================================================================== --- uip/popsbr.c +++ uip/popsbr.c @@ -23,15 +23,15 @@ /* * static prototypes */ static int command(const char *, ...); -static int multiline(void); +static int multiline(int); static int traverse (int (*)(char *), const char *, ...); static int vcommand(const char *, va_list); -static int pop_getline (char *, int, netsec_context *); +static int pop_getline (char *, int, netsec_context *, int); static int pop_sasl_callback(enum sasl_message_type, unsigned const char *, unsigned int, unsigned char **, unsigned int *, char **); static int @@ -52,11 +52,11 @@ "The POP CAPA command failed; POP server does not " "support SASL"); return NOTOK; } - while ((status = multiline()) != DONE) + while ((status = multiline(TRUE)) != DONE) switch (status) { case NOTOK: return NOTOK; break; case DONE: /* Shouldn't be possible, but just in case */ @@ -233,11 +233,11 @@ free(errstr); return NOTOK; } } - switch (pop_getline (response, sizeof response, nsc)) { + switch (pop_getline (response, sizeof response, nsc, TRUE)) { case OK: if (poprint) fprintf (stderr, "<--- %s\n", response); if (*response == '+') { nmh_creds_t creds; @@ -327,11 +327,11 @@ rc = netsec_printf(nsc, errstr, "AUTH %s\r\n", mech); if (rc) return NOTOK; if (netsec_flush(nsc, errstr) != OK) return NOTOK; - line = netsec_readline(nsc, &len, errstr); + line = netsec_readline(nsc, &len, errstr, 1); if (! line) return NOTOK; /* * If the protocol is being followed correctly, should just * be a "+ ", nothing else. @@ -376,11 +376,11 @@ * and feed it back into the SASL library. */ case NETSEC_SASL_READ: netsec_set_snoop_callback(nsc, netsec_b64_snoop_decoder, &snoopoffset); snoopoffset = 2; - line = netsec_readline(nsc, &len, errstr); + line = netsec_readline(nsc, &len, errstr, 1); netsec_set_snoop_callback(nsc, NULL, NULL); if (line == NULL) return NOTOK; if (len < 2 || (len == 2 && strcmp(line, "+ ") != 0)) { @@ -428,11 +428,11 @@ /* * Finish the protocol; we're looking for an +OK */ case NETSEC_SASL_FINISH: - line = netsec_readline(nsc, &len, errstr); + line = netsec_readline(nsc, &len, errstr, 1); if (line == NULL) return NOTOK; if (!has_prefix(line, "+OK")) { netsec_err(errstr, "Authentication failed: %s", line); @@ -489,10 +489,11 @@ static int traverse (int (*action)(char *), const char *fmt, ...) { int result, snoopstate; + int seenhdr = FALSE; va_list ap; char buffer[sizeof(response)]; va_start(ap, fmt); result = vcommand (fmt, ap); @@ -504,11 +505,11 @@ if ((snoopstate = netsec_get_snoop(nsc))) netsec_set_snoop(nsc, 0); for (;;) - switch (multiline ()) { + switch (multiline (seenhdr ? FALSE : TRUE)) { case NOTOK: netsec_set_snoop(nsc, snoopstate); return NOTOK; case DONE: @@ -515,10 +516,12 @@ strncpy (response, buffer, sizeof(response)); netsec_set_snoop(nsc, snoopstate); return OK; case OK: + if (seenhdr != TRUE && strnlen(response,sizeof(response)) == 0) + seenhdr = TRUE; (*action) (response); break; } } @@ -591,11 +594,11 @@ response[sizeof(response) - 1] = '\0'; free(errstr); return NOTOK; } - switch (pop_getline (response, sizeof response, nsc)) { + switch (pop_getline (response, sizeof response, nsc, TRUE)) { case OK: if (poprint) fprintf (stderr, "<--- %s\n", response); return (*response == '+' ? OK : NOTOK); @@ -609,15 +612,15 @@ return NOTOK; /* NOTREACHED */ } int -multiline (void) +multiline (int strict) { char buffer[BUFSIZ + LEN(TRM)]; - if (pop_getline (buffer, sizeof buffer, nsc) != OK) + if (pop_getline (buffer, sizeof buffer, nsc, strict) != OK) return NOTOK; if (has_prefix(buffer, TRM)) { if (buffer[LEN(TRM)] == 0) return DONE; strncpy (response, buffer + LEN(TRM), sizeof(response)); @@ -631,19 +634,19 @@ /* * This is now just a thin wrapper around netsec_readline(). */ static int -pop_getline (char *s, int n, netsec_context *ns) +pop_getline (char *s, int n, netsec_context *ns, int strict) { /* int c = -2; */ char *p; size_t len, destlen; /* int rc; */ char *errstr; - p = netsec_readline(ns, &len, &errstr); + p = netsec_readline(ns, &len, &errstr, strict); if (p == NULL) { strncpy(response, errstr, sizeof(response)); response[sizeof(response) - 1] = '\0'; free(errstr);