On Mon, 25 Sep 2023 21:31:08 +0200, Walter wrote: > Yours are the first technical, functional corrections I got about the > code. Thanks! Let's go back in time, then. I think that what you're > telling me can be done by simply replacing "break" for "return" in my > original function. Tell me what you think, please.
Yesterday I was so tired that I told you nonsense: there's no difference between putting break or return there; my original function already did what you told me. --- send.c.orig 2023-09-25 21:01:34.780102611 +0200 +++ send.c 2023-09-25 21:17:11.120117761 +0200 @@ -33,6 +33,10 @@ #include "rcv.h" #include "extern.h" ++/* To check charset of the message and add the appropiate MIME headers */ ++static char nutf8; ++static int not_utf8(FILE *s, int len); + static volatile sig_atomic_t sendsignal; /* Interrupted by a signal? */ /* @@ -341,6 +345,17 @@ else puts("Null message body; hope that's ok"); } + + /* Check non valid UTF-8 characters in the message */ + nutf8 = not_utf8(mtf, fsize(mtf)); + rewind(mtf); + if (nutf8 > 1) { + savedeadletter(mtf); + puts("Invalid or incomplete multibyte or wide character"); + fputs(". . . message not sent.\n", stderr); + exit(1); + } + /* * Now, take the user names from the combined * to and cc lists and do all the alias @@ -369,7 +384,7 @@ } if ((cp = value("record")) != NULL) (void)savemail(expand(cp), mtf); - + /* Setup sendmail arguments. 
*/ *ap++ = "sendmail"; *ap++ = "-i"; @@ -525,6 +540,16 @@ fmt("To:", hp->h_to, fo, w&GCOMMA), gotcha++; if (hp->h_subject != NULL && w & GSUBJECT) fprintf(fo, "Subject: %s\n", hp->h_subject), gotcha++; + if (nutf8 == 0) + fprintf(fo, "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=us-ascii\n" + "Content-Transfer-Encoding: 7bit\n"), + gotcha++; + else if (nutf8 == 1) + fprintf(fo, "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=utf-8\n" + "Content-Transfer-Encoding: 8bit\n"), + gotcha++; if (hp->h_cc != NULL && w & GCC) fmt("Cc:", hp->h_cc, fo, w&GCOMMA), gotcha++; if (hp->h_bcc != NULL && w & GBCC) @@ -610,3 +635,59 @@ sendsignal = s; } + +/* Search non valid UTF-8 characters in the message */ +static int +not_utf8(FILE *fp, int len) +{ + int i, n, nonascii; + char c; + unsigned char s[len]; + + i = 0; + while ((c = getc(fp)) != EOF) + s[i++] = c; + + s[i] = '\0'; + + i = n = nonascii = 0; + while (s[i] != '\0') + if (s[i] <= 0x7f) { + i++; + /* Two bytes case */ + } else if (s[i] >= 0xc2 && s[i] < 0xe0 && + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf) { + i += 2; + nonascii++; + /* Special three bytes case */ + } else if ((s[i] == 0xe0 && + s[i + 1] >= 0xa0 && s[i + 1] <= 0xbf && + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf) || + /* Three bytes case */ + (s[i] > 0xe0 && s[i] < 0xf0 && + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf && + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf)) { + i += 3; + nonascii++; + /* Special four bytes case */ + } else if ((s[i] == 0xf0 && + s[i + 1] >= 0x90 && s[i + 1] <= 0xbf && + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf && + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf) || + /* Four bytes case */ + (s[i] > 0xf0 && + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf && + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf && + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf)) { + i += 4; + nonascii++; + } else { + n = i + 1; + break; + } + + if (nonascii) + n++; + + return n; +} -- Walter