> On Thu, Sep 21, 2023 at 02:12:50PM +0200, Stefan Sperling wrote:
> > Your implementation lacks proper bounds checking. It accesses
> > s[i + 3] based purely on the contents of the input string, without
> > checking whether len < i + 3. Entering the while (i != len) loop with
You surely meant "len > i + 3" (greater than), so the patch below is wrong.
I know it doesn't matter anymore, but I'm clarifying anyway so that no one
wastes time trying the patch.
>
>
>
> Index: send.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mail/send.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 send.c
> --- send.c 8 Mar 2023 04:43:11 -0000 1.26
> +++ send.c 21 Sep 2023 14:16:08 -0000
> @@ -33,6 +33,10 @@
> #include "rcv.h"
> #include "extern.h"
>
> +/* To check the charset of the message and add the appropriate MIME headers */
> +static char nutf8;
> +static int not_utf8(FILE *s, int len);
> +
> static volatile sig_atomic_t sendsignal; /* Interrupted by a signal? */
>
> /*
> @@ -341,6 +345,11 @@ mail1(struct header *hp, int printheader
> else
> puts("Null message body; hope that's ok");
> }
> +
> + /* Check for invalid UTF-8 characters in the message */
> + nutf8 = not_utf8(mtf, fsize(mtf));
> + rewind(mtf);
> +
> /*
> * Now, take the user names from the combined
> * to and cc lists and do all the alias
> @@ -525,6 +534,14 @@ puthead(struct header *hp, FILE *fo, int
> fmt("To:", hp->h_to, fo, w&GCOMMA), gotcha++;
> if (hp->h_subject != NULL && w & GSUBJECT)
> fprintf(fo, "Subject: %s\n", hp->h_subject), gotcha++;
> + if (nutf8 == 0)
> + fprintf(fo, "MIME-Version: 1.0\n"
> + "Content-Type: text/plain; charset=us-ascii\n"
> + "Content-Transfer-Encoding: 7bit\n"), gotcha++;
> + else if (nutf8 == 1)
> + fprintf(fo, "MIME-Version: 1.0\n"
> + "Content-Type: text/plain; charset=utf-8\n"
> + "Content-Transfer-Encoding: 8bit\n"), gotcha++;
> if (hp->h_cc != NULL && w & GCC)
> fmt("Cc:", hp->h_cc, fo, w&GCOMMA), gotcha++;
> if (hp->h_bcc != NULL && w & GBCC)
> @@ -609,4 +626,60 @@ sendint(int s)
> {
>
> sendsignal = s;
> +}
> +
> +/* Search for invalid UTF-8 characters in the message */
> +static int
> +not_utf8(FILE *message, int len)
> +{
> + int i, n, nonascii;
> + char c;
> + unsigned char s[len + 1];
> +
> + i = 0;
> + while ((c = getc(message)) != EOF)
> + s[i++] = c;
> +
> + s[i] = '\0';
> +
> + i = n = nonascii = 0;
> + while (i != len)
> + if (s[i] <= 0x7f) {
> + i++;
> + /* Two bytes case */
> + } else if (len < i + 1 && s[i] >= 0xc2 && s[i] < 0xe0 &&
> + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf) {
> + i += 2;
> + nonascii++;
> + /* Special three bytes case */
> + } else if ((len < i + 2 && s[i] == 0xe0 &&
> + s[i + 1] >= 0xa0 && s[i + 1] <= 0xbf &&
> + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf) ||
> + /* Three bytes case */
> + (len < i + 2 && s[i] > 0xe0 && s[i] < 0xf0 &&
> + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf &&
> + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf)) {
> + i += 3;
> + nonascii++;
> + /* Special four bytes case */
> + } else if ((len < i + 3 && s[i] == 0xf0 &&
> + s[i + 1] >= 0x90 && s[i + 1] <= 0xbf &&
> + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf &&
> + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf) ||
> + /* Four bytes case */
> + (len < i + 3 && s[i] > 0xf0 &&
> + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf &&
> + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf &&
> + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf)) {
> + i += 4;
> + nonascii++;
> + } else {
> + n = i + 1;
> + break;
> + }
> +
> + if (nonascii)
> + n++;
> +
> + return n;
> }
>
>
> --
> Walter
--
Walter