Module Name: src
Committed By: martin
Date: Sat Dec 9 13:03:34 UTC 2023
Modified Files:
src/lib/libc/gen [netbsd-10]: vis.c
src/tests/lib/libc/gen [netbsd-10]: t_vis.c
Log Message:
Pull up following revision(s) (requested by riastradh in ticket #485):
lib/libc/gen/vis.c: revision 1.76-1.86
tests/lib/libc/gen/t_vis.c: revision 1.10-1.14
vis(3): Avoid nonportable MIN in portable code.
vis(3) tests: Add xfail test for encoding overflow.
>From Kyle Evans <kevans%FreeBSD.org@localhost>.
PR lib/57573
vis(3) tests: Expand tests and diagnostic outputs on failure.
PR lib/57573
vis(3) tests: Test another overflow edge case.
Related to PR lib/57573.
vis(3): Make maxolen unsigned size_t, not ssize_t.
It is initialized once either to *dlen, which is unsigned size_t, or
to wcslen(start) * MB_MAX_LEN + 1, and wcslen returns unsigned size_t
too. So there appears to have never been any reason for this to be
signed.
Part of PR lib/57573.
vis(3): Make mbslength unsigned.
Sprinkle assertions and comments justifying the proposition that it
would never go negative if signed.
Obviates need to worry about mblength > SSIZE_MAX.
Prompted by PR lib/57573.
vis(3): Avoid arithmetic overflow before calloc(3).
Prompted by PR lib/57573.
vis(3): Call wcslen(start) only once.
It had better not change between these two times!
Prompted by PR lib/57573.
vis(3): Avoid potential arithmetic overflow in maxolen.
Can't easily prove that this overflow is impossible, so let's add a
check.
Prompted by PR lib/57573.
vis(3): Fix main part of PR lib/57573.
>From Kyle Evans <kevans%FreeBSD.org@localhost>.
vis(3): Fix one more buffer overrun in an edge case.
PR lib/57573
vis(3): Sort includes. No functional change intended.
Prompted by PR lib/57573.
vis(3): Need <stdint.h> for SIZE_MAX, per C standard.
>From Kyle Evans <kevans%FreeBSD.org@localhost>.
Followup to PR lib/57573.
vis(3): Per KNF, sys/param.h comes before sys/types.h.
Which is nice because that's also lexicographic.
To generate a diff of this commit:
cvs rdiff -u -r1.75 -r1.75.2.1 src/lib/libc/gen/vis.c
cvs rdiff -u -r1.9 -r1.9.24.1 src/tests/lib/libc/gen/t_vis.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libc/gen/vis.c
diff -u src/lib/libc/gen/vis.c:1.75 src/lib/libc/gen/vis.c:1.75.2.1
--- src/lib/libc/gen/vis.c:1.75 Fri Jun 18 10:57:14 2021
+++ src/lib/libc/gen/vis.c Sat Dec 9 13:03:34 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: vis.c,v 1.75 2021/06/18 10:57:14 christos Exp $ */
+/* $NetBSD: vis.c,v 1.75.2.1 2023/12/09 13:03:34 martin Exp $ */
/*-
* Copyright (c) 1989, 1993
@@ -57,7 +57,7 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.75 2021/06/18 10:57:14 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.75.2.1 2023/12/09 13:03:34 martin Exp $");
#endif /* LIBC_SCCS and not lint */
#ifdef __FBSDID
__FBSDID("$FreeBSD$");
@@ -65,13 +65,15 @@ __FBSDID("$FreeBSD$");
#endif
#include "namespace.h"
-#include <sys/types.h>
+
#include <sys/param.h>
+#include <sys/types.h>
#include <assert.h>
-#include <vis.h>
#include <errno.h>
+#include <stdint.h>
#include <stdlib.h>
+#include <vis.h>
#include <wchar.h>
#include <wctype.h>
@@ -396,21 +398,23 @@ static int
istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
int flags, const char *mbextra, int *cerr_ptr)
{
+ char mbbuf[MB_LEN_MAX];
wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
size_t len, olen;
uint64_t bmsk, wmsk;
wint_t c;
visfun_t f;
int clen = 0, cerr, error = -1, i, shft;
- char *mbdst, *mdst;
- ssize_t mbslength, maxolen;
+ char *mbdst, *mbwrite, *mdst;
+ size_t mbslength;
+ size_t maxolen;
mbstate_t mbstate;
_DIAGASSERT(mbdstp != NULL);
_DIAGASSERT(mbsrc != NULL || mblength == 0);
_DIAGASSERT(mbextra != NULL);
- mbslength = (ssize_t)mblength;
+ mbslength = mblength;
/*
* When inputing a single character, must also read in the
* next character for nextc, the look-ahead character.
@@ -431,6 +435,14 @@ istrsenvisx(char **mbdstp, size_t *dlen,
* return to the caller.
*/
+ /*
+ * Guarantee the arithmetic on input to calloc won't overflow.
+ */
+ if (mbslength > (SIZE_MAX - 1)/16) {
+ errno = ENOMEM;
+ return -1;
+ }
+
/* Allocate space for the wide char strings */
psrc = pdst = extra = NULL;
mdst = NULL;
@@ -465,9 +477,15 @@ istrsenvisx(char **mbdstp, size_t *dlen,
memset(&mbstate, 0, sizeof(mbstate));
while (mbslength > 0) {
/* Convert one multibyte character to wchar_t. */
- if (!cerr)
- clen = mbrtowc(src, mbsrc, MIN(mbslength, MB_LEN_MAX),
+ if (!cerr) {
+ clen = mbrtowc(src, mbsrc,
+ (mbslength < MB_LEN_MAX
+ ? mbslength
+ : MB_LEN_MAX),
&mbstate);
+ assert(clen < 0 || (size_t)clen <= mbslength);
+ assert(clen <= MB_LEN_MAX);
+ }
if (cerr || clen < 0) {
/* Conversion error, process as a byte instead. */
*src = (wint_t)(u_char)*mbsrc;
@@ -481,6 +499,20 @@ istrsenvisx(char **mbdstp, size_t *dlen,
*/
clen = 1;
}
+ /*
+ * Let n := MIN(mbslength, MB_LEN_MAX). We have:
+ *
+ * mbslength >= 1
+ * mbrtowc(..., n, &mbstate) <= n,
+ * by the contract of mbrtowc
+ *
+ * clen is either
+ * (a) mbrtowc(..., n, &mbstate), in which case
+ * clen <= n <= mbslength; or
+ * (b) 1, in which case clen = 1 <= mbslength.
+ */
+ assert(clen > 0);
+ assert((size_t)clen <= mbslength);
/* Advance buffer character pointer. */
src++;
/* Advance input pointer by number of bytes read. */
@@ -538,12 +570,49 @@ istrsenvisx(char **mbdstp, size_t *dlen,
* output byte-by-byte here. Else use wctomb().
*/
len = wcslen(start);
- maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
+ if (dlen) {
+ maxolen = *dlen;
+ if (maxolen == 0) {
+ errno = ENOSPC;
+ goto out;
+ }
+ } else {
+ if (len > (SIZE_MAX - 1)/MB_LEN_MAX) {
+ errno = ENOSPC;
+ goto out;
+ }
+ maxolen = len*MB_LEN_MAX + 1;
+ }
olen = 0;
memset(&mbstate, 0, sizeof(mbstate));
for (dst = start; len > 0; len--) {
- if (!cerr)
- clen = wcrtomb(mbdst, *dst, &mbstate);
+ if (!cerr) {
+ /*
+ * If we have at least MB_CUR_MAX bytes in the buffer,
+ * we'll just do the conversion in-place into mbdst. We
+ * need to be a little more conservative when we get to
+ * the end of the buffer, as we may not have MB_CUR_MAX
+ * bytes but we may not need it.
+ */
+ if (maxolen - olen > MB_CUR_MAX)
+ mbwrite = mbdst;
+ else
+ mbwrite = mbbuf;
+ clen = wcrtomb(mbwrite, *dst, &mbstate);
+ if (clen > 0 && mbwrite != mbdst) {
+ /*
+ * Don't break past our output limit, noting
+ * that maxolen includes the nul terminator so
+ * we can't write past maxolen - 1 here.
+ */
+ if (olen + clen >= maxolen) {
+ errno = ENOSPC;
+ goto out;
+ }
+
+ memcpy(mbdst, mbwrite, clen);
+ }
+ }
if (cerr || clen < 0) {
/*
* Conversion error, process as a byte(s) instead.
@@ -558,16 +627,27 @@ istrsenvisx(char **mbdstp, size_t *dlen,
shft = i * NBBY;
bmsk = (uint64_t)0xffLL << shft;
wmsk |= bmsk;
- if ((*dst & wmsk) || i == 0)
+ if ((*dst & wmsk) || i == 0) {
+ if (olen + clen + 1 >= maxolen) {
+ errno = ENOSPC;
+ goto out;
+ }
+
mbdst[clen++] = (char)(
(uint64_t)(*dst & bmsk) >>
shft);
+ }
}
cerr = 1;
}
- /* If this character would exceed our output limit, stop. */
- if (olen + clen > (size_t)maxolen)
- break;
+
+ /*
+ * We'll be dereferencing mbdst[clen] after this to write the
+ * nul terminator; the above paths should have checked for a
+ * possible overflow already.
+ */
+ assert(olen + clen < maxolen);
+
/* Advance output pointer by number of bytes written. */
mbdst += clen;
/* Advance buffer character pointer. */
@@ -577,6 +657,7 @@ istrsenvisx(char **mbdstp, size_t *dlen,
}
/* Terminate the output string. */
+ assert(olen < maxolen);
*mbdst = '\0';
if (flags & VIS_NOLOCALE) {
Index: src/tests/lib/libc/gen/t_vis.c
diff -u src/tests/lib/libc/gen/t_vis.c:1.9 src/tests/lib/libc/gen/t_vis.c:1.9.24.1
--- src/tests/lib/libc/gen/t_vis.c:1.9 Tue Jan 10 15:16:57 2017
+++ src/tests/lib/libc/gen/t_vis.c Sat Dec 9 13:03:34 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: t_vis.c,v 1.9 2017/01/10 15:16:57 christos Exp $ */
+/* $NetBSD: t_vis.c,v 1.9.24.1 2023/12/09 13:03:34 martin Exp $ */
/*-
* Copyright (c) 2002 The NetBSD Foundation, Inc.
@@ -116,6 +116,23 @@ ATF_TC_BODY(strvis_empty, tc)
ATF_REQUIRE(dst[0] == '\0' && dst[1] == 'a');
}
+ATF_TC(strnvis_empty_empty);
+ATF_TC_HEAD(strnvis_empty_empty, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test strnvis(3) with empty source and destination");
+}
+
+ATF_TC_BODY(strnvis_empty_empty, tc)
+{
+ char dst[] = "fail";
+ int n;
+
+ n = strnvis(dst, 0, "", VIS_SAFE);
+ ATF_CHECK(memcmp(dst, "fail", sizeof(dst)) == 0);
+ ATF_CHECK_EQ_MSG(n, -1, "n=%d", n);
+}
+
ATF_TC(strunvis_hex);
ATF_TC_HEAD(strunvis_hex, tc)
{
@@ -175,16 +192,95 @@ ATF_TC_BODY(strvis_locale, tc)
}
#endif /* VIS_NOLOCALE */
+#define STRVIS_OVERFLOW_MARKER ((char)0xff) /* Arbitrary */
+
+#ifdef VIS_NOLOCALE
+ATF_TC(strvis_overflow_mb);
+ATF_TC_HEAD(strvis_overflow_mb, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test strvis(3) multi-byte overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_mb, tc)
+{
+ const char src[] = "\xf0\x9f\xa5\x91";
+ /* Extra byte to detect overflow */
+ char dst[sizeof(src) + 1];
+ unsigned i;
+ int n;
+
+ setlocale(LC_CTYPE, "en_US.UTF-8");
+
+ for (i = 0; i < sizeof(dst) - 1; i++) {
+ memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+ n = strnvis(dst, i, src, VIS_SAFE);
+ ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER,
+ "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]"
+ " STRVIS_OVERFLOW_MARKER=%02hhx",
+ i, dst[0], dst[1], dst[2], dst[3], dst[4],
+ STRVIS_OVERFLOW_MARKER);
+ ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n);
+ }
+
+ memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+ n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE);
+ ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER,
+ "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]"
+ " STRVIS_OVERFLOW_MARKER=%02hhx",
+ i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
+ STRVIS_OVERFLOW_MARKER);
+ ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n);
+}
+#endif
+
+ATF_TC(strvis_overflow_c);
+ATF_TC_HEAD(strvis_overflow_c, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test strvis(3) C locale overflow");
+}
+
+ATF_TC_BODY(strvis_overflow_c, tc)
+{
+ const char src[] = "AAAA";
+ /* Extra byte to detect overflow */
+ char dst[sizeof(src) + 1];
+ unsigned i;
+ int n;
+
+ for (i = 0; i < sizeof(dst) - 1; i++) {
+ memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+ n = strnvis(dst, i, src, VIS_SAFE | VIS_NOLOCALE);
+ ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER,
+ "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]"
+ " STRVIS_OVERFLOW_MARKER=%02hhx",
+ i, dst[0], dst[1], dst[2], dst[3], dst[4],
+ STRVIS_OVERFLOW_MARKER);
+ ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n);
+ }
+
+ memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst));
+ n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE | VIS_NOLOCALE);
+ ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER,
+ "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]"
+ " STRVIS_OVERFLOW_MARKER=%02hhx",
+ i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
+ STRVIS_OVERFLOW_MARKER);
+ ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n);
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, strvis_basic);
ATF_TP_ADD_TC(tp, strvis_null);
ATF_TP_ADD_TC(tp, strvis_empty);
+ ATF_TP_ADD_TC(tp, strnvis_empty_empty);
ATF_TP_ADD_TC(tp, strunvis_hex);
#ifdef VIS_NOLOCALE
ATF_TP_ADD_TC(tp, strvis_locale);
+ ATF_TP_ADD_TC(tp, strvis_overflow_mb);
#endif /* VIS_NOLOCALE */
+ ATF_TP_ADD_TC(tp, strvis_overflow_c);
return atf_no_error();
}