Module Name: src
Committed By: riastradh
Date: Sun Aug 18 04:51:16 UTC 2024
Modified Files:
src/tests/lib/libc/locale: t_c16rtomb.c t_c8rtomb.c
Log Message:
c8rtomb(3), c16rtomb(3), c32rtomb(3): Test stateful shift sequences.
PR lib/58612: c8rtomb/c16rtomb/c32rtomb yield suboptimal shift
sequences
To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/tests/lib/libc/locale/t_c16rtomb.c \
src/tests/lib/libc/locale/t_c8rtomb.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/tests/lib/libc/locale/t_c16rtomb.c
diff -u src/tests/lib/libc/locale/t_c16rtomb.c:1.3 src/tests/lib/libc/locale/t_c16rtomb.c:1.4
--- src/tests/lib/libc/locale/t_c16rtomb.c:1.3 Sun Aug 18 02:19:35 2024
+++ src/tests/lib/libc/locale/t_c16rtomb.c Sun Aug 18 04:51:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $ */
+/* $NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $ */
/*-
* Copyright (c) 2002 Tim J. Robbins
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__RCSID("$NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $");
+__RCSID("$NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $");
#include <errno.h>
#include <limits.h>
@@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name
}
static mbstate_t s;
-static char buf[MB_LEN_MAX + 1];
+static char buf[7*MB_LEN_MAX + 1];
ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
ATF_TC_BODY(c16rtomb_c_locale_test, tc)
@@ -114,6 +114,88 @@ ATF_TC_BODY(c16rtomb_c_locale_test, tc)
"buf=[%02x %02x]", buf[0], buf[1]);
}
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test);
+ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc)
+{
+ char *p;
+ size_t n;
+
+ require_lc_ctype("ja_JP.ISO-2022-JP");
+
+ /*
+ * If the buffer argument is NULL, c16 is ignored (treated as 0)
+ * and c16rtomb() resets its internal state.
+ */
+ ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
+
+ /* Null wide character. */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
+ ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+ (unsigned char)buf[1] == 0xcc),
+ "buf=[%02x %02x]", buf[0], buf[1]);
+
+ /* Latin letter A, internal state. */
+ ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
+
+ /*
+ * 1. U+0041 LATIN CAPITAL LETTER A
+ * 2. U+00A5 YEN SIGN (3-byte shift + 1-byte character)
+ * 3. U+00A5 YEN SIGN (again, no shift needed)
+ * 4. U+30A2 KATAKANA LETTER A (3-byte shift + 2-byte character)
+ * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
+ * 6. incomplete UTF-16 surrogate pair -- no output
+ * 7. U+0000 NUL (plus shift sequence to initial state)
+ */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ p = buf;
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */
+ p += 1;
+ atf_tc_expect_fail("PR lib/58612:"
+ " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences");
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */
+ p += 4;
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */
+ p += 1;
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */
+ p += 5;
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */
+ p += 2;
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */
+ ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */
+ p += 4;
+ ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+ (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
+ (unsigned char)buf[2] == '(' &&
+ (unsigned char)buf[3] == 'J' &&
+ (unsigned char)buf[4] == 0x5c && /* YEN SIGN */
+ (unsigned char)buf[5] == 0x5c && /* YEN SIGN */
+ (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */
+ (unsigned char)buf[7] == '$' &&
+ (unsigned char)buf[8] == 'B' &&
+ (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
+ (unsigned char)buf[10] == 0x22 &&
+ (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
+ (unsigned char)buf[12] == 0x22 &&
+ (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
+ (unsigned char)buf[14] == '(' &&
+ (unsigned char)buf[15] == 'B' &&
+ (unsigned char)buf[16] == '\0' &&
+ (unsigned char)buf[17] == 0xcc),
+ "buf=[%02x %02x %02x %02x %02x %02x %02x %02x "
+ " %02x %02x %02x %02x %02x %02x %02x %02x "
+ " %02x %02x]",
+ buf[0], buf[1], buf[2], buf[3],
+ buf[4], buf[5], buf[6], buf[7],
+ buf[8], buf[9], buf[10], buf[11],
+ buf[12], buf[13], buf[14], buf[15],
+ buf[16], buf[17]);
+}
+
ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
{
@@ -198,6 +280,7 @@ ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test);
ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
Index: src/tests/lib/libc/locale/t_c8rtomb.c
diff -u src/tests/lib/libc/locale/t_c8rtomb.c:1.3 src/tests/lib/libc/locale/t_c8rtomb.c:1.4
--- src/tests/lib/libc/locale/t_c8rtomb.c:1.3 Sun Aug 18 02:19:35 2024
+++ src/tests/lib/libc/locale/t_c8rtomb.c Sun Aug 18 04:51:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $ */
+/* $NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $ */
/*-
* Copyright (c) 2002 Tim J. Robbins
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__RCSID("$NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $");
+__RCSID("$NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $");
#include <errno.h>
#include <limits.h>
@@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name
}
static mbstate_t s;
-static char buf[MB_LEN_MAX + 1];
+static char buf[7*MB_LEN_MAX + 1];
ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test);
ATF_TC_BODY(c8rtomb_c_locale_test, tc)
@@ -142,6 +142,102 @@ ATF_TC_BODY(c8rtomb_c_locale_test, tc)
"buf=[%02x %02x]", buf[0], buf[1]);
}
+ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test);
+ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc)
+{
+ char *p;
+ size_t n;
+
+ require_lc_ctype("ja_JP.ISO-2022-JP");
+
+ /*
+ * If the buffer argument is NULL, c8 is ignored (treated as 0)
+ * and c8rtomb() resets its internal state.
+ */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
+
+ /* Null character (U+0000). */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
+ ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+ (unsigned char)buf[1] == 0xcc),
+ "buf=[%02x %02x]", buf[0], buf[1]);
+
+ /* Latin letter A, internal state. */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
+
+ /*
+ * 1. U+0041 LATIN CAPITAL LETTER A
+ * 2. U+00A5 YEN SIGN (3-byte shift + 1-byte character)
+ * 3. U+00A5 YEN SIGN (again, no shift needed)
+ * 4. U+30A2 KATAKANA LETTER A (3-byte shift + 2-byte character)
+ * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
+ * 6. incomplete UTF-8 multibyte sequence -- no output
+ * 7. U+0000 NUL (plus shift sequence to initial state)
+ */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ p = buf;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */
+ p += 1;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */
+ atf_tc_expect_fail("PR lib/58612:"
+ " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences");
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n);
+ p += 4;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n);
+ p += 1;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 5, "n=%zu", n);
+ p += 5;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n);
+ p += 2;
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */
+ p += 4;
+ ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+ (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
+ (unsigned char)buf[2] == '(' &&
+ (unsigned char)buf[3] == 'J' &&
+ (unsigned char)buf[4] == 0x5c && /* YEN SIGN */
+ (unsigned char)buf[5] == 0x5c && /* YEN SIGN */
+ (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */
+ (unsigned char)buf[7] == '$' &&
+ (unsigned char)buf[8] == 'B' &&
+ (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
+ (unsigned char)buf[10] == 0x22 &&
+ (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
+ (unsigned char)buf[12] == 0x22 &&
+ (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
+ (unsigned char)buf[14] == '(' &&
+ (unsigned char)buf[15] == 'B' &&
+ (unsigned char)buf[16] == '\0' &&
+ (unsigned char)buf[17] == 0xcc),
+ "buf=[%02x %02x %02x %02x %02x %02x %02x %02x "
+ " %02x %02x %02x %02x %02x %02x %02x %02x "
+ " %02x %02x]",
+ buf[0], buf[1], buf[2], buf[3],
+ buf[4], buf[5], buf[6], buf[7],
+ buf[8], buf[9], buf[10], buf[11],
+ buf[12], buf[13], buf[14], buf[15],
+ buf[16], buf[17]);
+}
+
ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test);
ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc)
{
@@ -252,6 +348,7 @@ ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test);
+ ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test);
ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test);
ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test);
ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);