Module Name: src
Committed By: riastradh
Date: Thu Aug 15 14:16:34 UTC 2024
Modified Files:
src/distrib/sets/lists/base: shl.mi
src/distrib/sets/lists/comp: mi
src/distrib/sets/lists/debug: mi
src/distrib/sets/lists/tests: mi
src/lib/libc: shlib_version
src/lib/libc/locale: Makefile.inc
src/share/man/man3: Makefile
src/tests/lib/libc/locale: Makefile
Added Files:
src/lib/libc/locale: c16rtomb.3 c16rtomb.c c32rtomb.3 c32rtomb.c
c32rtomb.h mbrtoc16.3 mbrtoc16.c mbrtoc32.3 mbrtoc32.c mbrtoc32.h
src/share/man/man3: uchar.3
src/tests/lib/libc/locale: t_c16rtomb.c t_c32rtomb.c t_mbrtoc16.c
t_mbrtoc32.c
Log Message:
libc: New C11 functions mbrtoc16, mbrtoc32, c16rtomb, c32rtomb.
The mbrtoc16/32 functions read multibyte strings according to the
current locale into UTF-16/32 code unit sequences; the c16/32rtomb
functions write UTF-16/32 code unit sequences into multibyte strings
according to the current locale. The `r' means restartable: they
work incrementally and pick up where they left off.
NOTE: This bumps the libc minor version, since it adds new symbols.
PR lib/52374: <uchar.h> missing
To generate a diff of this commit:
cvs rdiff -u -r1.987 -r1.988 src/distrib/sets/lists/base/shl.mi
cvs rdiff -u -r1.2468 -r1.2469 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.442 -r1.443 src/distrib/sets/lists/debug/mi
cvs rdiff -u -r1.1330 -r1.1331 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.296 -r1.297 src/lib/libc/shlib_version
cvs rdiff -u -r1.65 -r1.66 src/lib/libc/locale/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/lib/libc/locale/c16rtomb.3 \
src/lib/libc/locale/c16rtomb.c src/lib/libc/locale/c32rtomb.3 \
src/lib/libc/locale/c32rtomb.c src/lib/libc/locale/c32rtomb.h \
src/lib/libc/locale/mbrtoc16.3 src/lib/libc/locale/mbrtoc16.c \
src/lib/libc/locale/mbrtoc32.3 src/lib/libc/locale/mbrtoc32.c \
src/lib/libc/locale/mbrtoc32.h
cvs rdiff -u -r1.92 -r1.93 src/share/man/man3/Makefile
cvs rdiff -u -r0 -r1.1 src/share/man/man3/uchar.3
cvs rdiff -u -r1.16 -r1.17 src/tests/lib/libc/locale/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/lib/libc/locale/t_c16rtomb.c \
src/tests/lib/libc/locale/t_c32rtomb.c \
src/tests/lib/libc/locale/t_mbrtoc16.c \
src/tests/lib/libc/locale/t_mbrtoc32.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/distrib/sets/lists/base/shl.mi
diff -u src/distrib/sets/lists/base/shl.mi:1.987 src/distrib/sets/lists/base/shl.mi:1.988
--- src/distrib/sets/lists/base/shl.mi:1.987 Fri Aug 2 17:25:38 2024
+++ src/distrib/sets/lists/base/shl.mi Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-# $NetBSD: shl.mi,v 1.987 2024/08/02 17:25:38 christos Exp $
+# $NetBSD: shl.mi,v 1.988 2024/08/15 14:16:32 riastradh Exp $
#
# Note: Don't delete entries from here - mark them as "obsolete" instead,
# unless otherwise stated below.
@@ -22,7 +22,7 @@
./lib/libblocklist.so.0.1 base-sys-shlib dynamicroot
./lib/libc.so base-sys-shlib dynamicroot
./lib/libc.so.12 base-sys-shlib dynamicroot
-./lib/libc.so.12.221 base-sys-shlib dynamicroot
+./lib/libc.so.12.222 base-sys-shlib dynamicroot
./lib/libcrypt.so base-sys-shlib dynamicroot
./lib/libcrypt.so.1 base-sys-shlib dynamicroot
./lib/libcrypt.so.1.0 base-sys-shlib dynamicroot
@@ -257,7 +257,7 @@
./usr/lib/libc++.so.1.0 base-sys-shlib compatfile,libcxx
./usr/lib/libc.so base-sys-shlib compatfile
./usr/lib/libc.so.12 base-sys-shlib compatfile
-./usr/lib/libc.so.12.221 base-sys-shlib compatfile
+./usr/lib/libc.so.12.222 base-sys-shlib compatfile
./usr/lib/libcbor.so base-sys-shlib compatfile
./usr/lib/libcbor.so.0 base-sys-shlib compatfile
./usr/lib/libcbor.so.0.5 base-sys-shlib compatfile
Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2468 src/distrib/sets/lists/comp/mi:1.2469
--- src/distrib/sets/lists/comp/mi:1.2468 Thu Aug 15 13:14:43 2024
+++ src/distrib/sets/lists/comp/mi Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.2468 2024/08/15 13:14:43 riastradh Exp $
+# $NetBSD: mi,v 1.2469 2024/08/15 14:16:32 riastradh Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@@ -6842,6 +6842,8 @@
./usr/share/man/cat3/bufferevent_write_buffer.0 comp-obsolete obsolete
./usr/share/man/cat3/byteorder.0 comp-c-catman .cat
./usr/share/man/cat3/bzero.0 comp-c-catman .cat
+./usr/share/man/cat3/c16rtomb.0 comp-c-catman .cat
+./usr/share/man/cat3/c32rtomb.0 comp-c-catman .cat
./usr/share/man/cat3/cabs.0 comp-c-catman complex,.cat
./usr/share/man/cat3/cabsf.0 comp-c-catman complex,.cat
./usr/share/man/cat3/cabsl.0 comp-c-catman complex,.cat
@@ -9204,6 +9206,8 @@
./usr/share/man/cat3/math.0 comp-c-catman .cat
./usr/share/man/cat3/mblen.0 comp-c-catman .cat
./usr/share/man/cat3/mbrlen.0 comp-c-catman .cat
+./usr/share/man/cat3/mbrtoc16.0 comp-c-catman .cat
+./usr/share/man/cat3/mbrtoc32.0 comp-c-catman .cat
./usr/share/man/cat3/mbrtowc.0 comp-c-catman .cat
./usr/share/man/cat3/mbsinit.0 comp-c-catman .cat
./usr/share/man/cat3/mbsrtowcs.0 comp-c-catman .cat
@@ -11148,6 +11152,7 @@
./usr/share/man/cat3/tzset.0 comp-c-catman .cat
./usr/share/man/cat3/tzsetwall.0 comp-c-catman .cat
./usr/share/man/cat3/ualarm.0 comp-c-catman .cat
+./usr/share/man/cat3/uchar.0 comp-c-catman .cat
./usr/share/man/cat3/uid_from_user.0 comp-c-catman .cat
./usr/share/man/cat3/ukfs.0 comp-c-catman .cat,rump
./usr/share/man/cat3/ulimit.0 comp-c-catman .cat
@@ -15402,6 +15407,8 @@
./usr/share/man/html3/bufferevent_write_buffer.html comp-obsolete obsolete
./usr/share/man/html3/byteorder.html comp-c-htmlman html
./usr/share/man/html3/bzero.html comp-c-htmlman html
+./usr/share/man/html3/c16rtomb.html comp-c-htmlman html
+./usr/share/man/html3/c32rtomb.html comp-c-htmlman html
./usr/share/man/html3/cabs.html comp-c-htmlman complex,html
./usr/share/man/html3/cabsf.html comp-c-htmlman complex,html
./usr/share/man/html3/cabsl.html comp-c-htmlman complex,html
@@ -17696,6 +17703,8 @@
./usr/share/man/html3/math.html comp-c-htmlman html
./usr/share/man/html3/mblen.html comp-c-htmlman html
./usr/share/man/html3/mbrlen.html comp-c-htmlman html
+./usr/share/man/html3/mbrtoc16.html comp-c-htmlman html
+./usr/share/man/html3/mbrtoc32.html comp-c-htmlman html
./usr/share/man/html3/mbrtowc.html comp-c-htmlman html
./usr/share/man/html3/mbsinit.html comp-c-htmlman html
./usr/share/man/html3/mbsrtowcs.html comp-c-htmlman html
@@ -19621,6 +19630,7 @@
./usr/share/man/html3/tzset.html comp-c-htmlman html
./usr/share/man/html3/tzsetwall.html comp-c-htmlman html
./usr/share/man/html3/ualarm.html comp-c-htmlman html
+./usr/share/man/html3/uchar.html comp-c-htmlman html
./usr/share/man/html3/uid_from_user.html comp-c-htmlman html
./usr/share/man/html3/ukfs.html comp-c-htmlman html,rump
./usr/share/man/html3/ulimit.html comp-c-htmlman html
@@ -23821,6 +23831,8 @@
./usr/share/man/man3/bufferevent_write_buffer.3 comp-obsolete obsolete
./usr/share/man/man3/byteorder.3 comp-c-man .man
./usr/share/man/man3/bzero.3 comp-c-man .man
+./usr/share/man/man3/c16rtomb.3 comp-c-man .man
+./usr/share/man/man3/c32rtomb.3 comp-c-man .man
./usr/share/man/man3/cabs.3 comp-c-man complex,.man
./usr/share/man/man3/cabsf.3 comp-c-man complex,.man
./usr/share/man/man3/cabsl.3 comp-c-man complex,.man
@@ -26195,6 +26207,8 @@
./usr/share/man/man3/math.3 comp-c-man .man
./usr/share/man/man3/mblen.3 comp-c-man .man
./usr/share/man/man3/mbrlen.3 comp-c-man .man
+./usr/share/man/man3/mbrtoc16.3 comp-c-man .man
+./usr/share/man/man3/mbrtoc32.3 comp-c-man .man
./usr/share/man/man3/mbrtowc.3 comp-c-man .man
./usr/share/man/man3/mbsinit.3 comp-c-man .man
./usr/share/man/man3/mbsrtowcs.3 comp-c-man .man
@@ -28158,6 +28172,7 @@
./usr/share/man/man3/tzset.3 comp-c-man .man
./usr/share/man/man3/tzsetwall.3 comp-c-man .man
./usr/share/man/man3/ualarm.3 comp-c-man .man
+./usr/share/man/man3/uchar.3 comp-c-man .man
./usr/share/man/man3/uid_from_user.3 comp-c-man .man
./usr/share/man/man3/ukfs.3 comp-c-man .man,rump
./usr/share/man/man3/ulimit.3 comp-c-man .man
Index: src/distrib/sets/lists/debug/mi
diff -u src/distrib/sets/lists/debug/mi:1.442 src/distrib/sets/lists/debug/mi:1.443
--- src/distrib/sets/lists/debug/mi:1.442 Thu Aug 15 13:14:44 2024
+++ src/distrib/sets/lists/debug/mi Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.442 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: mi,v 1.443 2024/08/15 14:16:32 riastradh Exp $
#
./etc/mtree/set.debug comp-sys-root
./usr/lib comp-sys-usr compatdir
@@ -2058,11 +2058,15 @@
./usr/libdata/debug/usr/tests/lib/libc/inet/t_inet_network.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/kevent_nullmnt/h_nullmnt.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_btowc.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c16rtomb.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c32rtomb.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype1.debug tests-obsolete obsolete,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype2.debug tests-obsolete obsolete,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_digittoint.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ducet.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_io.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc16.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc32.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtowc.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbsnrtowcs.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbstowcs.debug tests-lib-debug debug,atf,compattestfile
Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1330 src/distrib/sets/lists/tests/mi:1.1331
--- src/distrib/sets/lists/tests/mi:1.1330 Thu Aug 15 13:14:44 2024
+++ src/distrib/sets/lists/tests/mi Thu Aug 15 14:16:33 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1330 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: mi,v 1.1331 2024/08/15 14:16:33 riastradh Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
#
@@ -3073,11 +3073,15 @@
./usr/tests/lib/libc/locale/Atffile tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/Kyuafile tests-lib-tests compattestfile,atf,kyua
./usr/tests/lib/libc/locale/t_btowc tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_c16rtomb tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_c32rtomb tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_ctype1 tests-obsolete obsolete
./usr/tests/lib/libc/locale/t_ctype2 tests-obsolete obsolete
./usr/tests/lib/libc/locale/t_digittoint tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_ducet tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_io tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc16 tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc32 tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_mbrtowc tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_mbsnrtowcs tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_mbstowcs tests-lib-tests compattestfile,atf
Index: src/lib/libc/shlib_version
diff -u src/lib/libc/shlib_version:1.296 src/lib/libc/shlib_version:1.297
--- src/lib/libc/shlib_version:1.296 Fri Feb 2 21:52:22 2024
+++ src/lib/libc/shlib_version Thu Aug 15 14:16:33 2024
@@ -1,4 +1,4 @@
-# $NetBSD: shlib_version,v 1.296 2024/02/02 21:52:22 andvar Exp $
+# $NetBSD: shlib_version,v 1.297 2024/08/15 14:16:33 riastradh Exp $
# Remember to update distrib/sets/lists/base/shl.* when changing
#
# things we wish to do on next major version bump:
@@ -55,4 +55,4 @@
# - remove tzsetwall(3), upstream has removed it
# - move *rand48* to libcompat
major=12
-minor=221
+minor=222
Index: src/lib/libc/locale/Makefile.inc
diff -u src/lib/libc/locale/Makefile.inc:1.65 src/lib/libc/locale/Makefile.inc:1.66
--- src/lib/libc/locale/Makefile.inc:1.65 Mon Feb 15 14:35:04 2021
+++ src/lib/libc/locale/Makefile.inc Thu Aug 15 14:16:33 2024
@@ -1,5 +1,5 @@
# from: @(#)Makefile.inc 5.1 (Berkeley) 2/18/91
-# $NetBSD: Makefile.inc,v 1.65 2021/02/15 14:35:04 christos Exp $
+# $NetBSD: Makefile.inc,v 1.66 2024/08/15 14:16:33 riastradh Exp $
# locale sources
.PATH: ${ARCHDIR}/locale ${.CURDIR}/locale
@@ -11,6 +11,13 @@ SRCS+= setlocale.c __mb_cur_max.c \
wcstol.c wcstoll.c wcstoimax.c wcstoul.c wcstoull.c wcstoumax.c \
wcstod.c wcstof.c wcstold.c wcscoll.c wcsxfrm.c wcsftime.c
+SRCS+= c16rtomb.c
+SRCS+= c32rtomb.c
+SRCS+= mbrtoc16.c
+SRCS+= mbrtoc32.c
+CPPFLAGS.c32rtomb.c+= -I${LIBCDIR}/citrus
+CPPFLAGS.mbrtoc32.c+= -I${LIBCDIR}/citrus
+
# citrus multibyte locale support
# we have quirk for libc.a - see the last part of lib/libc/Makefile
CPPFLAGS+= -DWITH_RUNE -I${.CURDIR}
@@ -29,6 +36,11 @@ MAN+= btowc.3 mbrtowc.3 mbsrtowcs.3 \
wctob.3 wcrtomb.3 wcsrtombs.3 \
mbrlen.3 mbsinit.3
+MAN+= c16rtomb.3
+MAN+= c32rtomb.3
+MAN+= mbrtoc16.3
+MAN+= mbrtoc32.3
+
MAN+= iswalnum.3 wctype.3 iswctype.3 \
towlower.3 wctrans.3 towctrans.3 \
wcwidth.3
Index: src/share/man/man3/Makefile
diff -u src/share/man/man3/Makefile:1.92 src/share/man/man3/Makefile:1.93
--- src/share/man/man3/Makefile:1.92 Thu Mar 7 22:14:20 2024
+++ src/share/man/man3/Makefile Thu Aug 15 14:16:34 2024
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.92 2024/03/07 22:14:20 christos Exp $
+# $NetBSD: Makefile,v 1.93 2024/08/15 14:16:34 riastradh Exp $
# @(#)Makefile 8.2 (Berkeley) 12/13/93
MAN= _DIAGASSERT.3 __CONCAT.3 __FPTRCAST.3 __UNCONST.3 __USE.3 CMSG_DATA.3 \
@@ -13,6 +13,7 @@ MAN= _DIAGASSERT.3 __CONCAT.3 __FPTRCAST
makedev.3 offsetof.3 param.3 paths.3 queue.3 rbtree.3 sigevent.3 \
stdarg.3 stdbool.3 stddef.3 stdint.3 stdlib.3 sysexits.3 \
tgmath.3 timeradd.3 timeval.3 tm.3 tree.3 typeof.3 types.3 \
+ uchar.3 \
unistd.3
USETBL= # used by queue.3
Index: src/tests/lib/libc/locale/Makefile
diff -u src/tests/lib/libc/locale/Makefile:1.16 src/tests/lib/libc/locale/Makefile:1.17
--- src/tests/lib/libc/locale/Makefile:1.16 Thu Aug 15 13:14:44 2024
+++ src/tests/lib/libc/locale/Makefile Thu Aug 15 14:16:34 2024
@@ -1,13 +1,17 @@
-# $NetBSD: Makefile,v 1.16 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: Makefile,v 1.17 2024/08/15 14:16:34 riastradh Exp $
.include <bsd.own.mk>
TESTSDIR= ${TESTSBASE}/lib/libc/locale
TESTS_C+= t_btowc
+TESTS_C+= t_c16rtomb
+TESTS_C+= t_c32rtomb
TESTS_C+= t_digittoint
TESTS_C+= t_ducet
TESTS_C+= t_io
+TESTS_C+= t_mbrtoc16
+TESTS_C+= t_mbrtoc32
TESTS_C+= t_mbrtowc
TESTS_C+= t_mbsnrtowcs
TESTS_C+= t_mbstowcs
Added files:
Index: src/lib/libc/locale/c16rtomb.3
diff -u /dev/null src/lib/libc/locale/c16rtomb.3:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c16rtomb.3 Thu Aug 15 14:16:33 2024
@@ -0,0 +1,199 @@
+.\" $NetBSD: c16rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt C16RTOMB 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm c16rtomb
+.Nd Restartable UTF-16 code unit to multibyte conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn c16rtomb "char * restrict s" \
+"char16_t c16" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to encode Unicode input as a multibyte character
+sequence output at
+.Fa s
+in the current locale, writing anywhere between zero and
+.Dv MB_CUR_MAX
+bytes, inclusive, to
+.Fa s ,
+depending on the inputs and conversion state
+.Fa ps .
+.Pp
+The input
+.Fa c16
+is a UTF-16 code unit, which can be either:
+.Bl -bullet
+.It
+a Unicode scalar value in the Basic Multilingual Plane (BMP), that is,
+a 16-bit code unit outside the interval [0xd800,0xdfff]; or,
+.It
+over the course of two consecutive calls to
+.Nm ,
+the high and low surrogate code points of a Unicode scalar value
+outside the BMP.
+.El
+.Pp
+If a low surrogate code point, that is, a value of
+.Fa c16
+in [0xdc00,0xdfff], is passed to
+.Nm
+without the preceding call to it with the same
+.Fa ps
+having been passed a high surrogate code point, that is, a value of
+.Fa c16
+in [0xd800,0xdbff], or if a high surrogate was passed in the previous
+call and anything other than a low surrogate is passed, then
+.Nm
+will return
+.Li (size_t)-1
+to denote failure with
+.Xr errno 2
+set to
+.Er EILSEQ .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns the number of bytes written to
+.Fa s
+on success, or sets
+.Xr errno 2
+and returns
+.Li "(size_t)-1"
+on failure.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Convert a UTF-16 code unit sequence to a multibyte string,
+NUL-terminate it, and print it:
+.Bd -literal -offset indent
+char16_t c16[] = { 0xd83d, 0xdca9 };
+char buf[__arraycount(c16)*MB_CUR_MAX + 1], *s = buf;
+size_t i;
+mbstate_t mbs = {0}; /* initial conversion state */
+
+for (i = 0; i < __arraycount(c16); i++) {
+ size_t len;
+
+ len = c16rtomb(s, c16[i], &mbs);
+ if (len == (size_t)-1)
+ err(1, "c16rtomb");
+ assert(len <= sizeof(buf) - (s - buf));
+ s += len;
+}
+*s = '\e0'; /* NUL-terminate */
+printf("%s\n", buf);
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+A surrogate code point was passed as
+.Fa c16
+when it is inappropriate.
+.It Bq Er EILSEQ
+The Unicode scalar value requested cannot be encoded as a multibyte
+sequence in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh BUGS
+It is not clear from the standard how
+.Nm
+is supposed to behave when given a high surrogate code point followed
+by a NUL:
+.Bd -literal -offset indent
+c16rtomb(s, 0xd800, ps);
+c16rtomb(s, L'\e0', ps);
+.Ed
+.Pp
+Currently this fails with
+.Er EILSEQ
+which matches other implementations, but this is at odds with language
+in the standard which suggests that passing
+.Li L'\e0'
+should unconditionally store a null byte and reset
+.Fa ps
+to the initial conversion state:
+.Bd -filled -offset indent
+If
+.Fa c16
+is a null wide character, a null byte is stored, preceded by any shift
+sequence needed to restore the initial shift state; the resulting state
+described is the initial conversion state.
+.Ed
+.Pp
+However, it is unclear what else this should store besides a null
+byte.
+Should it discard the pending high surrogate, or convert it to
+something else and store that?
Index: src/lib/libc/locale/c16rtomb.c
diff -u /dev/null src/lib/libc/locale/c16rtomb.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c16rtomb.c Thu Aug 15 14:16:33 2024
@@ -0,0 +1,181 @@
+/* $NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c16rtomb(s, c16, ps)
+ *
+ * Encode the Unicode UTF-16 code unit c16, which may be a surrogate
+ * code point, into the multibyte buffer s under the current
+ * locale, using multibyte encoding state ps.
+ *
+ * If c16 is a high surrogate, no output will be produced, but c16
+ * will be remembered; this must be followed by another call
+ * passing the trailing low surrogate.
+ *
+ * If c16 is a low surrogate, it must have been preceded by a call
+ * with the leading high surrogate; at this point the combined
+ * scalar value will be produced as output.
+ *
+ * Return the number of bytes stored on success, or (size_t)-1 on
+ * error with errno set to EILSEQ.
+ *
+ * At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ * p. 124.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ * RFC 2781, Internet Engineering Task Force, February 2000,
+ * Sec. 2.2: `Decoding UTF-16'.
+ * https://datatracker.ietf.org/doc/html/rfc2781#section-2.2
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "c32rtomb.h"
+
+struct c16rtombstate {
+ char16_t surrogate; /* pending high surrogate, or 0 if none */
+ mbstate_t mbs; /* conversion state handed on to c32rtomb() */
+};
+__CTASSERT(offsetof(struct c16rtombstate, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct c32rtombstate) <= sizeof(mbstate_t) -
+ offsetof(struct c16rtombstate, mbs));
+__CTASSERT(_Alignof(struct c16rtombstate) <= _Alignof(mbstate_t));
+
+size_t
+c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+ static mbstate_t psbuf; /* internal state used when ps == NULL */
+ char buf[MB_LEN_MAX]; /* scratch output used when s == NULL */
+ struct c16rtombstate *S;
+ char32_t c32;
+
+ /*
+ * `If ps is a null pointer, each function uses its own
+ * internal mbstate_t object instead, which is initialized at
+ * program startup to the initial conversion state; the
+ * functions are not required to avoid data races with other
+ * calls to the same function in this case. The
+ * implementation behaves as if no library function calls
+ * these functions with a null pointer for ps.'
+ */
+ if (ps == NULL)
+ ps = &psbuf;
+
+ /*
+ * `If s is a null pointer, the c16rtomb function is equivalent
+ * to the call
+ *
+ * c16rtomb(buf, L'\0', ps)
+ *
+ * where buf is an internal buffer.'
+ */
+ if (s == NULL) {
+ s = buf;
+ c16 = L'\0';
+ }
+
+ /*
+ * Open the private UTF-16 decoding state.
+ */
+ S = (struct c16rtombstate *)ps;
+
+#if 0
+ /*
+ * `If c16 is a null wide character, a null byte is stored,
+ * preceded by any shift sequence needed to restore the
+ * initial shift state; the resulting state described is the
+ * initial conversion state.'
+ *
+ * XXX But what else gets stored? Do we just discard any
+ * pending high surrogate, or do we convert it to something
+ * else, or what?
+ */
+ if (c16 == L'\0') {
+ S->surrogate = 0;
+ }
+#endif
+
+ /*
+ * Check whether:
+ *
+ * 1. We had previously decoded a high surrogate.
+ * => Decode the low surrogate -- reject if it's not a low
+ * surrogate -- and combine them to output a scalar
+ * value; clear the high surrogate for next time.
+ * 2. This is a high surrogate.
+ * => Save it and wait for the low surrogate with no output.
+ * 3. This is a low surrogate.
+ * => Reject.
+ * 4. This is not a surrogate.
+ * => Output a scalar value.
+ */
+ if (S->surrogate != 0) { /* 1. pending surrogate pair */
+ if (c16 < 0xdc00 || c16 > 0xdfff) {
+ errno = EILSEQ;
+ return (size_t)-1;
+ }
+ const char16_t w1 = S->surrogate; /* saved high surrogate */
+ const char16_t w2 = c16; /* low surrogate just received */
+ c32 = __SHIFTIN(__SHIFTOUT(w1, __BITS(9,0)), __BITS(19,10)) |
+ __SHIFTIN(__SHIFTOUT(w2, __BITS(9,0)), __BITS(9,0));
+ c32 += 0x10000; /* surrogate pairs encode values outside the BMP */
+ S->surrogate = 0; /* pair consumed; nothing pending */
+ } else if (c16 >= 0xd800 && c16 <= 0xdbff) { /* 2. high surrogate */
+ S->surrogate = c16;
+ return 0; /* produced nothing */
+ } else if (c16 >= 0xdc00 && c16 <= 0xdfff) { /* 3. low surrogate */
+ errno = EILSEQ;
+ return (size_t)-1;
+ } else { /* 4. not a surrogate */
+ c32 = c16;
+ }
+
+ /*
+ * We have a scalar value. Output it.
+ */
+ return c32rtomb(s, c32, &S->mbs);
+}
Index: src/lib/libc/locale/c32rtomb.3
diff -u /dev/null src/lib/libc/locale/c32rtomb.3:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.3 Thu Aug 15 14:16:33 2024
@@ -0,0 +1,141 @@
+.\" $NetBSD: c32rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt C32RTOMB 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm c32rtomb
+.Nd Restartable UTF-32 code unit to multibyte conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn c32rtomb "char * restrict s" \
+"char32_t c32" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to encode Unicode input as a multibyte character
+sequence output at
+.Fa s
+in the current locale, writing anywhere between zero and
+.Dv MB_CUR_MAX
+bytes, inclusive, to
+.Fa s ,
+depending on the inputs and conversion state
+.Fa ps .
+.Pp
+The input
+.Fa c32
+is a UTF-32 code unit, which represents a single Unicode scalar value,
+i.e., a Unicode code point that is not in the interval [0xd800,0xdfff]
+of surrogate code points.
+.Pp
+If a surrogate code point is passed,
+.Nm
+will return
+.Li (size_t)-1
+to denote failure with
+.Xr errno 2
+set to
+.Er EILSEQ .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns the number of bytes written to
+.Fa s
+on success, or sets
+.Xr errno 2
+and returns
+.Li "(size_t)-1"
+on failure.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Convert a sequence of Unicode scalar values to a multibyte sequence,
+NUL-terminate it, and print it:
+.Bd -literal -offset indent
+char32_t c32[] = { 0x1f4a9, 0x20ac, 0x21 };
+char buf[__arraycount(c32)*MB_CUR_MAX + 1], *s = buf;
+size_t i;
+mbstate_t mbs = {0}; /* initial conversion state */
+
+for (i = 0; i < __arraycount(c32); i++) {
+ size_t len;
+
+ len = c32rtomb(s, c32[i], &mbs);
+ if (len == (size_t)-1)
+ err(1, "c32rtomb");
+ assert(len <= sizeof(buf) - (s - buf));
+ s += len;
+}
+*s = '\e0'; /* NUL-terminate */
+printf("%s\n", buf);
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+A surrogate code point was passed as
+.Fa c32 .
+.It Bq Er EILSEQ
+The Unicode scalar value requested cannot be encoded as a multibyte
+sequence in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/c32rtomb.c
diff -u /dev/null src/lib/libc/locale/c32rtomb.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.c Thu Aug 15 14:16:33 2024
@@ -0,0 +1,162 @@
+/* $NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c32rtomb(s, c32, ps)
+ *
+ * Encode the Unicode UTF-32 code unit c32, which must not be a
+ * surrogate code point, into the multibyte buffer s under the
+ * current locale, using multibyte encoding state ps. A UTF-32
+ * code unit is also a Unicode scalar value, which is any Unicode
+ * code point except a surrogate.
+ *
+ * Return the number of bytes stored on success, or (size_t)-1 with
+ * errno set on error: EILSEQ, EIO (iconv open failed), or iconv's error.
+ *
+ * At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <sys/types.h> /* broken citrus_*.h */
+#include <sys/queue.h> /* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h" /* broken citrus_iconv.h */
+#include "citrus_module.h" /* broken citrus_iconv.h */
+#include "citrus_hash.h" /* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+size_t
+c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+ char buf[MB_LEN_MAX]; /* scratch output used when s is NULL */
+ struct _citrus_iconv *iconv = NULL; /* NULL until opened below */
+ char srcbuf[4]; /* c32 as written by le32enc; iconv's input */
+ const char *src;
+ char *dst;
+ size_t srcleft, dstleft, inval, len;
+ int error, errno_save;
+ /*
+ * Save errno in case _citrus_iconv_* clobbers it; the success path
+ * puts it back.  NOTE(review): ps is never referenced in this body.
+ */
+ errno_save = errno;
+
+ /*
+ * `If s is a null pointer, the c32rtomb function is equivalent
+ * to the call
+ *
+ * c32rtomb(buf, L'\0', ps)
+ *
+ * where buf is an internal buffer.'
+ */
+ if (s == NULL) {
+ s = buf;
+ c32 = L'\0';
+ }
+
+ /*
+ * Reject surrogates: they are code points but not scalar values.
+ */
+ if (c32 >= 0xd800 && c32 <= 0xdfff) {
+ errno = EILSEQ;
+ len = (size_t)-1;
+ goto out;
+ }
+
+ /*
+ * Open an iconv handle to convert UTF-32LE to the codeset of the
+ * current locale, as named by nl_langinfo(CODESET).
+ */
+ if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV, "utf-32le",
+ nl_langinfo(CODESET))) != 0) {
+ errno = EIO; /* XXX? the open error itself is not propagated */
+ len = (size_t)-1;
+ goto out;
+ }
+
+ /*
+ * Convert the 4 bytes in srcbuf into at most MB_CUR_MAX bytes at s.
+ */
+ le32enc(srcbuf, c32);
+ src = srcbuf;
+ srcleft = sizeof(srcbuf);
+ dst = s;
+ dstleft = MB_CUR_MAX;
+ error = _citrus_iconv_convert(iconv,
+ &src, &srcleft,
+ &dst, &dstleft,
+ _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+ if (error) { /* can't be incomplete, must be error */
+ errno = error; /* the conversion's error code becomes errno */
+ len = (size_t)-1;
+ goto out;
+ }
+ _DIAGASSERT(srcleft == 0);
+ _DIAGASSERT(dstleft <= MB_CUR_MAX);
+
+ /*
+ * If we didn't produce any output, that means the scalar value
+ * c32 can't be encoded in the current locale, so treat it as
+ * EILSEQ.
+ */
+ len = MB_CUR_MAX - dstleft; /* output space used up by the convert */
+ if (len == 0) {
+ errno = EILSEQ;
+ len = (size_t)-1;
+ goto out;
+ }
+
+ /*
+ * Make sure we preserve errno on success.
+ */
+ errno = errno_save;
+
+out: errno_save = errno; /* keep the result errno across the close */
+ _citrus_iconv_close(iconv);
+ errno = errno_save;
+ return len;
+}
Index: src/lib/libc/locale/c32rtomb.h
diff -u /dev/null src/lib/libc/locale/c32rtomb.h:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.h Thu Aug 15 14:16:33 2024
@@ -0,0 +1,36 @@
+/* $NetBSD: c32rtomb.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LIB_LIBC_LOCALE_C32RTOMB_H_
+#define LIB_LIBC_LOCALE_C32RTOMB_H_
+
+struct c32rtombstate {
+ char dummy; /* placeholder; presumably no state needed -- confirm */
+};
+
+#endif /* LIB_LIBC_LOCALE_C32RTOMB_H_ */
Index: src/lib/libc/locale/mbrtoc16.3
diff -u /dev/null src/lib/libc/locale/mbrtoc16.3:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc16.3 Thu Aug 15 14:16:33 2024
@@ -0,0 +1,304 @@
+.\" $NetBSD: mbrtoc16.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt MBRTOC16 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm mbrtoc16
+.Nd Restartable multibyte to UTF-16 code unit conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn mbrtoc16 "char16_t * restrict pc16" \
+"const char * restrict s" \
+"size_t n" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to decode a multibyte character sequence at
+.Fa s
+of up to
+.Fa n
+bytes in the current locale, and yield the content as UTF-16 code
+units via the output parameter
+.Fa pc16 .
+.Fa pc16
+may be null, in which case no output is stored.
+.Bl -bullet
+.It
+If the multibyte sequence at
+.Fa s
+is invalid or an error occurs in decoding,
+.Nm
+returns
+.Li (size_t)-1
+and sets
+.Xr errno 2
+to indicate the error.
+.It
+If the multibyte sequence at
+.Fa s
+is still incomplete after
+.Fa n
+bytes, including any previously processed input saved in
+.Fa ps ,
+.Nm
+saves its state in
+.Fa ps
+after all the input so far and returns
+.Li (size_t)-2 .
+.It
+If
+.Nm
+finds the null scalar value at
+.Fa s ,
+then it stores zero at
+.Li * Ns Fa pc16
+and returns zero.
+.It
+If
+.Nm
+finds a nonnull scalar value in the Basic Multilingual Plane, i.e., a
+16-bit scalar value, then it stores the scalar value at
+.Li * Ns Fa pc16 ,
+and returns the number of bytes it read from the input.
+.It
+If
+.Nm
+finds a scalar value outside the Basic Multilingual Plane (BMP), then
+it:
+.Bl -dash -compact
+.It
+stores the scalar value's high surrogate code point at
+.Li * Ns Fa pc16 ;
+.It
+stores conversion state in
+.Fa ps
+to remember the rest of the pending scalar value; and
+.It
+returns the number of bytes it read from the input.
+.El
+.It
+If
+.Nm
+had previously found a scalar value outside the BMP, then, instead of
+any of the above options, it:
+.Bl -dash -compact
+.It
+stores the scalar value's low surrogate code point at
+.Li * Ns Fa pc16 ;
+.It
+consumes the rest of the pending scalar value from the conversion state
+.Fa ps ;
+and
+.It
+returns
+.Li (size_t)-3
+to indicate that no bytes were consumed but a code unit was yielded
+nevertheless.
+.El
+.El
+.Pp
+If
+.Fa s
+is a null pointer, the
+.Nm
+call is equivalent to:
+.Bd -ragged -offset indent
+.Fo mbrtoc16
+.Li NULL ,
+.Li \*q\*q ,
+.Li 1 ,
+.Fa ps
+.Fc
+.Ed
+.Pp
+This always returns zero, and has the effect of resetting
+.Fa ps
+to the initial conversion state, without writing to
+.Fa pc16 ,
+even if it is nonnull.
+.Pp
+If
+.Fa ps
+is a null pointer,
+.Nm
+uses an internal
+.Vt mbstate_t
+object with static storage duration, distinct from all other
+.Vt mbstate_t
+objects (including those used by
+.Xr mbrtoc32 3 ,
+.Xr c16rtomb 3 ,
+and
+.Xr c32rtomb 3 ) ,
+which is initialized at program startup to the initial conversion
+state.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns:
+.Bl -tag -width ".Li (size_t)-3" -offset indent
+.It Li 0
+[null]
+if within the next
+.Fa n
+bytes at
+.Fa s
+the first multibyte character is null.
+.It Fa i
+[code unit]
+where
+.Li 1
+\*(Le
+.Fa i
+\*(Le
+.Fa n ,
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded a surrogate code point, and within the first
+.Fa i
+bytes at
+.Fa s
+a Unicode scalar value was decoded.
+.It Li (size_t)-3
+[continuation]
+if the previous call to
+.Nm
+with
+.Fa ps
+had yielded a high surrogate code point for a Unicode scalar value
+outside the Basic Multilingual Plane; no additional input is consumed
+in this case.
+.It Li (size_t)-2
+[incomplete]
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded a surrogate code point, and within the first
+.Fa n
+bytes at
+.Fa s ,
+including any previously buffered input, no complete Unicode scalar
+value could be decoded.
+.It Li (size_t)-1
+[error]
+if any encoding error was detected;
+.Xr errno 2
+is set to reflect the error.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Print the UTF-16 code units of a multibyte string in hexadecimal text:
+.Bd -literal -offset indent
+char *s = ...;
+size_t n = ...;
+mbstate_t mbs = {0}; /* initial conversion state */
+
+while (n) {
+ char16_t c16;
+ size_t len;
+
+ len = mbrtoc16(&c16, s, n, &mbs);
+ switch (len) {
+ case 0: /* null terminator */
+ assert(c16 == L'\e0');
+ goto out;
+ default: /* scalar value or high surrogate */
+ printf("U+%04"PRIx16"\en", (uint16_t)c16);
+ break;
+ case (size_t)-3: /* low surrogate */
+ printf("continue U+%04"PRIx16"\en", (uint16_t)c16);
+ continue; /* no input consumed: skip the s/n update */
+ case (size_t)-2: /* incomplete */
+ printf("incomplete\en");
+ goto readmore;
+ case (size_t)-1: /* error */
+ printf("error: %d\en", errno);
+ goto out;
+ }
+ s += len;
+ n -= len;
+}
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The multibyte sequence cannot be decoded as a Unicode scalar value.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/mbrtoc16.c
diff -u /dev/null src/lib/libc/locale/mbrtoc16.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc16.c Thu Aug 15 14:16:33 2024
@@ -0,0 +1,192 @@
+/* $NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc16(&c16, s, n, ps)
+ *
+ * Decode a Unicode scalar value from up to n bytes out of the
+ * multibyte string s, using multibyte encoding state ps, and
+ * store the next code unit in the UTF-16 representation of that
+ * scalar value at c16.
+ *
+ * If the next scalar value in s is outside the Basic Multilingual
+ * Plane, mbrtoc16 will yield the high surrogate code point in one
+ * call that consumes input, and will yield the low surrogate code
+ * point in the next call without consuming any input and
+ * returning (size_t)-3 instead.
+ *
+ * Return the number of bytes consumed on success, or:
+ *
+ * - 0 if the code unit is NUL, or
+ * - (size_t)-3 if the trailing low surrogate of a surrogate pair
+ * was returned without consuming any additional input, or
+ * - (size_t)-2 if the input is incomplete, or
+ * - (size_t)-1 on error with errno set to EILSEQ.
+ *
+ * In the case of incomplete input, the decoding state so far
+ * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ * subsequent calls to mbrtoc16 will pick up n bytes later into
+ * the input stream.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ * p. 124.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ * RFC 2781, Internet Engineering Task Force, February 2000,
+ * Sec. 2.1: `Encoding UTF-16'.
+ * https://datatracker.ietf.org/doc/html/rfc2781#section-2.1
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "mbrtoc32.h"
+
+struct mbrtoc16state {
+ char16_t surrogate; /* pending low surrogate, or 0 if none */
+ mbstate_t mbs; /* state passed on to mbrtoc32 */
+}; /* mbrtoc16 casts the caller's mbstate_t to this layout */
+__CTASSERT(offsetof(struct mbrtoc16state, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t) -
+ offsetof(struct mbrtoc16state, mbs));
+__CTASSERT(_Alignof(struct mbrtoc16state) <= _Alignof(mbstate_t));
+
+size_t
+mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t fallback;	/* own state, used when ps is null */
+	struct mbrtoc16state *st;
+	char32_t scalar;
+	char16_t pending;
+	size_t consumed;
+
+	/*
+	 * For a null s, C11 defines the call as equivalent to
+	 *
+	 *	mbrtoc16(NULL, "", 1, ps)
+	 *
+	 * so whatever the caller passed for pc16 and n is discarded.
+	 */
+	if (s == NULL) {
+		n = 1;
+		s = "";
+		pc16 = NULL;
+	}
+
+	/*
+	 * A null ps selects this function's own static mbstate_t.
+	 * C11 does not require that case to be free of data races
+	 * with other calls to mbrtoc16.
+	 *
+	 * Either way, view the mbstate_t through our private layout.
+	 */
+	st = (struct mbrtoc16state *)(ps != NULL ? ps : &fallback);
+
+	/*
+	 * A low surrogate left over from the previous call takes
+	 * priority over the input: hand it to the caller, clear it,
+	 * and return (size_t)-3 since no input bytes were consumed.
+	 */
+	pending = st->surrogate;
+	if (!(pending < 0xdc00 || pending > 0xdfff)) {
+		if (pc16 != NULL)
+			*pc16 = pending;
+		st->surrogate = 0;
+		return (size_t)-3;
+	}
+
+	/*
+	 * Nothing pending, so decode one scalar value from the input
+	 * with mbrtoc32, using the mbs member as its state.
+	 */
+	consumed = mbrtoc32(&scalar, s, n, &st->mbs);
+
+	/*
+	 * NUL: store a zero code unit and return 0.
+	 */
+	if (consumed == 0) {
+		if (pc16 != NULL)
+			*pc16 = 0;
+		return 0;
+	}
+
+	/*
+	 * Error ((size_t)-1) or input still incomplete after n bytes
+	 * ((size_t)-2): pass mbrtoc32's result through unchanged.
+	 */
+	if (consumed == (size_t)-1 || consumed == (size_t)-2)
+		return consumed;
+
+	/*
+	 * Otherwise mbrtoc32 consumed that many bytes and produced a
+	 * scalar value, which needs one or two UTF-16 code units.
+	 */
+	if (scalar > 0xffff) {
+		/*
+		 * Outside the Basic Multilingual Plane: subtract
+		 * 0x10000, then put bits 19..10 of the difference in
+		 * the high surrogate (0xd800 base) and bits 9..0 in
+		 * the low surrogate (0xdc00 base).  Yield the high
+		 * one now; keep the low one for the next call.
+		 */
+		const char32_t off = scalar - 0x10000;
+		const char16_t hi = 0xd800 | __SHIFTOUT(off, __BITS(19,10));
+		const char16_t lo = 0xdc00 | __SHIFTOUT(off, __BITS(9,0));
+
+		if (pc16 != NULL)
+			*pc16 = hi;
+		st->surrogate = lo;
+		_DIAGASSERT(st->surrogate != 0);
+	} else {
+		/*
+		 * Inside the Basic Multilingual Plane: the scalar
+		 * value is its own single UTF-16 code unit.
+		 */
+		if (pc16 != NULL)
+			*pc16 = scalar;
+		_DIAGASSERT(st->surrogate == 0);
+	}
+
+	return consumed;
+}
Index: src/lib/libc/locale/mbrtoc32.3
diff -u /dev/null src/lib/libc/locale/mbrtoc32.3:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.3 Thu Aug 15 14:16:33 2024
@@ -0,0 +1,236 @@
+.\" $NetBSD: mbrtoc32.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt MBRTOC32 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm mbrtoc32
+.Nd Restartable multibyte to UTF-32 code unit conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn mbrtoc32 "char32_t * restrict pc32" \
+"const char * restrict s" \
+"size_t n" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to decode a multibyte character sequence at
+.Fa s
+of up to
+.Fa n
+bytes in the current locale, and yield the content as UTF-32 code
+units, i.e., Unicode scalar values, via the output parameter
+.Fa pc32 .
+.Fa pc32
+may be null, in which case no output is stored.
+.Bl -bullet
+.It
+If the multibyte sequence at
+.Fa s
+is invalid or an error occurs in decoding,
+.Nm
+returns
+.Li (size_t)-1
+and sets
+.Xr errno 2
+to indicate the error.
+.It
+If the multibyte sequence at
+.Fa s
+is still incomplete after
+.Fa n
+bytes, including any previously processed input saved in
+.Fa ps ,
+.Nm
+saves its state in
+.Fa ps
+after all the input so far and returns
+.Li (size_t)-2 .
+.It
+If
+.Nm
+finds the null scalar value at
+.Fa s ,
+then it stores zero at
+.Li * Ns Fa pc32
+and returns zero.
+.It
+If
+.Nm
+finds a nonnull scalar value, then it stores the scalar value at
+.Li * Ns Fa pc32 ,
+and returns the number of bytes it read from the input.
+.El
+.Pp
+If
+.Fa s
+is a null pointer, the
+.Nm
+call is equivalent to:
+.Bd -ragged -offset indent
+.Fo mbrtoc32
+.Li NULL ,
+.Li \*q\*q ,
+.Li 1 ,
+.Fa ps
+.Fc
+.Ed
+.Pp
+This always returns zero, and has the effect of resetting
+.Fa ps
+to the initial conversion state, without writing to
+.Fa pc32 ,
+even if it is nonnull.
+.Pp
+If
+.Fa ps
+is a null pointer,
+.Nm
+uses an internal
+.Vt mbstate_t
+object with static storage duration, distinct from all other
+.Vt mbstate_t
+objects (including those used by
+.Xr mbrtoc16 3 ,
+.Xr c16rtomb 3 ,
+and
+.Xr c32rtomb 3 ) ,
+which is initialized at program startup to the initial conversion
+state.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns:
+.Bl -tag -width ".Li (size_t)-3" -offset indent
+.It Li 0
+[null]
+if within the next
+.Fa n
+bytes at
+.Fa s
+the first multibyte character is null.
+.It Fa i
+[scalar value]
+where
+.Li 1
+\*(Le
+.Fa i
+\*(Le
+.Fa n ,
+if within the first
+.Fa i
+bytes at
+.Fa s
+a Unicode scalar value was decoded.
+.It Li (size_t)-2
+[incomplete]
+if within the first
+.Fa n
+bytes at
+.Fa s
+no complete Unicode scalar value could be decoded.
+.It Li (size_t)-1
+[error]
+if any encoding error was detected;
+.Xr errno 2
+is set to reflect the error.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+.Bd -literal
+char *s = ...;
+size_t n = ...;
+mbstate_t mbs = {0}; /* initial conversion state */
+
+while (n) {
+ char32_t c32;
+ size_t len;
+
+ len = mbrtoc32(&c32, s, n, &mbs);
+ switch (len) {
+ case 0: /* null terminator */
+ assert(c32 == L'\e0');
+ goto out;
+ default: /* scalar value */
+ printf("U+%04"PRIx32"\en", (uint32_t)c32);
+ break;
+ case (size_t)-2: /* incomplete */
+ printf("incomplete\en");
+ goto readmore;
+ case (size_t)-1: /* error */
+ printf("error: %d\en", errno);
+ goto out;
+ }
+ s += len;
+ n -= len;
+}
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The multibyte sequence at
+.Fa s
+cannot be decoded as a Unicode scalar value
+in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/mbrtoc32.c
diff -u /dev/null src/lib/libc/locale/mbrtoc32.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.c Thu Aug 15 14:16:33 2024
@@ -0,0 +1,237 @@
+/* $NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc32(&c32, s, n, ps)
+ *
+ * Decode a Unicode UTF-32 code unit from up to n bytes out of the
+ * multibyte string s, and store it at c32, using multibyte
+ * encoding state ps. A UTF-32 code unit is also a Unicode scalar
+ * value, which is any Unicode code point except a surrogate.
+ *
+ * Return the number of bytes consumed on success, or 0 if the
+ * code unit is NUL, or (size_t)-2 if the input is incomplete, or
+ * (size_t)-1 on error with errno set (EILSEQ, EIO, or iconv's error).
+ *
+ * In the case of incomplete input, the decoding state so far
+ * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ * subsequent calls to mbrtoc32 will pick up n bytes later into
+ * the input stream.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <sys/param.h> /* MIN */
+#include <sys/types.h> /* broken citrus_*.h */
+#include <sys/queue.h> /* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h" /* broken citrus_iconv.h */
+#include "citrus_module.h" /* broken citrus_iconv.h */
+#include "citrus_hash.h" /* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+#include "mbrtoc32.h"
+
+__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t));
+__CTASSERT(_Alignof(struct mbrtoc32state) <= _Alignof(mbstate_t));
+
+size_t
+mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;
+	struct mbrtoc32state *S;
+	struct _citrus_iconv *iconv = NULL;
+	size_t len;
+	char32_t c32;
+	int error, errno_save;
+
+	/* Save errno in case _citrus_iconv_* clobbers it. */
+	errno_save = errno;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 * internal mbstate_t object instead, which is initialized at
+	 * program startup to the initial conversion state; the
+	 * functions are not required to avoid data races with other
+	 * calls to the same function in this case.  The
+	 * implementation behaves as if no library function calls
+	 * these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the mbrtoc32 function is equivalent
+	 * to the call:
+	 *
+	 *	mbrtoc32(NULL, "", 1, ps)
+	 *
+	 * In this case, the values of the parameters pc32 and n are
+	 * ignored.'
+	 */
+	if (s == NULL) {
+		pc32 = NULL;
+		s = "";
+		n = 1;
+	}
+
+	/* Get the private conversion state. */
+	S = (struct mbrtoc32state *)ps;
+
+	/*
+	 * If input length is zero, the result is always incomplete by
+	 * definition.  Don't bother with iconv -- we'd have to
+	 * disentangle truncated outputs.
+	 */
+	if (n == 0) {
+		len = (size_t)-2;
+		goto out;
+	}
+
+	/* Reset the destination buffer if this is the initial state. */
+	if (S->dstleft == 0)
+		S->dstleft = sizeof(S->dstbuf);
+
+	/*
+	 * Open an iconv handle to convert locale-dependent multibyte
+	 * input to UTF-32LE.
+	 */
+	if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV,
+	    nl_langinfo(CODESET), "utf-32le")) != 0) {
+		errno = EIO; /* XXX? */
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Try to iconv a minimal prefix, one more input byte at a
+	 * time.  If we succeed, set len to the length consumed and
+	 * goto ok.  Stop when the input or the room in srcbuf runs out.
+	 */
+	for (len = 0; len < n && S->nsrc < sizeof(S->srcbuf);) {
+		const char *src = S->srcbuf;
+		size_t srcleft;
+		char *dst = S->dstbuf + sizeof(S->dstbuf) - S->dstleft;
+		size_t inval;
+
+		S->srcbuf[S->nsrc++] = s[len++];
+		srcleft = S->nsrc;
+
+		error = _citrus_iconv_convert(iconv,
+		    &src, &srcleft,
+		    &dst, &S->dstleft,
+		    _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+		if (error != EINVAL) {
+			if (error == 0)
+				goto ok;
+			errno = error;
+			len = (size_t)-1;
+			goto out;
+		}
+	}
+
+	/*
+	 * If srcbuf filled up without yielding a code point, no more
+	 * input can be buffered, so the sequence can never complete:
+	 * fail rather than report an incomplete result forever.
+	 */
+	if (S->nsrc >= sizeof(S->srcbuf)) {
+		errno = EILSEQ;
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Incomplete.  Return (size_t)-2 and let the caller try again.
+	 * We have consumed all n bytes at this point without finding a
+	 * complete code point.  Not an error, so errno is left intact.
+	 */
+	errno = errno_save;
+	len = (size_t)-2;
+	goto out;
+
+ok:	/*
+	 * Successfully converted a minimal byte sequence, which should
+	 * produce exactly one UTF-32 code unit, encoded in
+	 * little-endian, representing a code point.  Get the code
+	 * point.
+	 */
+	c32 = le32dec(S->dstbuf);
+
+	/*
+	 * Reject surrogate code points.  We only deal in scalar
+	 * values.
+	 *
+	 * XXX Is this necessary?  Won't iconv take care of it for us?
+	 */
+	if (c32 >= 0xd800 && c32 <= 0xdfff) {
+		errno = EILSEQ;
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/* Non-surrogate code point -- scalar value.  Yield it. */
+	if (pc32)
+		*pc32 = c32;
+
+	/* The contract requires returning zero for the null scalar value. */
+	if (c32 == 0)
+		len = 0;
+
+	/* Make sure we preserve errno on success. */
+	errno = errno_save;
+
+out:	if (len != (size_t)-2) {
+		/* Finished or failed: return to the initial state. */
+		memset(S, 0, sizeof(*S));
+	}
+	errno_save = errno;
+	_citrus_iconv_close(iconv);
+	errno = errno_save;
+	return len;
+}
Index: src/lib/libc/locale/mbrtoc32.h
diff -u /dev/null src/lib/libc/locale/mbrtoc32.h:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.h Thu Aug 15 14:16:33 2024
@@ -0,0 +1,42 @@
+/* $NetBSD: mbrtoc32.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LIB_LIBC_LOCALE_MBRTOC32_H_
+#define LIB_LIBC_LOCALE_MBRTOC32_H_
+
+#include <limits.h>
+#include <uchar.h>
+
+struct mbrtoc32state {
+ char srcbuf[MB_LEN_MAX]; /* input bytes buffered so far */
+ size_t nsrc; /* number of bytes in use in srcbuf */
+ char dstbuf[4]; /* conversion output, read as UTF-32LE */
+ size_t dstleft; /* free bytes in dstbuf; 0 = initial state */
+};
+
+#endif /* LIB_LIBC_LOCALE_MBRTOC32_H_ */
Index: src/share/man/man3/uchar.3
diff -u /dev/null src/share/man/man3/uchar.3:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/share/man/man3/uchar.3 Thu Aug 15 14:16:34 2024
@@ -0,0 +1,131 @@
+.\" $NetBSD: uchar.3,v 1.1 2024/08/15 14:16:34 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt UCHAR 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm uchar
+.Nd Unicode utilities
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.In uchar.h
+header file declares types and functions for manipulating Unicode code
+units.
+.\""""""""""""""""""""""""""""""""""""""
+.Ss Types
+.Bl -tag -width ".Vt char32_t"
+.It Vt char16_t
+Unsigned integer type for UTF-16 code units.
+.Pp
+Same type as
+.Vt uint_least16_t
+from
+.In stdint.h .
+May represent both surrogate code points, i.e., code points in the
+interval [0xd800,0xdfff], and Unicode scalar values in the Basic
+Multilingual Plane, which are the 16-bit code points other than
+surrogate code points.
+.It Vt char32_t
+Unsigned integer type for UTF-32 code units.
+.Pp
+Same type as
+.Vt uint_least32_t
+from
+.In stdint.h .
+Can represent all Unicode scalar values, not just those in the Basic
+Multilingual Plane.
+Intended to represent only Unicode scalar values, not surrogate code
+points.
+.It Vt mbstate_t
+Opaque multibyte conversion state.
+.Pp
+Same type as in
+.In stddef.h
+and
+.In wchar.h .
+.It Vt size_t
+Unsigned integer type to represent array sizes.
+.Pp
+Same type as in
+.In stddef.h ,
+.In stdint.h ,
+and
+.In sys/types.h .
+.El
+.\""""""""""""""""""""""""""""""""""""""
+.Ss Functions
+The
+.In uchar.h
+header file declares the functions
+.Xr mbrtoc16 3 ,
+.Xr c16rtomb 3 ,
+.Xr mbrtoc32 3 ,
+and
+.Xr c32rtomb 3
+for conversion between multibyte sequences and UTF-16/UTF-32 code
+units.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.In uchar.h
+header file conforms to
+.St -isoC-2011
+and
+.St -p1003.1-2024 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.In uchar.h
+header file first appeared in
+.Nx 11.0 .
Index: src/tests/lib/libc/locale/t_c16rtomb.c
diff -u /dev/null src/tests/lib/libc/locale/t_c16rtomb.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_c16rtomb.c Thu Aug 15 14:16:34 2024
@@ -0,0 +1,187 @@
+/* $NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+ char *lc_ctype_set; /* locale name reported back by setlocale() */
+
+ lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+ if (lc_ctype_set == NULL)
+ atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+ locale_name, errno);
+ /* Require that the LC_CTYPE locale reported is the one requested. */
+ ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+ "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char buf[MB_LEN_MAX + 1];
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
+ATF_TC_BODY(c16rtomb_c_locale_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("C");
+
+	/*
+	 * If the buffer argument is NULL, c16 is implicitly 0, and
+	 * c16rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
+
+	/* Null code unit; %hhx keeps a signed char from sign-extending. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02hhx %02hhx]", buf[0], buf[1]);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
+
+	/* Latin letter A. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02hhx %02hhx]", buf[0], buf[1]);
+
+	/* U+1F4A9 'Pile of poo' has no encoding in the C locale. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02hhx]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
+ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* U+20AC 'Euro sign' must be rejected and leave buf untouched. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02hhx]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test);
+ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* U+20AC 'Euro sign' must come out as the single byte 0xa4. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02hhx %02hhx]", buf[0], buf[1]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test);
+ATF_TC_BODY(c16rtomb_utf_8_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.UTF-8");
+
+	/* U+1F4A9 'Pile of poo': output appears only after the trail unit. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
+		(unsigned char)buf[1] == 0x9f &&
+		(unsigned char)buf[2] == 0x92 &&
+		(unsigned char)buf[3] == 0xa9 &&
+		(unsigned char)buf[4] == 0xcc),
+	    "buf=[%02hhx %02hhx %02hhx %02hhx %02hhx]",
+	    buf[0], buf[1], buf[2], buf[3], buf[4]);
+
+	/* Invalid code; 'Pile of poo' without the trail surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02hhx]", buf[0]);
+
+	/* Invalid code; 'Pile of poo' without the lead surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02hhx]", buf[0]);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Add each c16rtomb test case, one per locale exercised, to tp. */
+ ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
+
+ return (atf_no_error());
+}
Index: src/tests/lib/libc/locale/t_c32rtomb.c
diff -u /dev/null src/tests/lib/libc/locale/t_c32rtomb.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_c32rtomb.c Thu Aug 15 14:16:34 2024
@@ -0,0 +1,60 @@
+/* $NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(c32rtomb_null);
+ATF_TC_HEAD(c32rtomb_null, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test null string output to c32rtomb");
+}
+ATF_TC_BODY(c32rtomb_null, tc)
+{
+ char *locale; /* locale name returned by setlocale() */
+ mbstate_t ps = {0}; /* zero-initialized conversion state */
+ size_t n; /* return value of c32rtomb() */
+
+ REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL);
+ ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale);
+ /* A null output buffer is expected to yield a return value of 1. */
+ ATF_CHECK_EQ_MSG((n = c32rtomb(NULL, L'x', &ps)), 1, "n=%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Add the single c32rtomb test case to tp. */
+ ATF_TP_ADD_TC(tp, c32rtomb_null);
+ return atf_no_error();
+}
Index: src/tests/lib/libc/locale/t_mbrtoc16.c
diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc16.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_mbrtoc16.c Thu Aug 15 14:16:34 2024
@@ -0,0 +1,241 @@
+/* $NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+ char *lc_ctype_set; /* locale name reported back by setlocale() */
+
+ lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+ if (lc_ctype_set == NULL)
+ atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+ locale_name, errno);
+ /* Require that the LC_CTYPE locale reported is the one requested. */
+ ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+ "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char16_t c16;
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
+ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
+{
+ size_t n; /* return value of each mbrtoc16() call */
+
+ require_lc_ctype("C");
+
+ /* NUL byte yields the null code unit and 0; internal state. */
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* NUL byte yields the null code unit and 0; caller's state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Latin letter A, internal state (after a call with null s). */
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Latin letter A, caller's state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Zero-length input is incomplete; c16 must stay untouched. */
+ c16 = L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* Check that mbrtoc16() doesn't access the buffer when n == 0. */
+ c16 = L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* Only 'A' may be consumed from "AB"; the next call then sees 'C'. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'C');
+
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
+{
+ size_t n; /* return value of mbrtoc16() */
+
+ require_lc_ctype("en_US.ISO8859-1");
+
+ /* Byte 0xa4 is the currency sign here and must decode to U+00A4. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
+{
+ size_t n; /* return value of mbrtoc16() */
+
+ require_lc_ctype("en_US.ISO8859-15");
+
+ /* Byte 0xa4 is the euro sign here and must decode to U+20AC. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
+ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
+{
+ size_t n; /* return value of each mbrtoc16() call */
+
+ require_lc_ctype("en_US.UTF-8");
+
+ /* NUL byte yields the null code unit and 0; internal state. */
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* NUL byte yields the null code unit and 0; caller's state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Latin letter A, internal state (after a call with null s). */
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Latin letter A, caller's state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Incomplete character sequence (zero length); c16 stays untouched. */
+ c16 = L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* Incomplete character sequence (truncated double-byte). */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+ "n=%zu", n);
+
+ /* Same lead byte, but complete: c3 84 must decode to U+00C4. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Restarting: feed c3 b7 (U+00F7) one byte per call. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Surrogate pair: the trail unit comes back with (size_t)-3. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Letter e with acute, precomposed: c3 a9 must decode to U+00E9. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
+
+ /* Letter e with acute, combined: U+0065, then U+0301 separately. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
+ "n=%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Add each mbrtoc16 test case, one per locale exercised, to tp. */
+ ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
+
+ return (atf_no_error());
+}
Index: src/tests/lib/libc/locale/t_mbrtoc32.c
diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc32.c:1.1
--- /dev/null Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_mbrtoc32.c Thu Aug 15 14:16:34 2024
@@ -0,0 +1,61 @@
+/* $NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(mbrtoc32_null);
+ATF_TC_HEAD(mbrtoc32_null, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test null string input to mbrtoc32");
+}
+ATF_TC_BODY(mbrtoc32_null, tc)
+{
+ char *locale;
+ char32_t c32; /* passed as the output slot; not inspected afterwards */
+ mbstate_t ps = {0}; /* zero-initialized conversion state */
+ size_t n;
+
+ REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL);
+ ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale);
+ /* C11: s == NULL is equivalent to mbrtoc32(NULL, "", 1, ps). */
+ ATF_CHECK_EQ_MSG((n = mbrtoc32(&c32, NULL, 0, &ps)), 0, "n=%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register the single mbrtoc32 test case with the test program. */
+ ATF_TP_ADD_TC(tp, mbrtoc32_null);
+ return atf_no_error();
+}