Anyone?
On Sat, Apr 23, 2011 at 10:52:59PM +0200, Stefan Sperling wrote:
> This patch implements the %ls and %lc format directives (wchar_t * and
> wint_t arguments, respectively). Based on NetBSD and FreeBSD code.
> The vfprintf(3) man page has wrongly been claiming that we already
> support them.
>
> Because vfprintf(3) is used on ramdisks, the code is wrapped in
> #ifdef PRINTF_WIDE_CHAR, which is defined when the system libc is
> built but not when the vfprintf() stub for the ramdisk is built.
> This way, ramdisks should not be affected.
>
> The alternative to having the #ifdef would be adding libstubs for
> wcsrtombs() and wcrtomb() (to avoid pulling citrus stuff onto the
> ramdisk) and tolerating some useless growth of the vfprintf() stub.
> While conditional compilation sucks in general, I think we're better
> off with the #ifdef in this case.
> I'll happily implement the alternative if it's preferred though.
>
> Index: stdio/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdio/Makefile.inc,v
> retrieving revision 1.13
> diff -u -p -r1.13 Makefile.inc
> --- stdio/Makefile.inc 17 Jun 2005 20:40:32 -0000 1.13
> +++ stdio/Makefile.inc 23 Apr 2011 20:01:40 -0000
> @@ -3,7 +3,7 @@
> # stdio sources
> .PATH: ${LIBCSRCDIR}/stdio
>
> -CFLAGS+=-DFLOATING_POINT
> +CFLAGS+=-DFLOATING_POINT -DPRINTF_WIDE_CHAR
>
> SRCS+= asprintf.c clrerr.c fclose.c fdopen.c feof.c ferror.c fflush.c
> fgetc.c \
> fgetln.c fgetpos.c fgets.c fileno.c findfp.c flags.c fopen.c \
> Index: stdio/vfprintf.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdio/vfprintf.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 vfprintf.c
> --- stdio/vfprintf.c 22 Dec 2010 14:54:44 -0000 1.60
> +++ stdio/vfprintf.c 23 Apr 2011 20:36:24 -0000
> @@ -49,6 +49,7 @@
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> +#include <wchar.h>
>
> #include "local.h"
> #include "fvwrite.h"
> @@ -79,6 +80,8 @@ union arg {
> double doublearg;
> long double longdoublearg;
> #endif
> + wint_t wintarg;
> + wchar_t *pwchararg;
> };
>
> static int __find_arguments(const char *fmt0, va_list ap, union arg
> **argtable,
> @@ -138,6 +141,72 @@ __sbprintf(FILE *fp, const char *fmt, va
> return (ret);
> }
>
> +#ifdef PRINTF_WIDE_CHAR
> +/*
> + * Convert a wide character string argument for the %ls format to a multibyte
> + * string representation. If not -1, prec specifies the maximum number of
> + * bytes to output, and also means that we can't assume that the wide char
> + * string is null-terminated.
> + */
> +static char *
> +__wcsconv(wchar_t *wcsarg, int prec)
> +{
> + mbstate_t mbs;
> + char buf[MB_LEN_MAX];
> + wchar_t *p;
> + char *convbuf;
> + size_t clen, nbytes;
> +
> + /* Allocate space for the maximum number of bytes we could output. */
> + if (prec < 0) {
> + memset(&mbs, 0, sizeof(mbs));
> + p = wcsarg;
> + nbytes = wcsrtombs(NULL, (const wchar_t **)&p, 0, &mbs);
> + if (nbytes == (size_t)-1) {
> + errno = EILSEQ;
> + return (NULL);
> + }
> + } else {
> + /*
> + * Optimisation: if the output precision is small enough,
> + * just allocate enough memory for the maximum instead of
> + * scanning the string.
> + */
> + if (prec < 128)
> + nbytes = prec;
> + else {
> + nbytes = 0;
> + p = wcsarg;
> + memset(&mbs, 0, sizeof(mbs));
> + for (;;) {
> + clen = wcrtomb(buf, *p++, &mbs);
> + if (clen == 0 || clen == (size_t)-1 ||
> + nbytes + clen > (size_t)prec)
> + break;
> + nbytes += clen;
> + }
> + if (clen == (size_t)-1) {
> + errno = EILSEQ;
> + return (NULL);
> + }
> + }
> + }
> + if ((convbuf = malloc(nbytes + 1)) == NULL)
> + return (NULL);
> +
> + /* Fill the output buffer. */
> + p = wcsarg;
> + memset(&mbs, 0, sizeof(mbs));
> + if ((nbytes = wcsrtombs(convbuf, (const wchar_t **)&p,
> + nbytes, &mbs)) == (size_t)-1) {
> + free(convbuf);
> + errno = EILSEQ;
> + return (NULL);
> + }
> + convbuf[nbytes] = '\0';
> + return (convbuf);
> +}
> +#endif
>
> #ifdef FLOATING_POINT
> #include <float.h>
> @@ -260,7 +329,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
> size_t argtablesiz;
> int nextarg; /* 1-based argument index */
> va_list orgap; /* original argument pointer */
> -
> +#ifdef PRINTF_WIDE_CHAR
> + char *convbuf; /* buffer for wide to multi-byte conversion */
> +#endif
> /*
> * Choose PADSIZE to trade efficiency vs. size. If larger printf
> * fields occur frequently, increase PADSIZE and make the initialisers
> @@ -402,7 +473,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
> uio.uio_resid = 0;
> uio.uio_iovcnt = 0;
> ret = 0;
> -
> +#ifdef PRINTF_WIDE_CHAR
> + convbuf = NULL;
> +#endif
> memset(&ps, 0, sizeof(ps));
> /*
> * Scan the format for conversions (`%' character).
> @@ -553,8 +626,28 @@ reswitch: switch (ch) {
> flags |= SIZEINT;
> goto rflag;
> case 'c':
> - *(cp = buf) = GETARG(int);
> - size = 1;
> +#ifdef PRINTF_WIDE_CHAR
> + if (flags & LONGINT) {
> + mbstate_t mbs;
> + size_t mbseqlen;
> +
> + memset(&mbs, 0, sizeof(mbs));
> + mbseqlen = wcrtomb(buf,
> + (wchar_t)GETARG(wint_t), &mbs);
> + if (mbseqlen == (size_t)-1) {
> + fp->_flags |= __SERR;
> + errno = EILSEQ;
> + goto error;
> + }
> + cp = buf;
> + size = (int)mbseqlen;
> + } else {
> +#endif
> + *(cp = buf) = GETARG(int);
> + size = 1;
> +#ifdef PRINTF_WIDE_CHAR
> + }
> +#endif
> sign = '\0';
> break;
> case 'D':
> @@ -744,6 +837,26 @@ fp_common:
> ox[1] = 'x';
> goto nosign;
> case 's':
> +#ifdef PRINTF_WIDE_CHAR
> + if (flags & LONGINT) {
> + wchar_t *wcp;
> +
> + if (convbuf != NULL) {
> + free(convbuf);
> + convbuf = NULL;
> + }
> + if ((wcp = GETARG(wchar_t *)) == NULL) {
> + cp = "(null)";
> + } else {
> + convbuf = __wcsconv(wcp, prec);
> + if (convbuf == NULL) {
> + fp->_flags = __SERR;
> + goto error;
> + }
> + cp = convbuf;
> + }
> + } else
> +#endif /* PRINTF_WIDE_CHAR */
> if ((cp = GETARG(char *)) == NULL)
> cp = "(null)";
> if (prec >= 0) {
> @@ -995,6 +1108,8 @@ finish:
> #define TP_MAXINT 24
> #define T_CHAR 25
> #define T_U_CHAR 26
> +#define T_WINT 27
> +#define TP_WCHAR 28
>
> /*
> * Find all arguments when a positional parameter is encountered. Returns a
> @@ -1160,7 +1275,12 @@ reswitch: switch (ch) {
> flags |= SIZEINT;
> goto rflag;
> case 'c':
> - ADDTYPE(T_INT);
> +#ifdef PRINTF_WIDE_CHAR
> + if (flags & LONGINT)
> + ADDTYPE(T_WINT);
> + else
> +#endif
> + ADDTYPE(T_INT);
> break;
> case 'D':
> flags |= LONGINT;
> @@ -1210,7 +1330,12 @@ reswitch: switch (ch) {
> ADDTYPE(TP_VOID);
> break;
> case 's':
> - ADDTYPE(TP_CHAR);
> +#ifdef PRINTF_WIDE_CHAR
> + if (flags & LONGINT)
> + ADDTYPE(TP_WCHAR);
> + else
> +#endif
> + ADDTYPE(TP_CHAR);
> break;
> case 'U':
> flags |= LONGINT;
> @@ -1311,6 +1436,14 @@ done:
> case TP_MAXINT:
> (*argtable)[n].intmaxarg = va_arg(ap, intmax_t);
> break;
> +#ifdef PRINTF_WIDE_CHAR
> + case T_WINT:
> + (*argtable)[n].wintarg = va_arg(ap, wint_t);
> + break;
> + case TP_WCHAR:
> + (*argtable)[n].pwchararg = va_arg(ap, wchar_t *);
> + break;
> +#endif
> }
> }
> goto finish;