Anyone?

On Sat, Apr 23, 2011 at 10:52:59PM +0200, Stefan Sperling wrote:
> This patch implements the %ls and %lc format directives (wchar_t * and
> wint_t arguments, respectively). Based on NetBSD and FreeBSD code.
> The vfprintf(3) man page has wrongly been claiming that we already
> support them.
> 
> Because vfprintf(3) is used on ramdisks, the code is wrapped in
> #ifdef PRINTF_WIDE_CHAR, which is defined when the system libc is built
> but not when the vfprintf() stub for the ramdisk is built. This way,
> ramdisks should not be affected.
> 
> The alternative to having the #ifdef would be adding libstubs for
> wcsrtombs() and wcrtomb() (to avoid pulling citrus stuff onto the
> ramdisk) and tolerating some useless growth of the vfprintf() stub.
> While conditional compilation sucks in general, I think we're better
> off with the #ifdef in this case.
> I'll happily implement the alternative if it's preferred, though.
> 
> Index: stdio/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdio/Makefile.inc,v
> retrieving revision 1.13
> diff -u -p -r1.13 Makefile.inc
> --- stdio/Makefile.inc        17 Jun 2005 20:40:32 -0000      1.13
> +++ stdio/Makefile.inc        23 Apr 2011 20:01:40 -0000
> @@ -3,7 +3,7 @@
>  # stdio sources
>  .PATH: ${LIBCSRCDIR}/stdio
>  
> -CFLAGS+=-DFLOATING_POINT
> +CFLAGS+=-DFLOATING_POINT -DPRINTF_WIDE_CHAR
>  
>  SRCS+=       asprintf.c clrerr.c fclose.c fdopen.c feof.c ferror.c fflush.c fgetc.c \
>       fgetln.c fgetpos.c fgets.c fileno.c findfp.c flags.c fopen.c \
> Index: stdio/vfprintf.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdio/vfprintf.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 vfprintf.c
> --- stdio/vfprintf.c  22 Dec 2010 14:54:44 -0000      1.60
> +++ stdio/vfprintf.c  23 Apr 2011 20:36:24 -0000
> @@ -49,6 +49,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <unistd.h>
> +#include <wchar.h>
>  
>  #include "local.h"
>  #include "fvwrite.h"
> @@ -79,6 +80,8 @@ union arg {
>       double                  doublearg;
>       long double             longdoublearg;
>  #endif
> +     wint_t                  wintarg;
> +     wchar_t                 *pwchararg;
>  };
>  
>  static int __find_arguments(const char *fmt0, va_list ap, union arg **argtable,
> @@ -138,6 +141,72 @@ __sbprintf(FILE *fp, const char *fmt, va
>       return (ret);
>  }
>  
> +#ifdef PRINTF_WIDE_CHAR
> +/*
> + * Convert a wide character string argument for the %ls format to a multibyte
> + * string representation. If not -1, prec specifies the maximum number of
> + * bytes to output, and also means that we can't assume that the wide char
> + * string is null-terminated.
> + */
> +static char *
> +__wcsconv(wchar_t *wcsarg, int prec)
> +{
> +     mbstate_t mbs;
> +     char buf[MB_LEN_MAX];
> +     wchar_t *p;
> +     char *convbuf;
> +     size_t clen, nbytes;
> +
> +     /* Allocate space for the maximum number of bytes we could output. */
> +     if (prec < 0) {
> +             memset(&mbs, 0, sizeof(mbs));
> +             p = wcsarg;
> +             nbytes = wcsrtombs(NULL, (const wchar_t **)&p, 0, &mbs);
> +             if (nbytes == (size_t)-1) {
> +                     errno = EILSEQ;
> +                     return (NULL);
> +             }
> +     } else {
> +             /*
> +              * Optimisation: if the output precision is small enough,
> +              * just allocate enough memory for the maximum instead of
> +              * scanning the string.
> +              */
> +             if (prec < 128)
> +                     nbytes = prec;
> +             else {
> +                     nbytes = 0;
> +                     p = wcsarg;
> +                     memset(&mbs, 0, sizeof(mbs));
> +                     for (;;) {
> +                             clen = wcrtomb(buf, *p++, &mbs);
> +                             if (clen == 0 || clen == (size_t)-1 ||
> +                                 nbytes + clen > (size_t)prec)
> +                                     break;
> +                             nbytes += clen;
> +                     }
> +                     if (clen == (size_t)-1) {
> +                             errno = EILSEQ;
> +                             return (NULL);
> +                     }
> +             }
> +     }
> +     if ((convbuf = malloc(nbytes + 1)) == NULL)
> +             return (NULL);
> +
> +     /* Fill the output buffer. */
> +     p = wcsarg;
> +     memset(&mbs, 0, sizeof(mbs));
> +     if ((nbytes = wcsrtombs(convbuf, (const wchar_t **)&p,
> +         nbytes, &mbs)) == (size_t)-1) {
> +             free(convbuf);
> +             errno = EILSEQ;
> +             return (NULL);
> +     }
> +     convbuf[nbytes] = '\0';
> +     return (convbuf);
> +}
> +#endif
>  
>  #ifdef FLOATING_POINT
>  #include <float.h>
> @@ -260,7 +329,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
>       size_t argtablesiz;
>       int nextarg;            /* 1-based argument index */
>       va_list orgap;          /* original argument pointer */
> -
> +#ifdef PRINTF_WIDE_CHAR
> +     char *convbuf;          /* buffer for wide to multi-byte conversion */
> +#endif
>       /*
>        * Choose PADSIZE to trade efficiency vs. size.  If larger printf
>        * fields occur frequently, increase PADSIZE and make the initialisers
> @@ -402,7 +473,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
>       uio.uio_resid = 0;
>       uio.uio_iovcnt = 0;
>       ret = 0;
> -
> +#ifdef PRINTF_WIDE_CHAR
> +     convbuf = NULL;
> +#endif
>       memset(&ps, 0, sizeof(ps));
>       /*
>        * Scan the format for conversions (`%' character).
> @@ -553,8 +626,28 @@ reswitch:        switch (ch) {
>                       flags |= SIZEINT;
>                       goto rflag;
>               case 'c':
> -                     *(cp = buf) = GETARG(int);
> -                     size = 1;
> +#ifdef PRINTF_WIDE_CHAR
> +                     if (flags & LONGINT) {
> +                             mbstate_t mbs;
> +                             size_t mbseqlen;
> +
> +                             memset(&mbs, 0, sizeof(mbs));
> +                             mbseqlen = wcrtomb(buf,
> +                                 (wchar_t)GETARG(wint_t), &mbs);
> +                             if (mbseqlen == (size_t)-1) {
> +                                     fp->_flags |= __SERR;
> +                                     errno = EILSEQ;
> +                                     goto error;
> +                             }
> +                             cp = buf;
> +                             size = (int)mbseqlen;
> +                     } else {
> +#endif
> +                             *(cp = buf) = GETARG(int);
> +                             size = 1;
> +#ifdef PRINTF_WIDE_CHAR
> +                     }
> +#endif
>                       sign = '\0';
>                       break;
>               case 'D':
> @@ -744,6 +837,26 @@ fp_common:
>                       ox[1] = 'x';
>                       goto nosign;
>               case 's':
> +#ifdef PRINTF_WIDE_CHAR
> +                     if (flags & LONGINT) {
> +                             wchar_t *wcp;
> +
> +                             if (convbuf != NULL) {
> +                                     free(convbuf);
> +                                     convbuf = NULL;
> +                             }
> +                             if ((wcp = GETARG(wchar_t *)) == NULL) {
> +                                     cp = "(null)";
> +                             } else {
> +                                     convbuf = __wcsconv(wcp, prec);
> +                                     if (convbuf == NULL) {
> +                                             fp->_flags = __SERR;
> +                                             goto error;
> +                                     }
> +                                     cp = convbuf;
> +                             }
> +                     } else
> +#endif /* PRINTF_WIDE_CHAR */
>                       if ((cp = GETARG(char *)) == NULL)
>                               cp = "(null)";
>                       if (prec >= 0) {
> @@ -995,6 +1108,8 @@ finish:
>  #define TP_MAXINT    24
>  #define T_CHAR               25
>  #define T_U_CHAR     26
> +#define T_WINT               27
> +#define TP_WCHAR     28
>  
>  /*
>   * Find all arguments when a positional parameter is encountered.  Returns a
> @@ -1160,7 +1275,12 @@ reswitch:      switch (ch) {
>                       flags |= SIZEINT;
>                       goto rflag;
>               case 'c':
> -                     ADDTYPE(T_INT);
> +#ifdef PRINTF_WIDE_CHAR
> +                     if (flags & LONGINT)
> +                             ADDTYPE(T_WINT);
> +                     else
> +#endif
> +                             ADDTYPE(T_INT);
>                       break;
>               case 'D':
>                       flags |= LONGINT;
> @@ -1210,7 +1330,12 @@ reswitch:      switch (ch) {
>                       ADDTYPE(TP_VOID);
>                       break;
>               case 's':
> -                     ADDTYPE(TP_CHAR);
> +#ifdef PRINTF_WIDE_CHAR
> +                     if (flags & LONGINT)
> +                             ADDTYPE(TP_WCHAR);
> +                     else
> +#endif
> +                             ADDTYPE(TP_CHAR);
>                       break;
>               case 'U':
>                       flags |= LONGINT;
> @@ -1311,6 +1436,14 @@ done:
>               case TP_MAXINT:
>                       (*argtable)[n].intmaxarg = va_arg(ap, intmax_t);
>                       break;
> +#ifdef PRINTF_WIDE_CHAR
> +             case T_WINT:
> +                     (*argtable)[n].wintarg = va_arg(ap, wint_t);
> +                     break;
> +             case TP_WCHAR:
> +                     (*argtable)[n].pwchararg = va_arg(ap, wchar_t *);
> +                     break;
> +#endif
>               }
>       }
>       goto finish;

Reply via email to