Based on Roland's email below, I have just built the 2nd version of the
test binaries for S10 sparcv9 and i386 and placed them in the Tmp directory of
the I18N & L10N community:
http://www.opensolaris.org/os/community/int_localization/tmp/
The test binaries are built with S10 FCS and the SOS10 C compilers, and they
should be usable on the SX release too. Your help and contributions to further
testing with your favorite locales would be much appreciated!
Ienup
Roland Mainz wrote:
> Hi!
>
> ----
>
> Attached is the 2nd version of the ksh93 i18n fix
> ("ksh93-shift_ijs_patch002.diff.txt") for testing.
> Can anyone please help to verify that this new patch for ksh93 (Korn
> Shell 93) fixes the problems when inputting/editing text in ja_JP.PCK or
> *.UTF-8 locales?
>
> Building ksh93 from source+patch:
> 1. Download
> http://svn.genunix.org/repos/on/branches/ksh93/gisburn/scripts/buildksh93.ksh
> - this script builds ksh93 from sources (and also contains instructions
> on how to download the sources via "wget")
> 2. Fetch sources as described in "buildksh93.ksh"
> 3. Edit "buildksh93.ksh" to match the platform (default is Solaris 10 on
> i386 with Sun Studio 10/11)
> 4. Unpack source
> % mkdir build
> % cd build
> % gunzip -c ../ast-ksh.2006-02-14.tgz | tar -xf -
> % gunzip -c ../INIT.2006-01-24.tgz | tar -xf -
> 5. Apply patch:
> % gpatch -p0 <ksh93-shift_ijs_patch002.diff.txt
> 6. Build ksh93:
> % time nice ksh ../buildksh93.ksh 2>&1 | tee -a buildlog.log
> 7. Start ksh93:
> % ./arch/sol10.i386/bin/ksh
> % set -o emacs # for emacs editing mode, alternatively you can use "vi"
> or "gmacs" editing mode
> # input and/or edit Japanese/Chinese/Korean text and report whether this
> works correctly
>
> Thanks for the help! :-)
>
> ----
>
> Bye,
> Roland
>
>
>
> ------------------------------------------------------------------------
>
> --- src/cmd/ksh93/edit/edit.c
> +++ src/cmd/ksh93/edit/edit.c 2006-04-19 12:17:59.000000000 +0200
> @@ -28,6 +28,7 @@
> */
>
> #include <ast.h>
> +#include <ast_wchar.h>
> #include <errno.h>
> #include <ccode.h>
> #include <ctype.h>
> @@ -53,7 +54,15 @@
> #define CURSOR_UP "\E[A"
>
> #if SHOPT_MULTIBYTE
> -# define is_print(c) ((c&~STRIP) || isprint(c))
> +# if _hdr_wctype
> +# include <wctype.h>
> +# define is_print(c) iswprint((c))
> +# else
> +# define is_print(c) (((c)&~STRIP) || isprint((c)))
> +# endif
> +# if !_lib_iswprint && !defined(iswprint)
> +# define iswprint(c) (((c)&~STRIP) || isprint((c)))
> +# endif
> #else
> # define is_print(c) isprint(c)
> #endif
> @@ -1166,7 +1175,6 @@
> {
> if(curp == sp)
> r = dp - phys;
> - d = (is_print(c)?1:-1);
> #if SHOPT_MULTIBYTE
> d = mbwidth((wchar_t)c);
> if(d==1 && !is_print(c))
> @@ -1183,7 +1191,9 @@
> continue;
> }
> else
> -#endif /* SHOPT_MULTIBYTE */
> +#else /* not SHOPT_MULTIBYTE */
> + d = (is_print(c)?1:-1);
> +#endif /* not SHOPT_MULTIBYTE */
> if(d<0)
> {
> if(c=='\t')
> --- src/cmd/ksh93/edit/vi.c
> +++ src/cmd/ksh93/edit/vi.c 2006-04-19 12:17:59.000000000 +0200
> @@ -28,6 +28,8 @@
> * cbosgd!pds
> -*/
>
> +#include <ast.h>
> +#include <ast_wchar.h>
>
> #if KSHELL
> # include "defs.h"
> @@ -65,10 +67,16 @@
> # define gencpy(a,b) ed_gencpy(a,b)
> # define genncpy(a,b,n) ed_genncpy(a,b,n)
> # define genlen(str) ed_genlen(str)
> -# define digit(c) ((c&~STRIP)==0 && isdigit(c))
> -# define is_print(c) ((c&~STRIP) || isprint(c))
> +# if _hdr_wctype
> +# include <wctype.h>
> +# define digit(c) iswdigit((c))
> +# define is_print(c) iswprint((c))
> +# else
> +# define digit(c) (((c)&~STRIP)==0 && isdigit((c)))
> +# define is_print(c) (((c)&~STRIP) || isprint((c)))
> +# endif
> # if !_lib_iswprint && !defined(iswprint)
> -# define iswprint(c) is_print((c))
> +# define iswprint(c) (((c)&~STRIP) || isprint((c)))
> # endif
> static int _isalph(int);
> static int _ismetach(int);
> @@ -2027,6 +2035,9 @@
> {
> register int i;
>
> + if (vp->lastline == NULL)
> + return;
> +
> if( (i = cur_virt - first_virt + 1) > 0 )
> {
> /*** save last thing user typed ***/
> --- src/cmd/ksh93/include/national.h
> +++ src/cmd/ksh93/include/national.h 2006-04-19 12:17:59.000000000 +0200
> @@ -29,7 +29,7 @@
> #if SHOPT_MULTIBYTE
>
> # ifndef MARKER
> -# define MARKER 0x7fff /* Must be invalid character */
> +# define MARKER 0xdfff /* Must be invalid character */
> # endif
>
> extern int sh_strchr(const char*,const char*);
> --- src/cmd/ksh93/sh/lex.c
> +++ src/cmd/ksh93/sh/lex.c 2006-04-19 12:17:54.000000000 +0200
> @@ -293,11 +293,12 @@
> {
> switch(*len = mbsize(_Fcin.fcptr))
> {
> - case -1: /* bogus multiByte char - parse as bytes? */
> - case 0: /* NULL byte */
> + case -1: /* bogus multiByte char - parse as bytes? */
> + case 0: /* NULL byte */
> + *len = 1;
> case 1:
> - lexState = state[curChar=fcget()];
> - break;
> + lexState = state[curChar=fcget()];
> + break;
> default:
> /*
> * None of the state tables contain entries
> @@ -1596,6 +1597,36 @@
> {
> if(n!=S_NL)
> {
> +#if SHOPT_MULTIBYTE
> + if(mbwide())
> + {
> + do
> + {
> + ssize_t len;
> + switch((len = mbsize(_Fcin.fcptr)))
> + {
> + case -1: /* bogus multiByte char
> - parse as bytes? */
> + case 0: /* NULL byte */
> + case 1:
> + n = state[fcget()];
> + break;
> + default:
> + /*
> + * None of the state tables
> contain
> + * entries for multibyte
> characters,
> + * however, they should be
> treated
> + * the same as any other alph
> + * character. Therefore, we'll
> use
> + * the state of the 'a'
> character.
> + */
> + mbchar(_Fcin.fcptr);
> + n = state['a'];
> + }
> + }
> + while(n == 0);
> + }
> + else
> +#endif /* SHOPT_MULTIBYTE */
> /* skip over regular characters */
> while((n=state[fcget()])==0);
> }
> --- src/cmd/ksh93/sh/macro.c
> +++ src/cmd/ksh93/sh/macro.c 2006-04-19 12:17:54.000000000 +0200
> @@ -266,7 +266,38 @@
> cp = fcseek(0);
> while(1)
> {
> +#if SHOPT_MULTIBYTE
> + if(mbwide())
> + {
> + do
> + {
> + ssize_t len;
> + switch((len = mbsize(cp)))
> + {
> + case -1: /* bogus multiByte char - parse
> as bytes? */
> + case 0: /* NULL byte */
> + case 1:
> + n = state[*(unsigned char*)cp++];
> + break;
> + default:
> + /*
> + * None of the state tables contain
> + * entries for multibyte characters,
> + * however, they should be treated
> + * the same as any other alph
> + * character. Therefore, we'll use
> + * the state of the 'a' character.
> + */
> + cp += len;
> + n = state['a'];
> + }
> + }
> + while(n == 0);
> + }
> + else
> +#endif /* SHOPT_MULTIBYTE */
> while((n=state[*(unsigned char*)cp++])==0);
> +
> if(n==S_NL || n==S_QUOTE || n==S_RBRA)
> continue;
> if(c=(cp-1)-fcseek(0))
> @@ -395,8 +426,42 @@
> cp++;
> while(1)
> {
> - while((n=state[*(unsigned char*)cp++])==0);
> - c = (cp-1) - first;
> +#if SHOPT_MULTIBYTE
> + if (mbwide())
> + {
> + ssize_t len;
> + do
> + {
> + switch((len = mbsize(cp)))
> + {
> + case -1: /* bogus multiByte char - parse
> as bytes? */
> + case 0: /* NULL byte */
> + len = 1;
> + case 1:
> + n = state[*(unsigned char*)cp++];
> + break;
> + default:
> + /*
> + * None of the state tables contain
> entries
> + * for multibyte characters. However,
> they
> + * should be treated the same as any
> other
> + * alpha character, so we'll use the
> state
> + * which would normally be assigned to
> the
> + * 'a' character.
> + */
> + cp += len;
> + n = state['a'];
> + }
> + }
> + while(n == 0);
> + c = (cp-len) - first;
> + }
> + else
> +#endif /* SHOPT_MULTIBYTE */
> + {
> + while((n=state[*(unsigned char*)cp++])==0);
> + c = (cp-1) - first;
> + }
> switch(n)
> {
> case S_ESC:
> --- src/cmd/ksh93/sh/string.c
> +++ src/cmd/ksh93/sh/string.c 2006-04-19 12:17:59.000000000 +0200
> @@ -24,6 +24,7 @@
> */
>
> #include <ast.h>
> +#include <ast_wchar.h>
> #include "defs.h"
> #include <stak.h>
> #include <ctype.h>
> @@ -36,8 +37,12 @@
> #define mbchar(p) (*(unsigned char*)p++)
> #endif
>
> +#if _hdr_wctype
> +# include <wctype.h>
> +#endif
> +
> #if !_lib_iswprint && !defined(iswprint)
> -# define iswprint(c) ((c&~0377) || isprint(c))
> +# define iswprint(c) (((c)&~0377) || isprint((c)))
> #endif
>
>
> --- src/lib/libast/comp/setlocale.c
> +++ src/lib/libast/comp/setlocale.c 2006-04-19 12:17:59.000000000 +0200
> @@ -30,6 +30,8 @@
>
> #include "lclib.h"
>
> +#include <ast.h>
> +#include <ast_wchar.h>
> #include <ctype.h>
> #include <mc.h>
> #include <namval.h>
> --- src/lib/libast/comp/wc.c
> +++ src/lib/libast/comp/wc.c 2006-04-19 12:17:59.000000000 +0200
> @@ -26,6 +26,7 @@
> */
>
> #include <ast.h>
> +#include <ast_wchar.h>
> #include <wchar.h>
>
> #if !_lib_mbtowc
> --- src/lib/libast/features/wchar
> +++ src/lib/libast/features/wchar 2006-04-19 12:25:08.000000000 +0200
> @@ -1,5 +1,6 @@
> set prototyped
> -lib mbstowcs,wctomb,wcrtomb,wcslen,wcstombs,wcwidth stdlib.h stdio.h wchar.h
> +lib mbstowcs,wctomb,wcrtomb,wcslen,wcstombs,wcscpy,wcwidth stdlib.h stdio.h
> wchar.h wctype.h
> +lib iswprint,iswalnum stdlib.h stdio.h ctype.h wctype.h
> lib towlower,towupper stdlib.h stdio.h wchar.h
> typ mbstate_t stdlib.h stdio.h wchar.h
> nxt wchar
> @@ -30,6 +31,12 @@
> #undef putwc
> #undef putwchar
> #undef ungetwc
> + #undef fwprintf
> + #undef swprintf
> + #undef vfwprintf
> + #undef vswprintf
> + #undef vwprintf
> + #undef wprintf
>
> #define fgetwc _ast_fgetwc
> #define fgetws _ast_fgetws
> @@ -79,6 +86,12 @@
> #if !_lib_wcstombs
> extern size_t wcstombs(char*, const wchar_t*, size_t);
> #endif
> + #if !_lib_wcscpy
> + extern wchar_t *wcscpy(wchar_t*t, const wchar_t*);
> + #endif
> + #if !_lib_wcwidth
> + extern int int wcwidth(wchar_t c);
> + #endif
>
> extern int fwprintf(FILE*, const wchar_t*, ...);
> extern int fwscanf(FILE*, const wchar_t*, ...);
> --- src/lib/libast/regex/reglib.h
> +++ src/lib/libast/regex/reglib.h 2006-04-19 12:17:59.000000000 +0200
> @@ -57,6 +57,7 @@
> char re_rhs[1]; /* substitution rhs */
>
> #include <ast.h>
> +#include <ast_wchar.h>
> #include <cdt.h>
> #include <stk.h>
>
> --- src/lib/libcmd/Mamfile
> +++ src/lib/libcmd/Mamfile 2006-04-19 12:17:54.000000000 +0200
> @@ -444,7 +444,7 @@
> prev cat.c
> meta cat.o %.c>%.o cat.c cat
> prev cat.c
> -exec - ${CC} ${mam_cc_FLAGS} ${CCFLAGS} -I. -I${PACKAGE_ast_INCLUDE}
> -DERROR_CATALOG=\""libcmd"\" -DUSAGE_LICENSE=\""[-author?Glenn Fowler <gsf at
> research.att.com>][-author?David Korn <dgk at
> research.att.com>][-copyright?Copyright (c) 1992-2006 AT&T Knowledge
> Ventures][-license?http://www.opensource.org/licenses/cpl1.0.txt][--catalog?libcmd]"\"
> -D_PACKAGE_ast -D_BLD_cmd -c cat.c
> +exec - ${CC} ${mam_cc_FLAGS} ${CCFLAGS} -I. -I${PACKAGE_ast_INCLUDE}
> -DERROR_CATALOG=\""libcmd"\" -DUSAGE_LICENSE=\""[-author?Glenn Fowler <gsf at
> research.att.com>][-author?David Korn <dgk at
> research.att.com>][-copyright?Copyright (c) 1992-2006 AT&T Knowledge
> Ventures][-license?http://www.opensource.org/licenses/cpl1.0.txt][--catalog?libcmd]"\"
> -D_PACKAGE_ast -D_BLD_cmd -DSHOPT_MULTIBYTE -c cat.c
> done cat.o generated
> make chgrp.o
> prev chgrp.c
> --- src/lib/libcmd/cat.c
> +++ src/lib/libcmd/cat.c 2006-04-19 12:17:54.000000000 +0200
> @@ -133,8 +133,39 @@
> while (endbuff)
> {
> cpold = cp;
> - /* skip over ASCII characters */
> + /* skip over ASCII and multi byte characters */
> +#if SHOPT_MULTIBYTE
> + if(mbwide())
> + {
> + do
> + {
> + ssize_t len;
> + switch((len = mbsize(cp)))
> + {
> + case -1: /* bogus multiByte char
> - parse as bytes? */
> + case 0: /* NULL byte */
> + case 1:
> + n = states[*cp++];
> + break;
> + default:
> + /*
> + * None of the state tables
> contain
> + * entries for multibyte
> characters,
> + * however, they should be
> treated
> + * the same as any other alph
> + * character. Therefore, we'll
> use
> + * the state of the 'a'
> character.
> + */
> + cp += len;
> + n = states['a'];
> + }
> + }
> + while(n == 0);
> + }
> + else
> +#endif /* SHOPT_MULTIBYTE */
> while ((n = states[*cp++]) == 0);
> +
> if (n==T_ENDBUF)
> {
> if (cp>endbuff)
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> i18n-discuss mailing list
> i18n-discuss at opensolaris.org
> http://mail.opensolaris.org/mailman/listinfo/i18n-discuss