In perl.git, the branch smoke-me/mauke/keyword-plugin-mutex has been updated
<https://perl5.git.perl.org/perl.git/commitdiff/a837dcbb7a60e741a4b8a8d42513c6915b5b8349?hp=85d38063ea1ca720fa2cef564b16612b79eefbae> discards 85d38063ea1ca720fa2cef564b16612b79eefbae (commit) discards cdcd162a59674bec9ec6c2343ae708311bec74be (commit) discards e8d66e5de269d8c9ac88febbc8f2fc9da4eaa02a (commit) discards d2f48fb882e67371b37300c2df34f68028b7d993 (commit) - Log ----------------------------------------------------------------- commit a837dcbb7a60e741a4b8a8d42513c6915b5b8349 Author: Lukas Mai <l....@web.de> Date: Thu Nov 9 01:19:58 2017 +0100 perldelta entry for wrap_keyword_plugin commit 298eb9a30f1c32b38e85848fdc53bec476d9a05e Author: Lukas Mai <l....@web.de> Date: Thu Nov 9 01:00:23 2017 +0100 test wrap_keyword_plugin (RT #132413) commit fff8944ab033c8e3b93e85887aba0c560fcfc4c6 Author: Lukas Mai <l....@web.de> Date: Thu Nov 9 00:59:53 2017 +0100 add wrap_keyword_plugin function (RT #132413) ----------------------------------------------------------------------- Summary of changes: embed.fnc | 15 +- embed.h | 18 +- embedvar.h | 2 +- ext/POSIX/POSIX.xs | 45 +- ext/XS-APItest/APItest.xs | 2 +- intrpvar.h | 6 +- locale.c | 1062 +++++++++++++++++++++++++++------------------ numeric.c | 25 +- perl.h | 77 ++-- proto.h | 20 +- sv.c | 4 +- toke.c | 7 +- utf8.c | 28 +- 13 files changed, 790 insertions(+), 521 deletions(-) diff --git a/embed.fnc b/embed.fnc index 69d6a9b86f..31c020aa5f 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1269,9 +1269,9 @@ ApOM |int |init_i18nl14n |int printwarn p |char* |my_strerror |const int errnum Xpn |void |_warn_problematic_locale p |void |new_numeric |NULLOK const char* newcoll -Xp |void |set_numeric_local +Xp |void |set_numeric_underlying Xp |void |set_numeric_standard -ApM |bool |_is_in_locale_category|const bool compiling|const int category +Xp |bool |_is_in_locale_category|const bool compiling|const int category Apd |void |sync_locale ApdO |void |require_pv |NN const char* pv Apd |void |pack_cat |NN SV *cat|NN const char *pat|NN const char *patend \ @@ -1721,7 +1721,7 @@ EXMp |void |_invlist_dump |NN PerlIO *file|I32 level \ Ap |void |taint_env Ap |void |taint_proper |NULLOK const char* f|NN const char *const s EpM |char * |_byte_dump_string \ - |NN const U8 * s \ + |NN const U8 * const start \ |const STRLEN len \ |const bool format #if defined(PERL_IN_UTF8_C) @@ -2724,6 +2724,11 @@ s |bool |isa_lookup |NN HV *stash|NN const char * const name \ #endif #if defined(PERL_IN_LOCALE_C) +# ifdef HAS_NL_LANGINFO +sn |const char*|my_nl_langinfo|const nl_item item|bool toggle +# else +sn |const char*|my_nl_langinfo|const int item|bool toggle +# endif in |const char *|save_to_buffer|NN const char * string \ |NULLOK char **buf \ |NN Size_t *buf_size \ @@ -2732,9 +2737,9 @@ in |const char *|save_to_buffer|NN const char * string \ s |char* |stdize_locale |NN char* locs s |void |new_collate |NULLOK const char* newcoll s |void |new_ctype |NN const char* newctype -s |void |set_numeric_radix +s |void |set_numeric_radix|const bool use_locale # ifdef WIN32 -s |char* |my_setlocale |int category|NULLOK const char* locale +s |char* |win32_setlocale|int category|NULLOK const char* locale # endif # ifdef DEBUGGING s |void |print_collxfrm_input_and_return \ diff --git a/embed.h b/embed.h index 4b454402c1..46c59b5b4e 100644 --- a/embed.h +++ b/embed.h @@ -28,7 +28,6 @@ #define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b) #define _force_out_malformed_utf8_message(a,b,c,d) Perl__force_out_malformed_utf8_message(aTHX_ a,b,c,d) -#define _is_in_locale_category(a,b) Perl__is_in_locale_category(aTHX_ a,b) #define _is_uni_FOO(a,b) Perl__is_uni_FOO(aTHX_ a,b) #define _is_uni_perl_idcont(a) Perl__is_uni_perl_idcont(aTHX_ a) #define _is_uni_perl_idstart(a) Perl__is_uni_perl_idstart(aTHX_ a) @@ -1146,6 +1145,7 @@ #ifdef PERL_CORE #define Slab_Alloc(a) Perl_Slab_Alloc(aTHX_ a) #define Slab_Free(a) Perl_Slab_Free(aTHX_ a) +#define _is_in_locale_category(a,b) Perl__is_in_locale_category(aTHX_ a,b) #define _warn_problematic_locale Perl__warn_problematic_locale #define abort_execution(a,b) Perl_abort_execution(aTHX_ a,b) #define alloc_LOGOP(a,b,c) Perl_alloc_LOGOP(aTHX_ a,b,c) @@ -1378,8 +1378,8 @@ #define scalar(a) Perl_scalar(aTHX_ a) #define scalarvoid(a) Perl_scalarvoid(aTHX_ a) #define set_caret_X() Perl_set_caret_X(aTHX) -#define set_numeric_local() Perl_set_numeric_local(aTHX) #define set_numeric_standard() Perl_set_numeric_standard(aTHX) +#define set_numeric_underlying() Perl_set_numeric_underlying(aTHX) #define sub_crush_depth(a) Perl_sub_crush_depth(aTHX_ a) #define sv_2num(a) Perl_sv_2num(aTHX_ a) #define sv_clean_all() Perl_sv_clean_all(aTHX) @@ -1414,6 +1414,11 @@ # endif # endif # endif +# if !(defined(HAS_NL_LANGINFO)) +# if defined(PERL_IN_LOCALE_C) +#define my_nl_langinfo S_my_nl_langinfo +# endif +# endif # if !(defined(HAS_SIGACTION) && defined(SA_SIGINFO)) #define sighandler Perl_sighandler # endif @@ -1502,6 +1507,11 @@ #define do_semop(a,b) Perl_do_semop(aTHX_ a,b) #define do_shmio(a,b,c) Perl_do_shmio(aTHX_ a,b,c) # endif +# if defined(HAS_NL_LANGINFO) +# if defined(PERL_IN_LOCALE_C) +#define my_nl_langinfo S_my_nl_langinfo +# endif +# endif # if defined(HAS_SIGACTION) && defined(SA_SIGINFO) #define sighandler Perl_sighandler # endif @@ -1593,10 +1603,10 @@ # if defined(USE_LOCALE) #define new_collate(a) S_new_collate(aTHX_ a) #define new_ctype(a) S_new_ctype(aTHX_ a) -#define set_numeric_radix() S_set_numeric_radix(aTHX) +#define set_numeric_radix(a) S_set_numeric_radix(aTHX_ a) #define stdize_locale(a) S_stdize_locale(aTHX_ a) # if defined(WIN32) -#define my_setlocale(a,b) S_my_setlocale(aTHX_ a,b) +#define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b) # endif # endif # endif diff --git a/embedvar.h b/embedvar.h index a3e014f96d..1a146c4d54 100644 --- a/embedvar.h +++ b/embedvar.h @@ -219,10 +219,10 @@ #define PL_my_cxt_size (vTHX->Imy_cxt_size) #define PL_na (vTHX->Ina) #define PL_nomemok (vTHX->Inomemok) -#define PL_numeric_local (vTHX->Inumeric_local) #define PL_numeric_name (vTHX->Inumeric_name) #define PL_numeric_radix_sv (vTHX->Inumeric_radix_sv) #define PL_numeric_standard (vTHX->Inumeric_standard) +#define PL_numeric_underlying (vTHX->Inumeric_underlying) #define PL_ofsgv (vTHX->Iofsgv) #define PL_oldname (vTHX->Ioldname) #define PL_op (vTHX->Iop) diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index f4a93290ce..cf27c12dd0 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1,4 +1,5 @@ #define PERL_EXT_POSIX +#define PERL_EXT #ifdef NETWARE #define _POSIX_ @@ -2124,14 +2125,29 @@ localeconv() #else struct lconv *lcbuf; + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + /* localeconv() deals with both LC_NUMERIC and LC_MONETARY, but * LC_MONETARY is already in the correct locale */ - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; +# ifdef USE_LOCALE_MONETARY + + const bool is_monetary_utf8 = _is_cur_LC_category_utf8(LC_MONETARY); +# endif +# ifdef USE_LOCALE_NUMERIC + + bool is_numeric_utf8; + STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); + is_numeric_utf8 = _is_cur_LC_category_utf8(LC_NUMERIC); +# endif + RETVAL = newHV(); sv_2mortal((SV*)RETVAL); - if ((lcbuf = localeconv())) { + + lcbuf = localeconv(); + + if (lcbuf) { const struct lconv_offset *strings = lconv_strings; const struct lconv_offset *integers = lconv_integers; const char *ptr = (const char *) lcbuf; @@ -2139,18 +2155,18 @@ localeconv() while (strings->name) { /* This string may be controlled by either LC_NUMERIC, or * LC_MONETARY */ - bool is_utf8_locale -#if defined(USE_LOCALE_NUMERIC) && defined(USE_LOCALE_MONETARY) - = _is_cur_LC_category_utf8((isLC_NUMERIC_STRING(strings->name)) - ? LC_NUMERIC - : LC_MONETARY); -#elif defined(USE_LOCALE_NUMERIC) - = _is_cur_LC_category_utf8(LC_NUMERIC); -#elif defined(USE_LOCALE_MONETARY) - = _is_cur_LC_category_utf8(LC_MONETARY); -#else - = FALSE; -#endif + const bool is_utf8_locale = +# if defined(USE_LOCALE_NUMERIC) && defined(USE_LOCALE_MONETARY) + (isLC_NUMERIC_STRING(strings->name)) + ? is_numeric_utf8 + : is_monetary_utf8; +# elif defined(USE_LOCALE_NUMERIC) + is_numeric_utf8; +# elif defined(USE_LOCALE_MONETARY) + is_monetary_utf8; +# else + FALSE; +# endif const char *value = *((const char **)(ptr + strings->offset)); @@ -2181,6 +2197,7 @@ localeconv() integers++; } } + RESTORE_LC_NUMERIC_STANDARD(); #endif /* HAS_LOCALECONV */ OUTPUT: diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 579bf7bf58..8bf1545b63 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -2886,7 +2886,7 @@ utf16_to_utf8 (sv, ...) len = SvUV(ST(1)); } /* Mortalise this right now, as we'll be testing croak()s */ - dest = sv_2mortal(newSV(len * 3 / 2 + 1)); + dest = sv_2mortal(newSV(len * 2 + 1)); if (ix) { utf16_to_utf8_reversed(source, (U8 *)SvPVX(dest), len, &got); } else { diff --git a/intrpvar.h b/intrpvar.h index 87f33d8bb4..d88628a094 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -613,9 +613,9 @@ PERLVARI(I, perl_destruct_level, signed char, 0) #ifdef USE_LOCALE_NUMERIC PERLVARI(I, numeric_standard, int, TRUE) - /* Assume simple numerics */ -PERLVARI(I, numeric_local, bool, TRUE) - /* Assume local numerics */ + /* Assume C locale numerics */ +PERLVARI(I, numeric_underlying, bool, TRUE) + /* Assume underlying locale numerics */ PERLVAR(I, numeric_name, char *) /* Name of current numeric locale */ PERLVAR(I, numeric_radix_sv, SV *) /* The radix separator if not '.' */ diff --git a/locale.c b/locale.c index 5db28add69..a2985f7457 100644 --- a/locale.c +++ b/locale.c @@ -23,8 +23,8 @@ /* utility functions for handling locale-specific stuff like what * character represents the decimal point. * - * All C programs have an underlying locale. Perl generally doesn't pay any - * attention to it except within the scope of a 'use locale'. For most + * All C programs have an underlying locale. Perl code generally doesn't pay + * any attention to it except within the scope of a 'use locale'. For most * categories, it accomplishes this by just using different operations if it is * in such scope than if not. However, various libc functions called by Perl * are affected by the LC_NUMERIC category, so there are macros in perl.h that @@ -45,16 +45,20 @@ * initialization. This is done before option parsing, and before any thread * creation, so can be a file-level static */ #ifdef DEBUGGING -# ifdef PERL_GLOBAL_STRUCT +# ifdef PERL_GLOBAL_STRUCT /* no global syms allowed */ -# define debug_initialization 0 -# define DEBUG_INITIALIZATION_set(v) -# else +# define debug_initialization 0 +# define DEBUG_INITIALIZATION_set(v) +# else static bool debug_initialization = FALSE; -# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) -# endif +# define DEBUG_INITIALIZATION_set(v) (debug_initialization = v) +# endif #endif +/* strlen() of a literal string constant. XXX We might want this more general, + * but using it in just this file for now */ +#define STRLENs(s) (sizeof("" s "") - 1) + #ifdef USE_LOCALE /* @@ -102,35 +106,60 @@ S_stdize_locale(pTHX_ char *locs) #endif +/* Windows requres a customized base-level setlocale() */ +# ifdef WIN32 +# define my_setlocale(cat, locale) win32_setlocale(cat, locale) +# else +# define my_setlocale(cat, locale) setlocale(cat, locale) +# endif + +/* Just placeholders for now. "_c" is intended to be called when the category + * is a constant known at compile time; "_r", not known until run time */ +# define do_setlocale_c(category, locale) my_setlocale(category, locale) +# define do_setlocale_r(category, locale) my_setlocale(category, locale) + STATIC void -S_set_numeric_radix(pTHX) +S_set_numeric_radix(pTHX_ const bool use_locale) { -#ifdef USE_LOCALE_NUMERIC -# ifdef HAS_LOCALECONV - const struct lconv* const lc = localeconv(); + /* If 'use_locale' is FALSE, set to use a dot for the radix character. If + * TRUE, use the radix character derived from the current locale */ - if (lc && lc->decimal_point) { - if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { - SvREFCNT_dec(PL_numeric_radix_sv); - PL_numeric_radix_sv = NULL; - } - else { - if (PL_numeric_radix_sv) - sv_setpv(PL_numeric_radix_sv, lc->decimal_point); - else - PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); - if (! is_utf8_invariant_string((U8 *) lc->decimal_point, 0) - && is_utf8_string((U8 *) lc->decimal_point, 0) +#if defined(USE_LOCALE_NUMERIC) && ( defined(HAS_LOCALECONV) \ + || defined(HAS_NL_LANGINFO)) + + /* We only set up the radix SV if we are to use a locale radix ... */ + if (use_locale) { + const char * radix = my_nl_langinfo(PERL_RADIXCHAR, FALSE); + /* FALSE => already in dest locale */ + + /* ... and the character being used isn't a dot */ + if (strNE(radix, ".")) { + if (PL_numeric_radix_sv) { + sv_setpv(PL_numeric_radix_sv, radix); + } + else { + PL_numeric_radix_sv = newSVpv(radix, 0); + } + + if ( ! is_utf8_invariant_string( + (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) + && is_utf8_string( + (U8 *) SvPVX(PL_numeric_radix_sv), SvCUR(PL_numeric_radix_sv)) && _is_cur_LC_category_utf8(LC_NUMERIC)) { - SvUTF8_on(PL_numeric_radix_sv); + SvUTF8_on(PL_numeric_radix_sv); } - } + goto done; + } } - else - PL_numeric_radix_sv = NULL; -#ifdef DEBUGGING + SvREFCNT_dec(PL_numeric_radix_sv); + PL_numeric_radix_sv = NULL; + + done: ; + +# ifdef DEBUGGING + if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, "Locale radix is '%s', ?UTF-8=%d\n", (PL_numeric_radix_sv) @@ -140,10 +169,10 @@ S_set_numeric_radix(pTHX) ? cBOOL(SvUTF8(PL_numeric_radix_sv)) : 0); } -#endif -# endif /* HAS_LOCALECONV */ -#endif /* USE_LOCALE_NUMERIC */ +# endif +#endif /* USE_LOCALE_NUMERIC and can find the radix char */ + } /* Is the C string input 'name' "C" or "POSIX"? If so, and 'name' is the @@ -152,26 +181,32 @@ S_set_numeric_radix(pTHX) * the odds are extremely small that it would return these two strings for some * other locale. Note that VMS in these two locales includes many non-ASCII * characters as controls and punctuation (below are hex bytes): - * cntrl: 00-1F 7F 84-97 9B-9F - * punct: 21-2F 3A-40 5B-60 7B-7E A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD + * cntrl: 84-97 9B-9F + * punct: A1-A3 A5 A7-AB B0-B3 B5-B7 B9-BD BF-CF D1-DD DF-EF F1-FD * Oddly, none there are listed as alphas, though some represent alphabetics * http://www.nntp.perl.org/group/perl.perl5.porters/2013/02/msg198753.html */ -#define isNAME_C_OR_POSIX(name) ((name) != NULL \ - && ((*(name) == 'C' && (*(name + 1)) == '\0') \ - || strEQ((name), "POSIX"))) +#define isNAME_C_OR_POSIX(name) \ + ( (name) != NULL \ + && (( *(name) == 'C' && (*(name + 1)) == '\0') \ + || strEQ((name), "POSIX"))) void Perl_new_numeric(pTHX_ const char *newnum) { -#ifdef USE_LOCALE_NUMERIC + +#ifndef USE_LOCALE_NUMERIC + + PERL_UNUSED_ARG(newnum); + +#else /* Called after all libc setlocale() calls affecting LC_NUMERIC, to tell * core Perl this and that 'newnum' is the name of the new locale. * It installs this locale as the current underlying default. * * The default locale and the C locale can be toggled between by use of the - * set_numeric_local() and set_numeric_standard() functions, which should - * probably not be called directly, but only via macros like + * set_numeric_underlying() and set_numeric_standard() functions, which + * should probably not be called directly, but only via macros like * SET_NUMERIC_STANDARD() in perl.h. * * The toggling is necessary mainly so that a non-dot radix decimal point @@ -180,7 +215,7 @@ Perl_new_numeric(pTHX_ const char *newnum) * * This sets several interpreter-level variables: * PL_numeric_name The underlying locale's name: a copy of 'newnum' - * PL_numeric_local A boolean indicating if the toggled state is such + * PL_numeric_underlying A boolean indicating if the toggled state is such * that the current locale is the program's underlying * locale * PL_numeric_standard An int indicating if the toggled state is such @@ -202,14 +237,14 @@ Perl_new_numeric(pTHX_ const char *newnum) Safefree(PL_numeric_name); PL_numeric_name = NULL; PL_numeric_standard = TRUE; - PL_numeric_local = TRUE; + PL_numeric_underlying = TRUE; return; } save_newnum = stdize_locale(savepv(newnum)); PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum); - PL_numeric_local = TRUE; + PL_numeric_underlying = TRUE; if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { Safefree(PL_numeric_name); @@ -224,60 +259,67 @@ Perl_new_numeric(pTHX_ const char *newnum) * need the underlying locale change to it temporarily). */ set_numeric_standard(); - set_numeric_radix(); - -#else - PERL_UNUSED_ARG(newnum); #endif /* USE_LOCALE_NUMERIC */ + } void Perl_set_numeric_standard(pTHX) { + #ifdef USE_LOCALE_NUMERIC + /* Toggle the LC_NUMERIC locale to C. Most code should use the macros like * SET_NUMERIC_STANDARD() in perl.h instead of calling this directly. The * macro avoids calling this routine if toggling isn't necessary according * to our records (which could be wrong if some XS code has changed the * locale behind our back) */ - setlocale(LC_NUMERIC, "C"); + do_setlocale_c(LC_NUMERIC, "C"); PL_numeric_standard = TRUE; - PL_numeric_local = isNAME_C_OR_POSIX(PL_numeric_name); - set_numeric_radix(); -#ifdef DEBUGGING + PL_numeric_underlying = isNAME_C_OR_POSIX(PL_numeric_name); + set_numeric_radix(0); + +# ifdef DEBUGGING + if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "Underlying LC_NUMERIC locale now is C\n"); + "LC_NUMERIC locale now is standard C\n"); } -#endif +# endif #endif /* USE_LOCALE_NUMERIC */ + } void -Perl_set_numeric_local(pTHX) +Perl_set_numeric_underlying(pTHX) { + #ifdef USE_LOCALE_NUMERIC + /* Toggle the LC_NUMERIC locale to the current underlying default. Most - * code should use the macros like SET_NUMERIC_LOCAL() in perl.h instead of - * calling this directly. The macro avoids calling this routine if - * toggling isn't necessary according to our records (which could be wrong - * if some XS code has changed the locale behind our back) */ + * code should use the macros like SET_NUMERIC_UNDERLYING() in perl.h + * instead of calling this directly. The macro avoids calling this routine + * if toggling isn't necessary according to our records (which could be + * wrong if some XS code has changed the locale behind our back) */ - setlocale(LC_NUMERIC, PL_numeric_name); + do_setlocale_c(LC_NUMERIC, PL_numeric_name); PL_numeric_standard = isNAME_C_OR_POSIX(PL_numeric_name); - PL_numeric_local = TRUE; - set_numeric_radix(); -#ifdef DEBUGGING + PL_numeric_underlying = TRUE; + set_numeric_radix(1); + +# ifdef DEBUGGING + if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, - "Underlying LC_NUMERIC locale now is %s\n", + "LC_NUMERIC locale now is %s\n", PL_numeric_name); } -#endif +# endif #endif /* USE_LOCALE_NUMERIC */ + } /* @@ -286,7 +328,14 @@ Perl_set_numeric_local(pTHX) STATIC void S_new_ctype(pTHX_ const char *newctype) { -#ifdef USE_LOCALE_CTYPE + +#ifndef USE_LOCALE_CTYPE + + PERL_ARGS_ASSERT_NEW_CTYPE; + PERL_UNUSED_ARG(newctype); + PERL_UNUSED_CONTEXT; + +#else /* Called after all libc setlocale() calls affecting LC_CTYPE, to tell * core Perl this and that 'newctype' is the name of the new locale. @@ -349,10 +398,10 @@ S_new_ctype(pTHX_ const char *newctype) * nowadays. It isn't a problem for most controls to be changed * into something else; we check only \n and \t, though perhaps \r * could be an issue as well. */ - if (check_for_problems + if ( check_for_problems && (isGRAPH_A(i) || isBLANK_A(i) || i == '\n')) { - if ((isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i)) + if (( isALPHANUMERIC_A(i) && ! isALPHANUMERIC_LC(i)) || (isPUNCT_A(i) && ! isPUNCT_LC(i)) || (isBLANK_A(i) && ! isBLANK_LC(i)) || (i == '\n' && ! isCNTRL_LC(i))) @@ -381,7 +430,8 @@ S_new_ctype(pTHX_ const char *newctype) } } -#ifdef MB_CUR_MAX +# ifdef MB_CUR_MAX + /* We only handle single-byte locales (outside of UTF-8 ones; so if * this locale requires more than one byte, there are going to be * problems. */ @@ -401,7 +451,8 @@ S_new_ctype(pTHX_ const char *newctype) { multi_byte_locale = TRUE; } -#endif + +# endif if (bad_count || multi_byte_locale) { PL_warn_locale = Perl_newSVpvf(aTHX_ @@ -433,12 +484,12 @@ S_new_ctype(pTHX_ const char *newctype) * here is transparent to this function's caller */ const char * const badlocale = savepv(newctype); - setlocale(LC_CTYPE, "C"); + do_setlocale_c(LC_CTYPE, "C"); /* The '0' below suppresses a bogus gcc compiler warning */ Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0); - setlocale(LC_CTYPE, badlocale); + do_setlocale_c(LC_CTYPE, badlocale); Safefree(badlocale); if (IN_LC(LC_CTYPE)) { @@ -450,9 +501,7 @@ S_new_ctype(pTHX_ const char *newctype) } #endif /* USE_LOCALE_CTYPE */ - PERL_ARGS_ASSERT_NEW_CTYPE; - PERL_UNUSED_ARG(newctype); - PERL_UNUSED_CONTEXT; + } void @@ -484,16 +533,17 @@ Perl__warn_problematic_locale() STATIC void S_new_collate(pTHX_ const char *newcoll) { -#ifdef USE_LOCALE_COLLATE + +#ifndef USE_LOCALE_COLLATE + + PERL_UNUSED_ARG(newcoll); + PERL_UNUSED_CONTEXT; + +#else /* Called after all libc setlocale() calls affecting LC_COLLATE, to tell * core Perl this and that 'newcoll' is the name of the new locale. * - * Any code changing the locale (outside this file) should use - * POSIX::setlocale, which calls this function. Therefore this function - * should be called directly only from this file and from - * POSIX::setlocale(). - * * The design of locale collation is that every locale change is given an * index 'PL_collation_ix'. The first time a string particpates in an * operation that requires collation while locale collation is active, it @@ -681,7 +731,8 @@ S_new_collate(pTHX_ const char *newcoll) PL_collxfrm_base = base + 1; } -#ifdef DEBUGGING +# ifdef DEBUGGING + if (DEBUG_L_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, "%s:%d: ?UTF-8 locale=%d; x_len_shorter=%zu, " @@ -692,80 +743,96 @@ S_new_collate(pTHX_ const char *newcoll) x_len_shorter, x_len_longer, PL_collxfrm_mult, PL_collxfrm_base); } -#endif +# endif + } } -#else - PERL_UNUSED_ARG(newcoll); #endif /* USE_LOCALE_COLLATE */ -} - -#ifndef WIN32 /* No wrapper except on Windows */ -#define my_setlocale(a,b) setlocale(a,b) +} -#else /* WIN32 */ +#ifdef WIN32 STATIC char * -S_my_setlocale(pTHX_ int category, const char* locale) +S_win32_setlocale(pTHX_ int category, const char* locale) { /* This, for Windows, emulates POSIX setlocale() behavior. There is no - * difference unless the input locale is "", which means on Windows to get - * the machine default, which is set via the computer's "Regional and - * Language Options" (or its current equivalent). In POSIX, it instead - * means to find the locale from the user's environment. This routine - * looks in the environment, and, if anything is found, uses that instead - * of going to the machine default. If there is no environment override, - * the machine default is used, as normal, by calling the real setlocale() - * with "". The POSIX behavior is to use the LC_ALL variable if set; - * otherwise to use the particular category's variable if set; otherwise to - * use the LANG variable. */ + * difference between the two unless the input locale is "", which normally + * means on Windows to get the machine default, which is set via the + * computer's "Regional and Language Options" (or its current equivalent). + * In POSIX, it instead means to find the locale from the user's + * environment. This routine changes the Windows behavior to first look in + * the environment, and, if anything is found, use that instead of going to + * the machine default. If there is no environment override, the machine + * default is used, by calling the real setlocale() with "". + * + * The POSIX behavior is to use the LC_ALL variable if set; otherwise to + * use the particular category's variable if set; otherwise to use the LANG + * variable. */ bool override_LC_ALL = FALSE; char * result; if (locale && strEQ(locale, "")) { -# ifdef LC_ALL + +# ifdef LC_ALL + locale = PerlEnv_getenv("LC_ALL"); if (! locale) { -#endif + +# endif switch (category) { -# ifdef LC_ALL + +# ifdef LC_ALL case LC_ALL: override_LC_ALL = TRUE; break; /* We already know its variable isn't set */ -# endif -# ifdef USE_LOCALE_TIME + +# endif +# ifdef USE_LOCALE_TIME + case LC_TIME: locale = PerlEnv_getenv("LC_TIME"); break; -# endif -# ifdef USE_LOCALE_CTYPE + +# endif +# ifdef USE_LOCALE_CTYPE + case LC_CTYPE: locale = PerlEnv_getenv("LC_CTYPE"); break; -# endif -# ifdef USE_LOCALE_COLLATE + +# endif +# ifdef USE_LOCALE_COLLATE + case LC_COLLATE: locale = PerlEnv_getenv("LC_COLLATE"); break; -# endif -# ifdef USE_LOCALE_MONETARY + +# endif +# ifdef USE_LOCALE_MONETARY + case LC_MONETARY: locale = PerlEnv_getenv("LC_MONETARY"); break; -# endif -# ifdef USE_LOCALE_NUMERIC + +# endif +# ifdef USE_LOCALE_NUMERIC + case LC_NUMERIC: locale = PerlEnv_getenv("LC_NUMERIC"); break; -# endif -# ifdef USE_LOCALE_MESSAGES + +# endif +# ifdef USE_LOCALE_MESSAGES + case LC_MESSAGES: locale = PerlEnv_getenv("LC_MESSAGES"); break; -# endif + +# endif + default: /* This is a category, like PAPER_SIZE that we don't * know about; and so can't provide a wrapper. */ @@ -777,9 +844,13 @@ S_my_setlocale(pTHX_ int category, const char* locale) locale = ""; } } -# ifdef LC_ALL + +# ifdef LC_ALL + } -# endif + +# endif + } result = setlocale(category, locale); @@ -795,7 +866,9 @@ S_my_setlocale(pTHX_ int category, const char* locale) * lower priority than the other LC_foo variables, so override it for each * one that is set. (If they are set to "", it means to use the same thing * we just set LC_ALL to, so can skip) */ -# ifdef USE_LOCALE_TIME + +# ifdef USE_LOCALE_TIME + result = PerlEnv_getenv("LC_TIME"); if (result && strNE(result, "")) { setlocale(LC_TIME, result); @@ -803,8 +876,10 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_TIME, result, "not captured"))); } -# endif -# ifdef USE_LOCALE_CTYPE + +# endif +# ifdef USE_LOCALE_CTYPE + result = PerlEnv_getenv("LC_CTYPE"); if (result && strNE(result, "")) { setlocale(LC_CTYPE, result); @@ -812,8 +887,10 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_CTYPE, result, "not captured"))); } -# endif -# ifdef USE_LOCALE_COLLATE + +# endif +# ifdef USE_LOCALE_COLLATE + result = PerlEnv_getenv("LC_COLLATE"); if (result && strNE(result, "")) { setlocale(LC_COLLATE, result); @@ -821,8 +898,10 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_COLLATE, result, "not captured"))); } -# endif -# ifdef USE_LOCALE_MONETARY + +# endif +# ifdef USE_LOCALE_MONETARY + result = PerlEnv_getenv("LC_MONETARY"); if (result && strNE(result, "")) { setlocale(LC_MONETARY, result); @@ -830,8 +909,10 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_MONETARY, result, "not captured"))); } -# endif -# ifdef USE_LOCALE_NUMERIC + +# endif +# ifdef USE_LOCALE_NUMERIC + result = PerlEnv_getenv("LC_NUMERIC"); if (result && strNE(result, "")) { setlocale(LC_NUMERIC, result); @@ -839,8 +920,10 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_NUMERIC, result, "not captured"))); } -# endif -# ifdef USE_LOCALE_MESSAGES + +# endif +# ifdef USE_LOCALE_MESSAGES + result = PerlEnv_getenv("LC_MESSAGES"); if (result && strNE(result, "")) { setlocale(LC_MESSAGES, result); @@ -848,7 +931,8 @@ S_my_setlocale(pTHX_ int category, const char* locale) __FILE__, __LINE__, setlocale_debug_string(LC_MESSAGES, result, "not captured"))); } -# endif + +# endif result = setlocale(LC_ALL, NULL); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", @@ -866,9 +950,9 @@ Perl_setlocale(int category, const char * locale) /* This wraps POSIX::setlocale() */ char * retval; + char * newlocale; dTHX; - #ifdef USE_LOCALE_NUMERIC /* A NULL locale means only query what the current one is. We @@ -881,19 +965,19 @@ Perl_setlocale(int category, const char * locale) return savepv(PL_numeric_name); } -# ifdef LC_ALL +# ifdef LC_ALL else if (category == LC_ALL) { SET_NUMERIC_UNDERLYING(); } -# endif +# endif } #endif - retval = my_setlocale(category, locale); + retval = do_setlocale_r(category, locale); DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s\n", __FILE__, __LINE__, @@ -904,6 +988,7 @@ Perl_setlocale(int category, const char * locale) if (locale == 0) { SET_NUMERIC_STANDARD(); } + return NULL; } @@ -916,105 +1001,63 @@ Perl_setlocale(int category, const char * locale) SET_NUMERIC_STANDARD(); return retval; } - else { /* Now that have switched locales, we have to update our records to - correspond */ - -#ifdef USE_LOCALE_CTYPE - - if ( category == LC_CTYPE -# ifdef LC_ALL + /* Now that have switched locales, we have to update our records to + * correspond. */ - || category == LC_ALL - -# endif - - ) - { - char *newctype; - -# ifdef LC_ALL - - if (category == LC_ALL) { - newctype = setlocale(LC_CTYPE, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_CTYPE, NULL, newctype))); - } - else - -# endif + switch (category) { - newctype = retval; - new_ctype(newctype); - } +#ifdef USE_LOCALE_CTYPE -#endif /* USE_LOCALE_CTYPE */ + case LC_CTYPE: + new_ctype(retval); + break; +#endif #ifdef USE_LOCALE_COLLATE - if ( category == LC_COLLATE - -# ifdef LC_ALL - - || category == LC_ALL - -# endif - - ) - { - char *newcoll; - -# ifdef LC_ALL - - if (category == LC_ALL) { - newcoll = setlocale(LC_COLLATE, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_COLLATE, NULL, newcoll))); - } - else - -# endif - - newcoll = retval; - new_collate(newcoll); - } - -#endif /* USE_LOCALE_COLLATE */ + case LC_COLLATE: + new_collate(retval); + break; +#endif #ifdef USE_LOCALE_NUMERIC - if ( category == LC_NUMERIC + case LC_NUMERIC: + new_numeric(retval); + break; -# ifdef LC_ALL +#endif +#ifdef LC_ALL - || category == LC_ALL + case LC_ALL: -# endif + /* LC_ALL updates all the things we care about. The values may not + * be the same as 'retval', as the locale "" may have set things + * individually */ - ) - { - char *newnum; +# ifdef USE_LOCALE_CTYPE -# ifdef LC_ALL + newlocale = do_setlocale_c(LC_CTYPE, NULL); + new_ctype(newlocale); - if (category == LC_ALL) { - newnum = setlocale(LC_NUMERIC, NULL); - DEBUG_Lv(PerlIO_printf(Perl_debug_log, - "%s:%d: %s\n", __FILE__, __LINE__, - setlocale_debug_string(LC_NUMERIC, NULL, newnum))); - } - else +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE -# endif + newlocale = do_setlocale_c(LC_COLLATE, NULL); + new_collate(newlocale); - newnum = retval; - new_numeric(newnum); - } +# endif +# ifdef USE_LOCALE_NUMERIC -#endif /* USE_LOCALE_NUMERIC */ + newlocale = do_setlocale_c(LC_NUMERIC, NULL); + new_numeric(newlocale); + +# endif /* USE_LOCALE_NUMERIC */ +#endif /* LC_ALL */ + default: + break; } return retval; @@ -1051,7 +1094,7 @@ S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t =for apidoc Perl_langinfo -This is an (almostª) drop-in replacement for the system C<L<nl_langinfo(3)>>, +This is an (almost ª) drop-in replacement for the system C<L<nl_langinfo(3)>>, taking the same C<item> parameter values, and returning the same information. But it is more thread-safe than regular C<nl_langinfo()>, and hides the quirks of Perl's locale handling from your code, and can be used on systems that lack @@ -1167,7 +1210,7 @@ C<Perl_langinfo> in the same thread. =item * -ªIt returns S<C<const char *>>, whereas plain C<nl_langinfo()> returns S<C<char +ª It returns S<C<const char *>>, whereas plain C<nl_langinfo()> returns S<C<char *>>, but you are (only by documentation) forbidden to write into the buffer. By declaring this C<const>, the compiler enforces this restriction. The extra C<const> is why this isn't an unequivocal drop-in replacement for @@ -1194,7 +1237,16 @@ Perl_langinfo(const nl_item item) Perl_langinfo(const int item) #endif { - bool toggle = TRUE; + return my_nl_langinfo(item, TRUE); +} + +const char * +#ifdef HAS_NL_LANGINFO +S_my_nl_langinfo(const nl_item item, bool toggle) +#else +S_my_nl_langinfo(const int item, bool toggle) +#endif +{ dTHX; #if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */ @@ -1209,7 +1261,7 @@ Perl_langinfo(const int item) if (toggle) { if (item == PERL_RADIXCHAR || item == PERL_THOUSEP) { - setlocale(LC_NUMERIC, PL_numeric_name); + do_setlocale_c(LC_NUMERIC, PL_numeric_name); } else { toggle = FALSE; @@ -1219,7 +1271,7 @@ Perl_langinfo(const int item) save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, &PL_langinfo_bufsize, 0); if (toggle) { - setlocale(LC_NUMERIC, "C"); + do_setlocale_c(LC_NUMERIC, "C"); } LOCALE_UNLOCK; @@ -1327,7 +1379,7 @@ Perl_langinfo(const int item) LOCALE_LOCK; if (toggle) { - setlocale(LC_NUMERIC, PL_numeric_name); + do_setlocale_c(LC_NUMERIC, PL_numeric_name); } lc = localeconv(); @@ -1362,7 +1414,7 @@ Perl_langinfo(const int item) save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0); if (toggle) { - setlocale(LC_NUMERIC, "C"); + do_setlocale_c(LC_NUMERIC, "C"); } LOCALE_UNLOCK; @@ -1376,7 +1428,7 @@ Perl_langinfo(const int item) * and so are returned unconditionally; they may not be what the locale * actually says, but should give good enough results for someone using * them as formats (as opposed to trying to parse them to figure out - * what the locale says). The other format ones are actually tested to + * what the locale says). The other format items are actually tested to * verify they work on the platform */ case PERL_D_FMT: return "%x"; case PERL_T_FMT: return "%X"; @@ -1641,9 +1693,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * it. We look at, in order, LC_ALL, LANG, a system default locale (if we * think there is one), and the ultimate fallback "C". This is all done in * the same loop as above to avoid duplicating code, but it makes things - * more complex. After the original failure, we add the fallback - * possibilities to the list of locales to try, and iterate the loop - * through them all until one succeeds. + * more complex. The 'trial_locales' array is initialized with just one + * element; it causes the behavior described in the paragraph above this to + * happen. If that fails, we add elements to 'trial_locales', and do extra + * loop iterations to cause the behavior described in this paragraph. * * On Ultrix, the locale MUST come from the environment, so there is * preliminary code to set it. I (khw) am not sure that it is necessary, @@ -1665,19 +1718,31 @@ Perl_init_i18nl10n(pTHX_ int printwarn) int ok = 1; -#if defined(USE_LOCALE) -#ifdef USE_LOCALE_CTYPE +#ifndef USE_LOCALE + + PERL_UNUSED_ARG(printwarn); + +#else /* USE_LOCALE */ +# ifdef USE_LOCALE_CTYPE + char *curctype = NULL; -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + char *curcoll = NULL; -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + char *curnum = NULL; -#endif /* USE_LOCALE_NUMERIC */ -#ifdef __GLIBC__ + +# endif /* USE_LOCALE_NUMERIC */ +# ifdef __GLIBC__ + const char * const language = savepv(PerlEnv_getenv("LANGUAGE")); -#endif + +# endif /* NULL uses the existing already set up locale */ const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) @@ -1703,18 +1768,23 @@ Perl_init_i18nl10n(pTHX_ int printwarn) bool done = FALSE; char * sl_result; /* return from setlocale() */ char * locale_param; -#ifdef WIN32 + +# ifdef WIN32 + /* In some systems you can find out the system default locale * and use that as the fallback locale. */ -# define SYSTEM_DEFAULT_LOCALE -#endif -#ifdef SYSTEM_DEFAULT_LOCALE +# define SYSTEM_DEFAULT_LOCALE +# endif +# ifdef SYSTEM_DEFAULT_LOCALE + const char *system_default_locale = NULL; -#endif -#ifdef DEBUGGING +# endif +# ifdef DEBUGGING + DEBUG_INITIALIZATION_set(cBOOL(PerlEnv_getenv("PERL_DEBUG_LOCALE_INIT"))); -# define DEBUG_LOCALE_INIT(category, locale, result) \ + +# define DEBUG_LOCALE_INIT(category, locale, result) \ STMT_START { \ if (debug_initialization) { \ PerlIO_printf(Perl_debug_log, \ @@ -1724,24 +1794,28 @@ Perl_init_i18nl10n(pTHX_ int printwarn) locale, \ result)); \ } \ - } STMT_END -#else -# define DEBUG_LOCALE_INIT(a,b,c) -#endif + } STMT_END + +# else +# define DEBUG_LOCALE_INIT(a,b,c) +# endif + +# ifndef LOCALE_ENVIRON_REQUIRED -#ifndef LOCALE_ENVIRON_REQUIRED PERL_UNUSED_VAR(done); PERL_UNUSED_VAR(locale_param); -#else + +# else /* * Ultrix setlocale(..., "") fails if there are no environment * variables from which to get a locale name. */ -# ifdef LC_ALL +# ifdef LC_ALL + if (lang) { - sl_result = my_setlocale(LC_ALL, setlocale_init); + sl_result = do_setlocale_c(LC_ALL, setlocale_init); DEBUG_LOCALE_INIT(LC_ALL, setlocale_init, sl_result); if (sl_result) done = TRUE; @@ -1749,64 +1823,75 @@ Perl_init_i18nl10n(pTHX_ int printwarn) setlocale_failure = TRUE; } if (! setlocale_failure) { -# ifdef USE_LOCALE_CTYPE + +# ifdef USE_LOCALE_CTYPE + locale_param = (! done && (lang || PerlEnv_getenv("LC_CTYPE"))) ? setlocale_init : NULL; - curctype = my_setlocale(LC_CTYPE, locale_param); + curctype = do_setlocale_c(LC_CTYPE, locale_param); DEBUG_LOCALE_INIT(LC_CTYPE, locale_param, sl_result); if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + locale_param = (! done && (lang || PerlEnv_getenv("LC_COLLATE"))) ? setlocale_init : NULL; - curcoll = my_setlocale(LC_COLLATE, locale_param); + curcoll = do_setlocale_c(LC_COLLATE, locale_param); DEBUG_LOCALE_INIT(LC_COLLATE, locale_param, sl_result); if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + locale_param = (! done && (lang || PerlEnv_getenv("LC_NUMERIC"))) ? setlocale_init : NULL; - curnum = my_setlocale(LC_NUMERIC, locale_param); + curnum = do_setlocale_c(LC_NUMERIC, locale_param); DEBUG_LOCALE_INIT(LC_NUMERIC, locale_param, sl_result); if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); -# endif /* USE_LOCALE_NUMERIC */ -# ifdef USE_LOCALE_MESSAGES + +# endif /* USE_LOCALE_NUMERIC */ +# ifdef USE_LOCALE_MESSAGES + locale_param = (! done && (lang || PerlEnv_getenv("LC_MESSAGES"))) ? setlocale_init : NULL; - sl_result = my_setlocale(LC_MESSAGES, locale_param); + sl_result = do_setlocale_c(LC_MESSAGES, locale_param); DEBUG_LOCALE_INIT(LC_MESSAGES, locale_param, sl_result); if (! sl_result) { setlocale_failure = TRUE; } -# endif /* USE_LOCALE_MESSAGES */ -# ifdef USE_LOCALE_MONETARY + +# endif /* USE_LOCALE_MESSAGES */ +# ifdef USE_LOCALE_MONETARY + locale_param = (! done && (lang || PerlEnv_getenv("LC_MONETARY"))) ? setlocale_init : NULL; - sl_result = my_setlocale(LC_MONETARY, locale_param); + sl_result = do_setlocale_c(LC_MONETARY, locale_param); DEBUG_LOCALE_INIT(LC_MONETARY, locale_param, sl_result); if (! sl_result) { setlocale_failure = TRUE; } -# endif /* USE_LOCALE_MONETARY */ - } -# endif /* LC_ALL */ +# endif /* USE_LOCALE_MONETARY */ + + } -#endif /* !LOCALE_ENVIRON_REQUIRED */ +# endif /* LC_ALL */ +# endif /* !LOCALE_ENVIRON_REQUIRED */ /* We try each locale in the list until we get one that works, or exhaust * the list. Normally the loop is executed just once. But if setting the @@ -1814,6 +1899,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * will execute the loop multiple times */ trial_locales[0] = setlocale_init; trial_locales_count = 1; + for (i= 0; i < trial_locales_count; i++) { const char * trial_locale = trial_locales[i]; @@ -1824,8 +1910,9 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * sense */ setlocale_failure = FALSE; -#ifdef SYSTEM_DEFAULT_LOCALE -# ifdef WIN32 +# ifdef SYSTEM_DEFAULT_LOCALE +# ifdef WIN32 + /* On Windows machines, an entry of "" after the 0th means to use * the system default locale, which we now proceed to get. */ if (strEQ(trial_locale, "")) { @@ -1833,10 +1920,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) /* Note that this may change the locale, but we are going to do * that anyway just below */ - system_default_locale = setlocale(LC_ALL, ""); + system_default_locale = do_setlocale_c(LC_ALL, ""); DEBUG_LOCALE_INIT(LC_ALL, "", system_default_locale); - /* Skip if invalid or it's already on the list of locales to + /* Skip if invalid or if it's already on the list of locales to * try */ if (! system_default_locale) { goto next_iteration; @@ -1849,12 +1936,13 @@ Perl_init_i18nl10n(pTHX_ int printwarn) trial_locale = system_default_locale; } -# endif /* WIN32 */ -#endif /* SYSTEM_DEFAULT_LOCALE */ +# endif /* WIN32 */ +# endif /* SYSTEM_DEFAULT_LOCALE */ } -#ifdef LC_ALL - sl_result = my_setlocale(LC_ALL, trial_locale); +# ifdef LC_ALL + + sl_result = do_setlocale_c(LC_ALL, trial_locale); DEBUG_LOCALE_INIT(LC_ALL, trial_locale, sl_result); if (! sl_result) { setlocale_failure = TRUE; @@ -1865,52 +1953,64 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * setlocales below just return their category's current values. * This adequately handles the case in NetBSD where LC_COLLATE may * not be defined for a locale, and setting it individually will - * fail, whereas setting LC_ALL suceeds, leaving LC_COLLATE set to + * fail, whereas setting LC_ALL succeeds, leaving LC_COLLATE set to * the POSIX locale. */ trial_locale = NULL; } -#endif /* LC_ALL */ + +# endif /* LC_ALL */ if (!setlocale_failure) { -#ifdef USE_LOCALE_CTYPE + +# ifdef USE_LOCALE_CTYPE + Safefree(curctype); - curctype = my_setlocale(LC_CTYPE, trial_locale); + curctype = do_setlocale_c(LC_CTYPE, trial_locale); DEBUG_LOCALE_INIT(LC_CTYPE, trial_locale, curctype); if (! curctype) setlocale_failure = TRUE; else curctype = savepv(curctype); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + Safefree(curcoll); - curcoll = my_setlocale(LC_COLLATE, trial_locale); + curcoll = do_setlocale_c(LC_COLLATE, trial_locale); DEBUG_LOCALE_INIT(LC_COLLATE, trial_locale, curcoll); if (! curcoll) setlocale_failure = TRUE; else curcoll = savepv(curcoll); -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + Safefree(curnum); - curnum = my_setlocale(LC_NUMERIC, trial_locale); + curnum = do_setlocale_c(LC_NUMERIC, trial_locale); DEBUG_LOCALE_INIT(LC_NUMERIC, trial_locale, curnum); if (! curnum) setlocale_failure = TRUE; else curnum = savepv(curnum); -#endif /* USE_LOCALE_NUMERIC */ -#ifdef USE_LOCALE_MESSAGES - sl_result = my_setlocale(LC_MESSAGES, trial_locale); + +# endif /* USE_LOCALE_NUMERIC */ +# ifdef USE_LOCALE_MESSAGES + + sl_result = do_setlocale_c(LC_MESSAGES, trial_locale); DEBUG_LOCALE_INIT(LC_MESSAGES, trial_locale, sl_result); if (! (sl_result)) setlocale_failure = TRUE; -#endif /* USE_LOCALE_MESSAGES */ -#ifdef USE_LOCALE_MONETARY - sl_result = my_setlocale(LC_MONETARY, trial_locale); + +# endif /* USE_LOCALE_MESSAGES */ +# ifdef USE_LOCALE_MONETARY + + sl_result = do_setlocale_c(LC_MONETARY, trial_locale); DEBUG_LOCALE_INIT(LC_MONETARY, trial_locale, sl_result); if (! (sl_result)) setlocale_failure = TRUE; -#endif /* USE_LOCALE_MONETARY */ + +# endif /* USE_LOCALE_MONETARY */ if (! setlocale_failure) { /* Success */ break; @@ -1924,41 +2024,50 @@ Perl_init_i18nl10n(pTHX_ int printwarn) unsigned int j; if (locwarn) { /* Output failure info only on the first one */ -#ifdef LC_ALL + +# ifdef LC_ALL PerlIO_printf(Perl_error_log, "perl: warning: Setting locale failed.\n"); -#else /* !LC_ALL */ +# else /* !LC_ALL */ PerlIO_printf(Perl_error_log, "perl: warning: Setting locale failed for the categories:\n\t"); -# ifdef USE_LOCALE_CTYPE + +# ifdef USE_LOCALE_CTYPE + if (! curctype) PerlIO_printf(Perl_error_log, "LC_CTYPE "); -# endif /* USE_LOCALE_CTYPE */ -# ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE if (! curcoll) PerlIO_printf(Perl_error_log, "LC_COLLATE "); -# endif /* USE_LOCALE_COLLATE */ -# ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + if (! curnum) PerlIO_printf(Perl_error_log, "LC_NUMERIC "); -# endif /* USE_LOCALE_NUMERIC */ + +# endif /* USE_LOCALE_NUMERIC */ + PerlIO_printf(Perl_error_log, "and possibly others\n"); -#endif /* LC_ALL */ +# endif /* LC_ALL */ PerlIO_printf(Perl_error_log, "perl: warning: Please check that your locale settings:\n"); -#ifdef __GLIBC__ +# ifdef __GLIBC__ + PerlIO_printf(Perl_error_log, "\tLANGUAGE = %c%s%c,\n", language ? '"' : '(', language ? language : "unset", language ? '"' : ')'); -#endif +# endif PerlIO_printf(Perl_error_log, "\tLC_ALL = %c%s%c,\n", @@ -1966,7 +2075,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) lc_all ? lc_all : "unset", lc_all ? '"' : ')'); -#if defined(USE_ENVIRON_ARRAY) +# if defined(USE_ENVIRON_ARRAY) + { char **e; @@ -1990,10 +2100,13 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } } } -#else + +# else + PerlIO_printf(Perl_error_log, "\t(possibly more locale environment variables)\n"); -#endif + +# endif PerlIO_printf(Perl_error_log, "\tLANG = %c%s%c\n", @@ -2040,7 +2153,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } done_lang: -#if defined(WIN32) && defined(LC_ALL) +# if defined(WIN32) && defined(LC_ALL) + /* For Windows, we also try the system default locale before "C". * (If there exists a Windows without LC_ALL we skip this because * it gets too complicated. For those, the "C" is the next @@ -2048,7 +2162,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) * the array, but the code at the loop above knows to treat it * differently when not the 0th */ trial_locales[trial_locales_count++] = ""; -#endif + +# endif for (j = 0; j < trial_locales_count; j++) { if (strEQ("C", trial_locales[j])) { @@ -2060,9 +2175,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn) done_C: ; } /* end of first time through the loop */ -#ifdef WIN32 +# ifdef WIN32 + next_iteration: ; -#endif + +# endif } /* end of looping through the trial locales */ @@ -2081,21 +2198,29 @@ Perl_init_i18nl10n(pTHX_ int printwarn) msg = "Failed to fall back to"; /* To continue, we should use whatever values we've got */ -#ifdef USE_LOCALE_CTYPE + +# ifdef USE_LOCALE_CTYPE + Safefree(curctype); - curctype = savepv(setlocale(LC_CTYPE, NULL)); + curctype = savepv(do_setlocale_c(LC_CTYPE, NULL)); DEBUG_LOCALE_INIT(LC_CTYPE, NULL, curctype); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + Safefree(curcoll); - curcoll = savepv(setlocale(LC_COLLATE, NULL)); + curcoll = savepv(do_setlocale_c(LC_COLLATE, NULL)); DEBUG_LOCALE_INIT(LC_COLLATE, NULL, curcoll); -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + Safefree(curnum); - curnum = savepv(setlocale(LC_NUMERIC, NULL)); + curnum = savepv(do_setlocale_c(LC_NUMERIC, NULL)); DEBUG_LOCALE_INIT(LC_NUMERIC, NULL, curnum); -#endif /* USE_LOCALE_NUMERIC */ + +# endif /* USE_LOCALE_NUMERIC */ + } if (locwarn) { @@ -2105,14 +2230,18 @@ Perl_init_i18nl10n(pTHX_ int printwarn) description = "the standard locale"; name = "C"; } -#ifdef SYSTEM_DEFAULT_LOCALE + +# ifdef SYSTEM_DEFAULT_LOCALE + else if (strEQ(trial_locales[i], "")) { description = "the system default locale"; if (system_default_locale) { name = system_default_locale; } } -#endif /* SYSTEM_DEFAULT_LOCALE */ + +# endif /* SYSTEM_DEFAULT_LOCALE */ + else { description = "a fallback locale"; name = trial_locales[i]; @@ -2128,19 +2257,23 @@ Perl_init_i18nl10n(pTHX_ int printwarn) } } /* End of tried to fallback */ -#ifdef USE_LOCALE_CTYPE +# ifdef USE_LOCALE_CTYPE + new_ctype(curctype); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + new_collate(curcoll); -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + new_numeric(curnum); -#endif /* USE_LOCALE_NUMERIC */ -#if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) +# endif /* USE_LOCALE_NUMERIC */ +# if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) + /* Set PL_utf8locale to TRUE if using PerlIO _and_ the current LC_CTYPE * locale is UTF-8. If PL_utf8locale and PL_unicode (set by -C or by * $ENV{PERL_UNICODE}) are true, perl.c:S_parse_body() will turn on the @@ -2157,32 +2290,39 @@ Perl_init_i18nl10n(pTHX_ int printwarn) if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) PL_utf8cache = -1; } -#endif -#ifdef USE_LOCALE_CTYPE +# endif +# ifdef USE_LOCALE_CTYPE + Safefree(curctype); -#endif /* USE_LOCALE_CTYPE */ -#ifdef USE_LOCALE_COLLATE + +# endif /* USE_LOCALE_CTYPE */ +# ifdef USE_LOCALE_COLLATE + Safefree(curcoll); -#endif /* USE_LOCALE_COLLATE */ -#ifdef USE_LOCALE_NUMERIC + +# endif /* USE_LOCALE_COLLATE */ +# ifdef USE_LOCALE_NUMERIC + Safefree(curnum); -#endif /* USE_LOCALE_NUMERIC */ -#ifdef __GLIBC__ +# endif /* USE_LOCALE_NUMERIC */ + +# ifdef __GLIBC__ + Safefree(language); -#endif + +# endif Safefree(lc_all); Safefree(lang); -#else /* !USE_LOCALE */ - PERL_UNUSED_ARG(printwarn); #endif /* USE_LOCALE */ - #ifdef DEBUGGING + /* So won't continue to output stuff */ DEBUG_INITIALIZATION_set(FALSE); + #endif return ok; @@ -2583,10 +2723,14 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, * length 1 strings, as we can't be sure that it's a real slope * change */ if (length_in_chars > 1 && new_m > PL_collxfrm_mult) { -#ifdef DEBUGGING + +# ifdef DEBUGGING + STRLEN old_m = PL_collxfrm_mult; STRLEN old_b = PL_collxfrm_base; -#endif + +# endif + PL_collxfrm_mult = new_m; PL_collxfrm_base = 1; /* +1 For trailing NUL */ computed_guess = PL_collxfrm_base @@ -2645,7 +2789,8 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, xAlloc += (xAlloc / 4) + 1; PL_strxfrm_is_behaved = FALSE; -#ifdef DEBUGGING +# ifdef DEBUGGING + if (DEBUG_Lv_TEST || debug_initialization) { PerlIO_printf(Perl_debug_log, "_mem_collxfrm required more space than previously calculated" @@ -2653,7 +2798,9 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, PL_collation_name, (int) COLLXFRM_HDR_LEN, xAlloc - COLLXFRM_HDR_LEN); } -#endif + +# endif + } Renew(xbuf, xAlloc, char); @@ -2667,7 +2814,8 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, } -#ifdef DEBUGGING +# ifdef DEBUGGING + if (DEBUG_Lv_TEST || debug_initialization) { print_collxfrm_input_and_return(s, s + len, xlen, utf8); @@ -2676,7 +2824,8 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, _byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN, *xlen, 1)); } -#endif + +# endif /* Free up unneeded space; retain ehough for trailing NUL */ Renew(xbuf, COLLXFRM_HDR_LEN + *xlen + 1, char); @@ -2693,15 +2842,19 @@ Perl__mem_collxfrm(pTHX_ const char *input_string, Safefree(s); } *xlen = 0; -#ifdef DEBUGGING + +# ifdef DEBUGGING + if (DEBUG_Lv_TEST || debug_initialization) { print_collxfrm_input_and_return(s, s + len, NULL, utf8); } -#endif + +# endif + return NULL; } -#ifdef DEBUGGING +# ifdef DEBUGGING STATIC void S_print_collxfrm_input_and_return(pTHX_ @@ -2763,8 +2916,7 @@ S_print_bytes_for_locale(pTHX_ } } -#endif /* #ifdef DEBUGGING */ - +# endif /* #ifdef DEBUGGING */ #endif /* USE_LOCALE_COLLATE */ #ifdef USE_LOCALE @@ -2783,12 +2935,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) char *save_input_locale = NULL; STRLEN final_pos; -#ifdef LC_ALL +# ifdef LC_ALL + assert(category != LC_ALL); -#endif + +# endif /* First dispose of the trivial cases */ - save_input_locale = setlocale(category, NULL); + save_input_locale = do_setlocale_r(category, NULL); if (! save_input_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for category %d\n", @@ -2804,7 +2958,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) return FALSE; } -#if defined(USE_LOCALE_CTYPE) \ +# if defined(USE_LOCALE_CTYPE) \ && (defined(MB_CUR_MAX) || (defined(HAS_NL_LANGINFO) && defined(CODESET))) { /* Next try nl_langinfo or MB_CUR_MAX if available */ @@ -2815,7 +2969,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) if (category != LC_CTYPE) { /* These work only on LC_CTYPE */ /* Get the current LC_CTYPE locale */ - save_ctype_locale = setlocale(LC_CTYPE, NULL); + save_ctype_locale = do_setlocale_c(LC_CTYPE, NULL); if (! save_ctype_locale) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not find current locale for LC_CTYPE\n")); @@ -2831,7 +2985,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_ctype_locale); save_ctype_locale = NULL; } - else if (! setlocale(LC_CTYPE, save_input_locale)) { + else if (! do_setlocale_c(LC_CTYPE, save_input_locale)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Could not change LC_CTYPE locale to %s\n", save_input_locale)); @@ -2847,32 +3001,38 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * information is desired. This means that nl_langinfo() and MB_CUR_MAX * should give the correct results */ -# if defined(HAS_NL_LANGINFO) && defined(CODESET) +# if defined(HAS_NL_LANGINFO) && defined(CODESET) + /* The task is easiest if has this POSIX 2001 function */ + { - char *codeset = nl_langinfo(CODESET); - if (codeset && strNE(codeset, "")) { - codeset = savepv(codeset); + const char *codeset = my_nl_langinfo(PERL_CODESET, FALSE); + /* FALSE => already in dest locale */ + + DEBUG_L(PerlIO_printf(Perl_debug_log, + "\tnllanginfo returned CODESET '%s'\n", codeset)); + if (codeset && strNE(codeset, "")) { /* If we switched LC_CTYPE, switch back */ if (save_ctype_locale) { - setlocale(LC_CTYPE, save_ctype_locale); + do_setlocale_c(LC_CTYPE, save_ctype_locale); Safefree(save_ctype_locale); } - is_utf8 = foldEQ(codeset, STR_WITH_LEN("UTF-8")) - || foldEQ(codeset, STR_WITH_LEN("UTF8")); + is_utf8 = ( ( strlen(codeset) == STRLENs("UTF-8") + && foldEQ(codeset, STR_WITH_LEN("UTF-8"))) + || ( strlen(codeset) == STRLENs("UTF8") + && foldEQ(codeset, STR_WITH_LEN("UTF8")))); DEBUG_L(PerlIO_printf(Perl_debug_log, "\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n", codeset, is_utf8)); - Safefree(codeset); Safefree(save_input_locale); return is_utf8; } } -# endif -# ifdef MB_CUR_MAX +# endif +# ifdef MB_CUR_MAX /* Here, either we don't have nl_langinfo, or it didn't return a * codeset. Try MB_CUR_MAX */ @@ -2889,7 +3049,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) Safefree(save_input_locale); -# ifdef HAS_MBTOWC +# ifdef HAS_MBTOWC /* ... But, most system that have MB_CUR_MAX will also have mbtowc(), * since they are both in the C99 standard. We can feed a known byte @@ -2897,34 +3057,42 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) * result */ if (is_utf8) { wchar_t wc; ... 790 lines suppressed ... -- Perl5 Master Repository