In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/690140045b6a80981ca64b0ea62f68c2035574f1?hp=a27b5f526fbceff0f6c58160999a5830b3385915>
- Log ----------------------------------------------------------------- commit 690140045b6a80981ca64b0ea62f68c2035574f1 Author: Karl Williamson <pub...@khwilliamson.com> Date: Mon Feb 3 19:12:16 2014 -0700 Add -DL option to trace setlocale calls This will help field debugging of locale issues. M locale.c M perl.c M perl.h M pod/perldelta.pod commit 240a4b1129a0b1ee7eab430e28c7bd4f7486938a Author: Karl Williamson <pub...@khwilliamson.com> Date: Mon Feb 3 19:52:54 2014 -0700 Revert "Fix handy.t for systems without $Config{d_isblank}." This reverts commit d61570b1bbf3e2d76cc293690156fb361b054272. This commit was made unnecessary by commit 3f9a3488327f59f53c00adc132d91f19840e2a50. M ext/XS-APItest/t/handy.t commit b8df125dc88244864ce3bf7ad4879ff00476bc41 Author: Karl Williamson <pub...@khwilliamson.com> Date: Mon Feb 3 12:18:38 2014 -0700 Regenerate podcheck db due to recent 79col fixes Commits 51b4c035919497f474ce46dcbdac1d2f3fd18a84 and 02257115537194d7a3b36a956d5643069f78c54f fixed some too-long verbatim line issues. I'm not sure why commit b3a2acfa0c0e4f8e48e1f6eb4d6fd143f293d2c6 added them to the db, as they were fixed before it was applied. My guess is that the workspace had not been rebased recently enough. M t/porting/known_pod_issues.dat ----------------------------------------------------------------------- Summary of changes: ext/XS-APItest/t/handy.t | 14 --------- locale.c | 64 ++++++++++++++++++++++++++++++++++++++++++ perl.c | 3 +- perl.h | 8 +++++- pod/perldelta.pod | 5 ++++ t/porting/known_pod_issues.dat | 2 -- 6 files changed, 78 insertions(+), 18 deletions(-) diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t index b869ddb..41f5c7f 100644 --- a/ext/XS-APItest/t/handy.t +++ b/ext/XS-APItest/t/handy.t @@ -40,8 +40,6 @@ if($Config{d_setlocale}) { $utf8_locale = find_utf8_locale(); } } -my $has_isblank = $Config{d_isblank}; # has C99, locale-specific blank checking - my %properties = ( # name => Lookup-property name @@ -179,13 +177,9 @@ foreach my $name (sort keys %properties) { } else { - TODO: { # UTF-8 locale works on full range 0-255 - local $TODO = 'locale-specific isblank not available' - unless $has_isblank || $function ne 'BLANK' || $char_name ne 'NO-BREAK SPACE'; my $truth = truth($matches && $i < 256); is ($ret, $truth, "is${function}_LC( $display_name ) == $truth ($utf8_locale)"); - } } } } @@ -221,12 +215,8 @@ foreach my $name (sort keys %properties) { fail($@); } else { - TODO: { - local $TODO = 'locale-specific isblank not available' - unless $has_isblank || $function ne 'BLANK' || $char_name ne 'NO-BREAK SPACE'; my $truth = truth($matches); is ($ret, $truth, "is${function}_LC_uvchr( $display_name ) == $truth ($utf8_locale)"); - } } } @@ -264,12 +254,8 @@ foreach my $name (sort keys %properties) { fail($@); } else { - TODO: { - local $TODO = 'locale-specific isblank not available' - unless $has_isblank || $function ne 'BLANK' || $char_name ne 'NO-BREAK SPACE'; my $truth = truth($matches); is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth ($utf8_locale)"); - } } } } diff --git a/locale.c b/locale.c index 5144d8a..8190476 100644 --- a/locale.c +++ b/locale.c @@ -117,6 +117,12 @@ Perl_set_numeric_radix(pTHX) } else PL_numeric_radix_sv = NULL; + + DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale radix is %s\n", + (PL_numeric_radix_sv) + ? lc->decimal_point + : "NULL")); + # endif /* HAS_LOCALECONV */ #endif /* USE_LOCALE_NUMERIC */ } @@ -195,6 +201,8 @@ Perl_set_numeric_standard(pTHX) PL_numeric_local = FALSE; set_numeric_radix(); } + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Underlying LC_NUMERIC locale now is C\n")); #endif /* USE_LOCALE_NUMERIC */ } @@ -215,6 +223,9 @@ Perl_set_numeric_local(pTHX) PL_numeric_local = TRUE; set_numeric_radix(); } + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Underlying LC_NUMERIC locale now is %s\n", + PL_numeric_name)); #endif /* USE_LOCALE_NUMERIC */ } @@ -699,12 +710,18 @@ S_is_cur_LC_category_utf8(pTHX_ int category) /* First dispose of the trivial cases */ save_input_locale = setlocale(category, NULL); if (! save_input_locale) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Could not find current locale for category %d\n", + category)); return FALSE; /* XXX maybe should croak */ } save_input_locale = stdize_locale(savepv(save_input_locale)); if ((*save_input_locale == 'C' && save_input_locale[1] == '\0') || strEQ(save_input_locale, "POSIX")) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Current locale for category %d is %s\n", + category, save_input_locale)); Safefree(save_input_locale); return FALSE; } @@ -722,6 +739,8 @@ S_is_cur_LC_category_utf8(pTHX_ int category) /* Get the current LC_CTYPE locale */ save_ctype_locale = stdize_locale(savepv(setlocale(LC_CTYPE, NULL))); if (! save_ctype_locale) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Could not find current locale for LC_CTYPE\n")); goto cant_use_nllanginfo; } @@ -734,11 +753,17 @@ S_is_cur_LC_category_utf8(pTHX_ int category) save_ctype_locale = NULL; } else if (! setlocale(LC_CTYPE, save_input_locale)) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Could not change LC_CTYPE locale to %s\n", + save_input_locale)); Safefree(save_ctype_locale); goto cant_use_nllanginfo; } } + DEBUG_L(PerlIO_printf(Perl_debug_log, "Current LC_CTYPE locale=%s\n", + save_input_locale)); + /* Here the current LC_CTYPE is set to the locale of the category whose * information is desired. This means that nl_langinfo() and MB_CUR_MAX * should give the correct results */ @@ -757,6 +782,9 @@ S_is_cur_LC_category_utf8(pTHX_ int category) is_utf8 = foldEQ(codeset, STR_WITH_LEN("UTF-8")) || foldEQ(codeset, STR_WITH_LEN("UTF8")); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "\tnllanginfo returned CODESET '%s'; ?UTF8 locale=%d\n", + codeset, is_utf8)); Safefree(codeset); Safefree(save_input_locale); return is_utf8; @@ -775,6 +803,10 @@ S_is_cur_LC_category_utf8(pTHX_ int category) * turns out to be wrong, other things will fail */ is_utf8 = MB_CUR_MAX >= 4; + DEBUG_L(PerlIO_printf(Perl_debug_log, + "\tMB_CUR_MAX=%d; ?UTF8 locale=%d\n", + (int) MB_CUR_MAX, is_utf8)); + Safefree(save_input_locale); # ifdef HAS_MBTOWC @@ -786,11 +818,16 @@ S_is_cur_LC_category_utf8(pTHX_ int category) if (is_utf8) { wchar_t wc; (void) mbtowc(&wc, NULL, 0); /* Reset any shift state */ + errno = 0; if (mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)) != strlen(HYPHEN_UTF8) || wc != (wchar_t) 0x2010) { is_utf8 = FALSE; + DEBUG_L(PerlIO_printf(Perl_debug_log, "\thyphen=U+%x\n", wc)); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "\treturn from mbtowc=%d; errno=%d; ?UTF8 locale=0\n", + mbtowc(&wc, HYPHEN_UTF8, strlen(HYPHEN_UTF8)), errno)); } } # endif @@ -834,9 +871,15 @@ S_is_cur_LC_category_utf8(pTHX_ int category) } if (*(name) == '8') { Safefree(save_input_locale); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Locale %s ends with UTF-8 in name\n", + save_input_locale)); return TRUE; } } + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Locale %s doesn't end with UTF-8 in name\n", + save_input_locale)); } #ifdef WIN32 @@ -849,6 +892,9 @@ S_is_cur_LC_category_utf8(pTHX_ int category) && *(save_input_locale + final_pos - 4) == '6') { Safefree(save_input_locale); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Locale %s ends with 10056 in name, is UTF-8 locale\n", + save_input_locale)); return TRUE; } #endif @@ -856,6 +902,9 @@ S_is_cur_LC_category_utf8(pTHX_ int category) /* Other common encodings are the ISO 8859 series, which aren't UTF-8 */ if (instr(save_input_locale, "8859")) { Safefree(save_input_locale); + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Locale %s has 8859 in name, not UTF-8 locale\n", + save_input_locale)); return FALSE; } @@ -884,11 +933,16 @@ S_is_cur_LC_category_utf8(pTHX_ int category) save_monetary_locale = stdize_locale(savepv(setlocale(LC_MONETARY, NULL))); if (! save_monetary_locale) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Could not find current locale for LC_MONETARY\n")); goto cant_use_monetary; } if (strNE(save_monetary_locale, save_input_locale)) { if (! setlocale(LC_MONETARY, save_input_locale)) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Could not change LC_MONETARY locale to %s\n", + save_input_locale)); Safefree(save_monetary_locale); goto cant_use_monetary; } @@ -900,9 +954,13 @@ S_is_cur_LC_category_utf8(pTHX_ int category) if (lc && lc->currency_symbol) { if (! is_utf8_string((U8 *) lc->currency_symbol, 0)) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Currency symbol for %s is not legal UTF-8\n", + save_input_locale)); illegal_utf8 = TRUE; } else if (is_ascii_string((U8 *) lc->currency_symbol, 0)) { + DEBUG_L(PerlIO_printf(Perl_debug_log, "Currency symbol for %s contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); only_ascii = TRUE; } } @@ -920,6 +978,9 @@ S_is_cur_LC_category_utf8(pTHX_ int category) * UTF-8. (We can't really tell if the locale is UTF-8 or not if the * symbol is just a '$', so we err on the side of it not being UTF-8) * */ + DEBUG_L(PerlIO_printf(Perl_debug_log, "\tis_utf8=%d\n", (illegal_utf8) + ? FALSE + : ! only_ascii)); return (illegal_utf8) ? FALSE : ! only_ascii; @@ -1006,6 +1067,9 @@ S_is_cur_LC_category_utf8(pTHX_ int category) #endif + DEBUG_L(PerlIO_printf(Perl_debug_log, + "Assuming locale %s is not a UTF-8 locale\n", + save_input_locale)); Safefree(save_input_locale); return FALSE; } diff --git a/perl.c b/perl.c index eb7b954..cd5f97f 100644 --- a/perl.c +++ b/perl.c @@ -3062,6 +3062,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) " q quiet - currently only suppresses the 'EXECUTING' message\n" " M trace smart match resolution\n" " B dump suBroutine definitions, including special Blocks like BEGIN\n", + " L trace some locale setting information--for Perl core development\n", NULL }; int i = 0; @@ -3070,7 +3071,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) if (isALPHA(**s)) { /* if adding extra options, remember to update DEBUG_MASK */ - static const char debopts[] = "psltocPmfrxuUHXDSTRJvCAqMB"; + static const char debopts[] = "psltocPmfrxuUHXDSTRJvCAqMBL"; for (; isWORDCHAR(**s); (*s)++) { const char * const d = strchr(debopts,**s); diff --git a/perl.h b/perl.h index db08bd3..ff00acd 100644 --- a/perl.h +++ b/perl.h @@ -3560,7 +3560,8 @@ Gid_t getegid (void); #define DEBUG_q_FLAG 0x00800000 /*8388608 */ #define DEBUG_M_FLAG 0x01000000 /*16777216*/ #define DEBUG_B_FLAG 0x02000000 /*33554432*/ -#define DEBUG_MASK 0x03FFEFFF /* mask of all the standard flags */ +#define DEBUG_L_FLAG 0x04000000 /*67108864*/ +#define DEBUG_MASK 0x07FFEFFF /* mask of all the standard flags */ #define DEBUG_DB_RECURSE_FLAG 0x40000000 #define DEBUG_TOP_FLAG 0x80000000 /* XXX what's this for ??? Signal @@ -3592,6 +3593,7 @@ Gid_t getegid (void); # define DEBUG_q_TEST_ (PL_debug & DEBUG_q_FLAG) # define DEBUG_M_TEST_ (PL_debug & DEBUG_M_FLAG) # define DEBUG_B_TEST_ (PL_debug & DEBUG_B_FLAG) +# define DEBUG_L_TEST_ (PL_debug & DEBUG_L_FLAG) # define DEBUG_Xv_TEST_ (DEBUG_X_TEST_ && DEBUG_v_TEST_) # define DEBUG_Uv_TEST_ (DEBUG_U_TEST_ && DEBUG_v_TEST_) # define DEBUG_Pv_TEST_ (DEBUG_P_TEST_ && DEBUG_v_TEST_) @@ -3624,6 +3626,7 @@ Gid_t getegid (void); # define DEBUG_q_TEST DEBUG_q_TEST_ # define DEBUG_M_TEST DEBUG_M_TEST_ # define DEBUG_B_TEST DEBUG_B_TEST_ +# define DEBUG_L_TEST DEBUG_L_TEST_ # define DEBUG_Xv_TEST DEBUG_Xv_TEST_ # define DEBUG_Uv_TEST DEBUG_Uv_TEST_ # define DEBUG_Pv_TEST DEBUG_Pv_TEST_ @@ -3675,6 +3678,7 @@ Gid_t getegid (void); # define DEBUG_q(a) DEBUG__(DEBUG_q_TEST, a) # define DEBUG_M(a) DEBUG__(DEBUG_M_TEST, a) # define DEBUG_B(a) DEBUG__(DEBUG_B_TEST, a) +# define DEBUG_L(a) DEBUG__(DEBUG_L_TEST, a) #else /* DEBUGGING */ @@ -3704,6 +3708,7 @@ Gid_t getegid (void); # define DEBUG_q_TEST (0) # define DEBUG_M_TEST (0) # define DEBUG_B_TEST (0) +# define DEBUG_L_TEST (0) # define DEBUG_Xv_TEST (0) # define DEBUG_Uv_TEST (0) # define DEBUG_Pv_TEST (0) @@ -3735,6 +3740,7 @@ Gid_t getegid (void); # define DEBUG_q(a) # define DEBUG_M(a) # define DEBUG_B(a) +# define DEBUG_L(a) # define DEBUG_Xv(a) # define DEBUG_Uv(a) # define DEBUG_Pv(a) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 45ee8b4..6d3a7e1 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -53,6 +53,11 @@ correctly. See L<perlguts/"Copy on Write"> for detail. This feature was actually added in 5.19.8, but was unintentionally omitted from its delta document. +=head2 C<-DL> runtime option now added for tracing locale setting + +This is designed for Perl core developers to aid in field debugging bugs +regarding locales. + =head1 Security XXX Any security-related notices go here. In particular, any security diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index bb0c7d7..4f31eed 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -262,7 +262,6 @@ pod/perlos2.pod ? Should you be using L<...> instead of 2 pod/perlos2.pod Verbatim line length including indents exceeds 79 by 21 pod/perlos390.pod Verbatim line length including indents exceeds 79 by 11 pod/perlperf.pod Verbatim line length including indents exceeds 79 by 154 -pod/perlqnx.pod Verbatim line length including indents exceeds 79 by 1 pod/perlrun.pod Verbatim line length including indents exceeds 79 by 3 pod/perlsolaris.pod Verbatim line length including indents exceeds 79 by 14 pod/perlsource.pod ? Should you be using F<...> or maybe L<...> instead of 1 @@ -281,7 +280,6 @@ porting/todo.pod Verbatim line length including indents exceeds 79 by 7 utils/c2ph Verbatim line length including indents exceeds 79 by 44 lib/benchmark.pm Verbatim line length including indents exceeds 79 by 2 lib/config.pod ? Should you be using L<...> instead of -1 -lib/config.pod Verbatim line length including indents exceeds 79 by 5 lib/extutils/embed.pm Verbatim line length including indents exceeds 79 by 2 lib/perl5db.pl ? Should you be using L<...> instead of 1 lib/pod/text/overstrike.pm Verbatim line length including indents exceeds 79 by 1 -- Perl5 Master Repository