In perl.git, the branch smoke-me/leont/utf8-readline has been updated <http://perl5.git.perl.org/perl.git/commitdiff/dbf2c7ae25d9a57aef50bdbb934c6f8d39738ecf?hp=b660d2fdc616a934439fa8f22e3273ea35eda1c8>
discards b660d2fdc616a934439fa8f22e3273ea35eda1c8 (commit) discards ebc5f1af6a44217a4bc283f53a2e1ade5ac01fce (commit) discards e3c43849e0501e6a86725312113e0c1782e95f07 (commit) discards 93e6df8a7fb4377ab26b60010ad4594cf6e27aef (commit) discards 60f97582f1fcfce3da6c3e38726e87982a29594c (commit) - Log ----------------------------------------------------------------- commit dbf2c7ae25d9a57aef50bdbb934c6f8d39738ecf Author: Leon Timmermans <faw...@gmail.com> Date: Wed Dec 14 00:17:01 2016 +0100 Make :via and :scalar use readdelim M ext/PerlIO-scalar/scalar.pm M ext/PerlIO-scalar/scalar.xs M ext/PerlIO-via/via.pm M ext/PerlIO-via/via.xs commit 74c0c3c7085e2ab6453383fca4a16cf9ef1a57a0 Author: Leon Timmermans <faw...@gmail.com> Date: Mon Apr 9 21:49:11 2012 +0200 Made :utf8 an actual layer It will check the input for validity, by default strict validity though less strict forms are provided. This also means PerlIO::get_layers doesn't return a "utf8" pseudo-layer anymore, which can break some code making that assumption. M cpan/CPAN-Meta-YAML/t/11_read_string.t M lib/PerlIO.pm M perlio.c M perliol.h M pod/perldiag.pod M pod/perlfunc.pod M pod/perliol.pod M pod/perlrun.pod M pod/perlunifaq.pod M pod/perluniintro.pod M t/io/crlf.t M t/io/layers.t M t/io/utf8.t M t/op/print.t M universal.c commit 61ab095a2d3c9b81285be5a116332871ebaaa4cb Author: Leon Timmermans <faw...@gmail.com> Date: Mon Nov 14 12:15:18 2016 +0100 Make :encoding use the new readdelim method M ext/PerlIO-encoding/encoding.pm M ext/PerlIO-encoding/encoding.xs commit 454b3b5e52faba16943f0e36387c0cf7e4529f08 Author: Leon Timmermans <faw...@gmail.com> Date: Mon Nov 14 12:04:51 2016 +0100 Add fast readdelim to main buffering layers M perlio.c M perliol.h commit bca4ae4015c04a5892659c9a9f16e20245da3e15 Author: Leon Timmermans <faw...@gmail.com> Date: Sun Dec 11 15:44:52 2016 +0100 Implement new style readline and the slow fallback M embed.fnc M embed.h M perlio.c M perliol.h M proto.h M sv.c ----------------------------------------------------------------------- Summary of changes: embed.fnc | 72 +-- ext/XS-APItest/t/utf16_to_utf8.t | 3 +- ext/XS-APItest/t/utf8.t | 918 +++++++++++++++++++++++---------------- handy.h | 3 +- locale.c | 4 +- pod/perldelta.pod | 6 + proto.h | 92 ++-- regen/embed.pl | 30 +- regen/warnings.pl | 58 +++ regexec.c | 1 + t/re/re_tests | 1 + utf8.c | 2 +- utf8.h | 2 +- warnings.h | 58 +++ 14 files changed, 777 insertions(+), 473 deletions(-) diff --git a/embed.fnc b/embed.fnc index ea65683f3d..47acb2b37f 100644 --- a/embed.fnc +++ b/embed.fnc @@ -128,8 +128,20 @@ : : P Pure function: : -: A pure function has no effects except the return value, and the return -: value depends only on params and/or globals. Also implies "R": +: A pure function has no effects except the return value, and the return +: value depends only on params and/or globals. This is a hint to the +: compiler that it can optimize calls to this function out of common +: subexpressions. Consequently if this flag is wrongly specified, it can +: lead to subtle bugs that vary by platform, compiler, compiler version, +: and optimization level. Also, a future commit could easily change a +: currently-pure function without even noticing this flag. So it should +: be used sparingly, only for functions that are unlikely to ever become +: not pure by future commits. It should not be used for static +: functions, as the compiler already has the information needed to make +: the 'pure' determination and doesn't need any hint; so it doesn't add +: value in those cases, and could be dangerous if it causes the compiler +: to skip doing its own checks. It should not be used on functions that +: touch SVs, as those can trigger unexpected magic. Also implies "R": : : proto.h: add __attribute__pure__ : @@ -665,9 +677,9 @@ ApbmM |SV** |hv_store_flags |NULLOK HV *hv|NULLOK const char *key \ |I32 klen|NULLOK SV *val|U32 hash|int flags Amd |void |hv_undef |NULLOK HV *hv poX |void |hv_undef_flags |NULLOK HV *hv|U32 flags -Am |I32 |ibcmp |NN const char* a|NN const char* b|I32 len +AmP |I32 |ibcmp |NN const char* a|NN const char* b|I32 len AnpP |I32 |foldEQ |NN const char* a|NN const char* b|I32 len -Am |I32 |ibcmp_locale |NN const char* a|NN const char* b|I32 len +AmP |I32 |ibcmp_locale |NN const char* a|NN const char* b|I32 len AnpP |I32 |foldEQ_locale |NN const char* a|NN const char* b|I32 len Am |I32 |ibcmp_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ |bool u1|NN const char *s2|NULLOK char **pe2 \ @@ -727,7 +739,7 @@ ADMpR |bool |isIDFIRST_lazy |NN const char* p ADMpR |bool |isALNUM_lazy |NN const char* p #ifdef PERL_IN_UTF8_C snR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp -inPR |bool |is_utf8_cp_above_31_bits|NN const U8 * const s|NN const U8 * const e +inR |bool |is_utf8_cp_above_31_bits|NN const U8 * const s|NN const U8 * const e #endif #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) EXp |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const unsigned int flags @@ -755,16 +767,16 @@ ADMpR |bool |is_uni_lower_lc|UV c ADMpR |bool |is_uni_print_lc|UV c ADMpR |bool |is_uni_punct_lc|UV c ADMpPR |bool |is_uni_xdigit_lc|UV c -AnidRP |bool |is_utf8_invariant_string|NN const U8* const s|STRLEN const len +AnidR |bool |is_utf8_invariant_string|NN const U8* const s|STRLEN const len AmnpdRP |bool |is_ascii_string|NN const U8* const s|const STRLEN len AmnpdRP |bool |is_invariant_string|NN const U8* const s|const STRLEN len AnpdD |STRLEN |is_utf8_char |NN const U8 *s Abmnpd |STRLEN |is_utf8_char_buf|NN const U8 *buf|NN const U8 *buf_end -AnipdP |bool |is_utf8_string |NN const U8 *s|const STRLEN len -AnidP |bool |is_utf8_string_flags \ +AnipdR |bool |is_utf8_string |NN const U8 *s|const STRLEN len +AnidR |bool |is_utf8_string_flags \ |NN const U8 *s|const STRLEN len|const U32 flags -AnidP |bool |is_strict_utf8_string|NN const U8 *s|const STRLEN len -AnidP |bool |is_c9strict_utf8_string|NN const U8 *s|const STRLEN len +AnidR |bool |is_strict_utf8_string|NN const U8 *s|const STRLEN len +AnidR |bool |is_c9strict_utf8_string|NN const U8 *s|const STRLEN len Anpdmb |bool |is_utf8_string_loc \ |NN const U8 *s|const STRLEN len|NN const U8 **ep Andm |bool |is_utf8_string_loc_flags \ @@ -796,7 +808,7 @@ Anid |bool |is_utf8_fixed_width_buf_loclen_flags \ |NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags AmndP |bool |is_utf8_valid_partial_char \ |NN const U8 * const s|NN const U8 * const e -AnidP |bool |is_utf8_valid_partial_char_flags \ +AnidR |bool |is_utf8_valid_partial_char_flags \ |NN const U8 * const s|NN const U8 * const e|const U32 flags AMpR |bool |_is_uni_FOO|const U8 classnum|const UV c AMpR |bool |_is_utf8_FOO|const U8 classnum|NN const U8 *p @@ -898,7 +910,7 @@ EMpRX |bool |grok_bslash_o |NN char** s|NN UV* uv \ |const bool utf8 EMiR |char*|form_short_octal_warning|NN const char * const s \ |const STRLEN len -EiPRn |I32 |regcurly |NN const char *s +EiRn |I32 |regcurly |NN const char *s #endif Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result Apd |int |grok_infnan |NN const char** sp|NN const char *send @@ -1609,8 +1621,8 @@ Apd |void |sv_vsetpvfn |NN SV *const sv|NN const char *const pat|const STRLEN pa |NULLOK va_list *const args|NULLOK SV **const svargs \ |const I32 svmax|NULLOK bool *const maybe_tainted ApR |NV |str_to_version |NN SV *sv -EpRM |SV* |swash_init |NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits|I32 none -EpM |UV |swash_fetch |NN SV *swash|NN const U8 *ptr|bool do_utf8 +EXpRM |SV* |swash_init |NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits|I32 none +EXpM |UV |swash_fetch |NN SV *swash|NN const U8 *ptr|bool do_utf8 #ifdef PERL_IN_REGCOMP_C EiMR |SV* |add_cp_to_invlist |NULLOK SV* invlist|const UV cp EiM |void |invlist_set_len|NN SV* const invlist|const UV len|const bool offset @@ -1693,9 +1705,9 @@ ApdD |UV |to_utf8_case |NN const U8 *p \ |NN const char *normal| \ NULLOK const char *special #if defined(PERL_IN_UTF8_C) -inRP |bool |does_utf8_overflow|NN const U8 * const s|NN const U8 * e -inRP |bool |is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len -inRP |bool |isFF_OVERLONG |NN const U8 * const s|const STRLEN len +inR |bool |does_utf8_overflow|NN const U8 * const s|NN const U8 * e +inR |bool |is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len +inR |bool |isFF_OVERLONG |NN const U8 * const s|const STRLEN len sMR |char * |unexpected_non_continuation_text \ |NN const U8 * const s \ |STRLEN print_len \ @@ -1744,11 +1756,11 @@ p |void |utilize |int aver|I32 floor|NULLOK OP* version|NN OP* idop|NULLOK OP* a Ap |U8* |utf16_to_utf8 |NN U8* p|NN U8 *d|I32 bytelen|NN I32 *newlen Ap |U8* |utf16_to_utf8_reversed|NN U8* p|NN U8 *d|I32 bytelen|NN I32 *newlen AdpPR |STRLEN |utf8_length |NN const U8* s|NN const U8 *e -AipdPR |IV |utf8_distance |NN const U8 *a|NN const U8 *b -AipdPRn |U8* |utf8_hop |NN const U8 *s|SSize_t off -AipdPRn |U8* |utf8_hop_back|NN const U8 *s|SSize_t off|NN const U8 *start -AipdPRn |U8* |utf8_hop_forward|NN const U8 *s|SSize_t off|NN const U8 *end -AipdPRn |U8* |utf8_hop_safe |NN const U8 *s|SSize_t off|NN const U8 *start|NN const U8 *end +AipdR |IV |utf8_distance |NN const U8 *a|NN const U8 *b +AipdRn |U8* |utf8_hop |NN const U8 *s|SSize_t off +AipdRn |U8* |utf8_hop_back|NN const U8 *s|SSize_t off|NN const U8 *start +AipdRn |U8* |utf8_hop_forward|NN const U8 *s|SSize_t off|NN const U8 *end +AipdRn |U8* |utf8_hop_safe |NN const U8 *s|SSize_t off|NN const U8 *start|NN const U8 *end ApMd |U8* |utf8_to_bytes |NN U8 *s|NN STRLEN *len Apd |int |bytes_cmp_utf8 |NN const U8 *b|STRLEN blen|NN const U8 *u \ |STRLEN ulen @@ -1927,7 +1939,7 @@ Apdmb |void |sv_force_normal|NN SV *sv Apd |void |sv_force_normal_flags|NN SV *const sv|const U32 flags pX |SSize_t|tmps_grow_p |SSize_t ix Apd |SV* |sv_rvweaken |NN SV *const sv -AnpPMd |SV* |sv_get_backrefs|NN SV *const sv +AnpMd |SV* |sv_get_backrefs|NN SV *const sv : This is indirectly referenced by globals.c. This is somewhat annoying. p |int |magic_killbackrefs|NN SV *sv|NN MAGIC *mg Ap |OP* |newANONATTRSUB |I32 floor|NULLOK OP *proto|NULLOK OP *attrs|NULLOK OP *block @@ -1980,7 +1992,7 @@ Ap |void |sys_intern_dup |NN struct interp_intern* src|NN struct interp_intern* # endif #endif -AmopP |const XOP * |custom_op_xop |NN const OP *o +Amop |const XOP * |custom_op_xop |NN const OP *o ApR |const char * |custom_op_name |NN const OP *o ApR |const char * |custom_op_desc |NN const OP *o pRX |XOPRETANY |custom_op_get_field |NN const OP *o|const xop_flags_enum field @@ -2275,7 +2287,7 @@ Ei |regnode*|handle_named_backref|NN RExC_state_t *pRExC_state \ |NN I32 *flagp \ |NN char * parse_start \ |char ch -EsnP |unsigned int|regex_set_precedence|const U8 my_operator +EsnR |unsigned int|regex_set_precedence|const U8 my_operator Es |regnode*|handle_regex_sets|NN RExC_state_t *pRExC_state \ |NULLOK SV ** return_invlist \ |NN I32 *flagp|U32 depth \ @@ -2373,8 +2385,8 @@ Es |I32 |make_trie |NN RExC_state_t *pRExC_state \ |U32 word_count|U32 flags|U32 depth Es |regnode *|construct_ahocorasick_from_trie|NN RExC_state_t *pRExC_state \ |NN regnode *source|U32 depth -EnPs |const char *|cntrl_to_mnemonic|const U8 c -EnPs |int |edit_distance |NN const UV *src \ +EnsR |const char *|cntrl_to_mnemonic|const U8 c +EnsR |int |edit_distance |NN const UV *src \ |NN const UV *tgt \ |const STRLEN x \ |const STRLEN y \ @@ -2924,8 +2936,8 @@ Apbm |GV* |gv_SVadd |NULLOK GV *gv #if defined(PERL_IN_UTIL_C) s |bool |ckwarn_common |U32 w #endif -Apo |bool |ckwarn |U32 w -Apo |bool |ckwarn_d |U32 w +ApoP |bool |ckwarn |U32 w +ApoP |bool |ckwarn_d |U32 w : FIXME - exported for ByteLoader - public or private? XEopMR |STRLEN *|new_warnings_bitfield|NULLOK STRLEN *buffer \ |NN const char *const bits|STRLEN size @@ -2967,7 +2979,7 @@ Apnod |Size_t |my_strlcat |NULLOK char *dst|NULLOK const char *src|Size_t size Apnod |Size_t |my_strlcpy |NULLOK char *dst|NULLOK const char *src|Size_t size #endif -Apdn |bool |isinfnan |NV nv +APpdn |bool |isinfnan |NV nv p |bool |isinfnansv |NN SV *sv #if !defined(HAS_SIGNBIT) diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t index 1829dd5fcd..3bb78d4952 100644 --- a/ext/XS-APItest/t/utf16_to_utf8.t +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -14,7 +14,8 @@ for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD, my $string = $prefix . $chr . $suffix; my $name = sprintf "for chr $ord prefix %d, suffix %d", length $prefix, length $suffix; - my $as_utf8 = encode('UTF-8', $string); + my $as_utf8 = $string; + utf8::encode($as_utf8); is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, "utf16_to_utf8 $name"); is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 51997118a6..05693c05a4 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -56,12 +56,12 @@ for (my $i = 0; $i < 256; $i++) { } *I8_to_native = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $i8_to_native[ord $_] } + ? sub { return shift } + : sub { return join "", map { chr $i8_to_native[ord $_] } split "", shift }; *native_to_I8 = (isASCII) - ? sub { return shift } - : sub { return join "", map { chr $native_to_i8[ord $_] } + ? sub { return shift } + : sub { return join "", map { chr $native_to_i8[ord $_] } split "", shift }; sub start_byte_to_cont($) { @@ -156,8 +156,10 @@ foreach ([0, '', '', 'empty'], [1, 'NN', 'N', '1 char substring'], [-2, 'Perl', 'Rules', 'different'], [0, $pound_sign, $pound_sign, 'pound sign'], - [1, $pound_sign . 10, $pound_sign . 1, '10 pounds is more than 1 pound'], - [1, $pound_sign . $pound_sign, $pound_sign, '2 pound signs are more than 1'], + [1, $pound_sign . 10, $pound_sign . 1, + '10 pounds is more than 1 pound'], + [1, $pound_sign . $pound_sign, $pound_sign, + '2 pound signs are more than 1'], [-2, ' $!', " \x{1F42B}!", 'Camels are worth more than 1 dollar'], [-1, '!', "!\x{1F42A}", 'Initial substrings match'], ) { @@ -197,166 +199,344 @@ my %code_points = ( 0xD000 => (isASCII) ? "\xed\x80\x80" : I8_to_native("\xf1\xb4\xa0\xa0"), # Bracket the surrogates, and include several surrogates - 0xD7FF => (isASCII) ? "\xed\x9f\xbf" : I8_to_native("\xf1\xb5\xbf\xbf"), - 0xD800 => (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), - 0xDC00 => (isASCII) ? "\xed\xb0\x80" : I8_to_native("\xf1\xb7\xa0\xa0"), - 0xDFFF => (isASCII) ? "\xee\x80\x80" : I8_to_native("\xf1\xb8\xa0\xa0"), - 0xDFFF => (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), - 0xE000 => (isASCII) ? "\xee\x80\x80" : I8_to_native("\xf1\xb8\xa0\xa0"), + 0xD7FF => (isASCII) ? "\xed\x9f\xbf" : I8_to_native("\xf1\xb5\xbf\xbf"), + 0xD800 => (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), + 0xDC00 => (isASCII) ? "\xed\xb0\x80" : I8_to_native("\xf1\xb7\xa0\xa0"), + 0xDFFF => (isASCII) ? "\xee\x80\x80" : I8_to_native("\xf1\xb8\xa0\xa0"), + 0xDFFF => (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), + 0xE000 => (isASCII) ? "\xee\x80\x80" : I8_to_native("\xf1\xb8\xa0\xa0"), # Include the 32 contiguous non characters, and surrounding code points - 0xFDCF => (isASCII) ? "\xef\xb7\x8f" : I8_to_native("\xf1\xbf\xae\xaf"), - 0xFDD0 => (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), - 0xFDD1 => (isASCII) ? "\xef\xb7\x91" : I8_to_native("\xf1\xbf\xae\xb1"), - 0xFDD2 => (isASCII) ? "\xef\xb7\x92" : I8_to_native("\xf1\xbf\xae\xb2"), - 0xFDD3 => (isASCII) ? "\xef\xb7\x93" : I8_to_native("\xf1\xbf\xae\xb3"), - 0xFDD4 => (isASCII) ? "\xef\xb7\x94" : I8_to_native("\xf1\xbf\xae\xb4"), - 0xFDD5 => (isASCII) ? "\xef\xb7\x95" : I8_to_native("\xf1\xbf\xae\xb5"), - 0xFDD6 => (isASCII) ? "\xef\xb7\x96" : I8_to_native("\xf1\xbf\xae\xb6"), - 0xFDD7 => (isASCII) ? "\xef\xb7\x97" : I8_to_native("\xf1\xbf\xae\xb7"), - 0xFDD8 => (isASCII) ? "\xef\xb7\x98" : I8_to_native("\xf1\xbf\xae\xb8"), - 0xFDD9 => (isASCII) ? "\xef\xb7\x99" : I8_to_native("\xf1\xbf\xae\xb9"), - 0xFDDA => (isASCII) ? "\xef\xb7\x9a" : I8_to_native("\xf1\xbf\xae\xba"), - 0xFDDB => (isASCII) ? "\xef\xb7\x9b" : I8_to_native("\xf1\xbf\xae\xbb"), - 0xFDDC => (isASCII) ? "\xef\xb7\x9c" : I8_to_native("\xf1\xbf\xae\xbc"), - 0xFDDD => (isASCII) ? "\xef\xb7\x9d" : I8_to_native("\xf1\xbf\xae\xbd"), - 0xFDDE => (isASCII) ? "\xef\xb7\x9e" : I8_to_native("\xf1\xbf\xae\xbe"), - 0xFDDF => (isASCII) ? "\xef\xb7\x9f" : I8_to_native("\xf1\xbf\xae\xbf"), - 0xFDE0 => (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), - 0xFDE1 => (isASCII) ? "\xef\xb7\xa1" : I8_to_native("\xf1\xbf\xaf\xa1"), - 0xFDE2 => (isASCII) ? "\xef\xb7\xa2" : I8_to_native("\xf1\xbf\xaf\xa2"), - 0xFDE3 => (isASCII) ? "\xef\xb7\xa3" : I8_to_native("\xf1\xbf\xaf\xa3"), - 0xFDE4 => (isASCII) ? "\xef\xb7\xa4" : I8_to_native("\xf1\xbf\xaf\xa4"), - 0xFDE5 => (isASCII) ? "\xef\xb7\xa5" : I8_to_native("\xf1\xbf\xaf\xa5"), - 0xFDE6 => (isASCII) ? "\xef\xb7\xa6" : I8_to_native("\xf1\xbf\xaf\xa6"), - 0xFDE7 => (isASCII) ? "\xef\xb7\xa7" : I8_to_native("\xf1\xbf\xaf\xa7"), - 0xFDE8 => (isASCII) ? "\xef\xb7\xa8" : I8_to_native("\xf1\xbf\xaf\xa8"), - 0xFDEa => (isASCII) ? "\xef\xb7\x99" : I8_to_native("\xf1\xbf\xaf\xa9"), - 0xFDEA => (isASCII) ? "\xef\xb7\xaa" : I8_to_native("\xf1\xbf\xaf\xaa"), - 0xFDEB => (isASCII) ? "\xef\xb7\xab" : I8_to_native("\xf1\xbf\xaf\xab"), - 0xFDEC => (isASCII) ? "\xef\xb7\xac" : I8_to_native("\xf1\xbf\xaf\xac"), - 0xFDED => (isASCII) ? "\xef\xb7\xad" : I8_to_native("\xf1\xbf\xaf\xad"), - 0xFDEE => (isASCII) ? "\xef\xb7\xae" : I8_to_native("\xf1\xbf\xaf\xae"), - 0xFDEF => (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), - 0xFDF0 => (isASCII) ? "\xef\xb7\xb0" : I8_to_native("\xf1\xbf\xaf\xb0"), + 0xFDCF => (isASCII) ? "\xef\xb7\x8f" : I8_to_native("\xf1\xbf\xae\xaf"), + 0xFDD0 => (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), + 0xFDD1 => (isASCII) ? "\xef\xb7\x91" : I8_to_native("\xf1\xbf\xae\xb1"), + 0xFDD2 => (isASCII) ? "\xef\xb7\x92" : I8_to_native("\xf1\xbf\xae\xb2"), + 0xFDD3 => (isASCII) ? "\xef\xb7\x93" : I8_to_native("\xf1\xbf\xae\xb3"), + 0xFDD4 => (isASCII) ? "\xef\xb7\x94" : I8_to_native("\xf1\xbf\xae\xb4"), + 0xFDD5 => (isASCII) ? "\xef\xb7\x95" : I8_to_native("\xf1\xbf\xae\xb5"), + 0xFDD6 => (isASCII) ? "\xef\xb7\x96" : I8_to_native("\xf1\xbf\xae\xb6"), + 0xFDD7 => (isASCII) ? "\xef\xb7\x97" : I8_to_native("\xf1\xbf\xae\xb7"), + 0xFDD8 => (isASCII) ? "\xef\xb7\x98" : I8_to_native("\xf1\xbf\xae\xb8"), + 0xFDD9 => (isASCII) ? "\xef\xb7\x99" : I8_to_native("\xf1\xbf\xae\xb9"), + 0xFDDA => (isASCII) ? "\xef\xb7\x9a" : I8_to_native("\xf1\xbf\xae\xba"), + 0xFDDB => (isASCII) ? "\xef\xb7\x9b" : I8_to_native("\xf1\xbf\xae\xbb"), + 0xFDDC => (isASCII) ? "\xef\xb7\x9c" : I8_to_native("\xf1\xbf\xae\xbc"), + 0xFDDD => (isASCII) ? "\xef\xb7\x9d" : I8_to_native("\xf1\xbf\xae\xbd"), + 0xFDDE => (isASCII) ? "\xef\xb7\x9e" : I8_to_native("\xf1\xbf\xae\xbe"), + 0xFDDF => (isASCII) ? "\xef\xb7\x9f" : I8_to_native("\xf1\xbf\xae\xbf"), + 0xFDE0 => (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), + 0xFDE1 => (isASCII) ? "\xef\xb7\xa1" : I8_to_native("\xf1\xbf\xaf\xa1"), + 0xFDE2 => (isASCII) ? "\xef\xb7\xa2" : I8_to_native("\xf1\xbf\xaf\xa2"), + 0xFDE3 => (isASCII) ? "\xef\xb7\xa3" : I8_to_native("\xf1\xbf\xaf\xa3"), + 0xFDE4 => (isASCII) ? "\xef\xb7\xa4" : I8_to_native("\xf1\xbf\xaf\xa4"), + 0xFDE5 => (isASCII) ? "\xef\xb7\xa5" : I8_to_native("\xf1\xbf\xaf\xa5"), + 0xFDE6 => (isASCII) ? "\xef\xb7\xa6" : I8_to_native("\xf1\xbf\xaf\xa6"), + 0xFDE7 => (isASCII) ? "\xef\xb7\xa7" : I8_to_native("\xf1\xbf\xaf\xa7"), + 0xFDE8 => (isASCII) ? "\xef\xb7\xa8" : I8_to_native("\xf1\xbf\xaf\xa8"), + 0xFDEa => (isASCII) ? "\xef\xb7\x99" : I8_to_native("\xf1\xbf\xaf\xa9"), + 0xFDEA => (isASCII) ? "\xef\xb7\xaa" : I8_to_native("\xf1\xbf\xaf\xaa"), + 0xFDEB => (isASCII) ? "\xef\xb7\xab" : I8_to_native("\xf1\xbf\xaf\xab"), + 0xFDEC => (isASCII) ? "\xef\xb7\xac" : I8_to_native("\xf1\xbf\xaf\xac"), + 0xFDED => (isASCII) ? "\xef\xb7\xad" : I8_to_native("\xf1\xbf\xaf\xad"), + 0xFDEE => (isASCII) ? "\xef\xb7\xae" : I8_to_native("\xf1\xbf\xaf\xae"), + 0xFDEF => (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), + 0xFDF0 => (isASCII) ? "\xef\xb7\xb0" : I8_to_native("\xf1\xbf\xaf\xb0"), # Mostly around non-characters, but some are transitions to longer strings - 0xFFFD => (isASCII) ? "\xef\xbf\xbd" : I8_to_native("\xf1\xbf\xbf\xbd"), - 0x10000 - 1 => (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), - 0x10000 => (isASCII) ? "\xf0\x90\x80\x80" : I8_to_native("\xf2\xa0\xa0\xa0"), - 0x1FFFD => (isASCII) ? "\xf0\x9f\xbf\xbd" : I8_to_native("\xf3\xbf\xbf\xbd"), - 0x1FFFE => (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), - 0x1FFFF => (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), - 0x20000 => (isASCII) ? "\xf0\xa0\x80\x80" : I8_to_native("\xf4\xa0\xa0\xa0"), - 0x2FFFD => (isASCII) ? "\xf0\xaf\xbf\xbd" : I8_to_native("\xf5\xbf\xbf\xbd"), - 0x2FFFE => (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), - 0x2FFFF => (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), - 0x30000 => (isASCII) ? "\xf0\xb0\x80\x80" : I8_to_native("\xf6\xa0\xa0\xa0"), - 0x3FFFD => (isASCII) ? "\xf0\xbf\xbf\xbd" : I8_to_native("\xf7\xbf\xbf\xbd"), - 0x3FFFE => (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), - 0x40000 - 1 => (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), - 0x40000 => (isASCII) ? "\xf1\x80\x80\x80" : I8_to_native("\xf8\xa8\xa0\xa0\xa0"), - 0x4FFFD => (isASCII) ? "\xf1\x8f\xbf\xbd" : I8_to_native("\xf8\xa9\xbf\xbf\xbd"), - 0x4FFFE => (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), - 0x4FFFF => (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), - 0x50000 => (isASCII) ? "\xf1\x90\x80\x80" : I8_to_native("\xf8\xaa\xa0\xa0\xa0"), - 0x5FFFD => (isASCII) ? "\xf1\x9f\xbf\xbd" : I8_to_native("\xf8\xab\xbf\xbf\xbd"), - 0x5FFFE => (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), - 0x5FFFF => (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), - 0x60000 => (isASCII) ? "\xf1\xa0\x80\x80" : I8_to_native("\xf8\xac\xa0\xa0\xa0"), - 0x6FFFD => (isASCII) ? "\xf1\xaf\xbf\xbd" : I8_to_native("\xf8\xad\xbf\xbf\xbd"), - 0x6FFFE => (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), - 0x6FFFF => (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), - 0x70000 => (isASCII) ? "\xf1\xb0\x80\x80" : I8_to_native("\xf8\xae\xa0\xa0\xa0"), - 0x7FFFD => (isASCII) ? "\xf1\xbf\xbf\xbd" : I8_to_native("\xf8\xaf\xbf\xbf\xbd"), - 0x7FFFE => (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), - 0x7FFFF => (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), - 0x80000 => (isASCII) ? "\xf2\x80\x80\x80" : I8_to_native("\xf8\xb0\xa0\xa0\xa0"), - 0x8FFFD => (isASCII) ? "\xf2\x8f\xbf\xbd" : I8_to_native("\xf8\xb1\xbf\xbf\xbd"), - 0x8FFFE => (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), - 0x8FFFF => (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), - 0x90000 => (isASCII) ? "\xf2\x90\x80\x80" : I8_to_native("\xf8\xb2\xa0\xa0\xa0"), - 0x9FFFD => (isASCII) ? "\xf2\x9f\xbf\xbd" : I8_to_native("\xf8\xb3\xbf\xbf\xbd"), - 0x9FFFE => (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), - 0x9FFFF => (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), - 0xA0000 => (isASCII) ? "\xf2\xa0\x80\x80" : I8_to_native("\xf8\xb4\xa0\xa0\xa0"), - 0xAFFFD => (isASCII) ? "\xf2\xaf\xbf\xbd" : I8_to_native("\xf8\xb5\xbf\xbf\xbd"), - 0xAFFFE => (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), - 0xAFFFF => (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), - 0xB0000 => (isASCII) ? "\xf2\xb0\x80\x80" : I8_to_native("\xf8\xb6\xa0\xa0\xa0"), - 0xBFFFD => (isASCII) ? "\xf2\xbf\xbf\xbd" : I8_to_native("\xf8\xb7\xbf\xbf\xbd"), - 0xBFFFE => (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), - 0xBFFFF => (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), - 0xC0000 => (isASCII) ? "\xf3\x80\x80\x80" : I8_to_native("\xf8\xb8\xa0\xa0\xa0"), - 0xCFFFD => (isASCII) ? "\xf3\x8f\xbf\xbd" : I8_to_native("\xf8\xb9\xbf\xbf\xbd"), - 0xCFFFE => (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), - 0xCFFFF => (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), - 0xD0000 => (isASCII) ? "\xf3\x90\x80\x80" : I8_to_native("\xf8\xba\xa0\xa0\xa0"), - 0xDFFFD => (isASCII) ? "\xf3\x9f\xbf\xbd" : I8_to_native("\xf8\xbb\xbf\xbf\xbd"), - 0xDFFFE => (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), - 0xDFFFF => (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), - 0xE0000 => (isASCII) ? "\xf3\xa0\x80\x80" : I8_to_native("\xf8\xbc\xa0\xa0\xa0"), - 0xEFFFD => (isASCII) ? "\xf3\xaf\xbf\xbd" : I8_to_native("\xf8\xbd\xbf\xbf\xbd"), - 0xEFFFE => (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), - 0xEFFFF => (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), - 0xF0000 => (isASCII) ? "\xf3\xb0\x80\x80" : I8_to_native("\xf8\xbe\xa0\xa0\xa0"), - 0xFFFFD => (isASCII) ? "\xf3\xbf\xbf\xbd" : I8_to_native("\xf8\xbf\xbf\xbf\xbd"), - 0xFFFFE => (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), - 0xFFFFF => (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), - 0x100000 => (isASCII) ? "\xf4\x80\x80\x80" : I8_to_native("\xf9\xa0\xa0\xa0\xa0"), - 0x10FFFD => (isASCII) ? "\xf4\x8f\xbf\xbd" : I8_to_native("\xf9\xa1\xbf\xbf\xbd"), - 0x10FFFE => (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), - 0x10FFFF => (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), - 0x110000 => (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), + 0xFFFD => (isASCII) ? "\xef\xbf\xbd" : I8_to_native("\xf1\xbf\xbf\xbd"), + 0x10000 - 1 => (isASCII) + ? "\xef\xbf\xbf" + : I8_to_native("\xf1\xbf\xbf\xbf"), + 0x10000 => (isASCII) + ? "\xf0\x90\x80\x80" + : I8_to_native("\xf2\xa0\xa0\xa0"), + 0x1FFFD => (isASCII) + ? "\xf0\x9f\xbf\xbd" + : I8_to_native("\xf3\xbf\xbf\xbd"), + 0x1FFFE => (isASCII) + ? "\xf0\x9f\xbf\xbe" + : I8_to_native("\xf3\xbf\xbf\xbe"), + 0x1FFFF => (isASCII) + ? "\xf0\x9f\xbf\xbf" + : I8_to_native("\xf3\xbf\xbf\xbf"), + 0x20000 => (isASCII) + ? "\xf0\xa0\x80\x80" + : I8_to_native("\xf4\xa0\xa0\xa0"), + 0x2FFFD => (isASCII) + ? "\xf0\xaf\xbf\xbd" + : I8_to_native("\xf5\xbf\xbf\xbd"), + 0x2FFFE => (isASCII) + ? "\xf0\xaf\xbf\xbe" + : I8_to_native("\xf5\xbf\xbf\xbe"), + 0x2FFFF => (isASCII) + ? "\xf0\xaf\xbf\xbf" + : I8_to_native("\xf5\xbf\xbf\xbf"), + 0x30000 => (isASCII) + ? "\xf0\xb0\x80\x80" + : I8_to_native("\xf6\xa0\xa0\xa0"), + 0x3FFFD => (isASCII) + ? "\xf0\xbf\xbf\xbd" + : I8_to_native("\xf7\xbf\xbf\xbd"), + 0x3FFFE => (isASCII) + ? "\xf0\xbf\xbf\xbe" + : I8_to_native("\xf7\xbf\xbf\xbe"), + 0x40000 - 1 => (isASCII) + ? "\xf0\xbf\xbf\xbf" + : I8_to_native("\xf7\xbf\xbf\xbf"), + 0x40000 => (isASCII) + ? "\xf1\x80\x80\x80" + : I8_to_native("\xf8\xa8\xa0\xa0\xa0"), + 0x4FFFD => (isASCII) + ? "\xf1\x8f\xbf\xbd" + : I8_to_native("\xf8\xa9\xbf\xbf\xbd"), + 0x4FFFE => (isASCII) + ? "\xf1\x8f\xbf\xbe" + : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), + 0x4FFFF => (isASCII) + ? "\xf1\x8f\xbf\xbf" + : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), + 0x50000 => (isASCII) + ? "\xf1\x90\x80\x80" + : I8_to_native("\xf8\xaa\xa0\xa0\xa0"), + 0x5FFFD => (isASCII) + ? "\xf1\x9f\xbf\xbd" + : I8_to_native("\xf8\xab\xbf\xbf\xbd"), + 0x5FFFE => (isASCII) + ? "\xf1\x9f\xbf\xbe" + : I8_to_native("\xf8\xab\xbf\xbf\xbe"), + 0x5FFFF => (isASCII) + ? "\xf1\x9f\xbf\xbf" + : I8_to_native("\xf8\xab\xbf\xbf\xbf"), + 0x60000 => (isASCII) + ? "\xf1\xa0\x80\x80" + : I8_to_native("\xf8\xac\xa0\xa0\xa0"), + 0x6FFFD => (isASCII) + ? "\xf1\xaf\xbf\xbd" + : I8_to_native("\xf8\xad\xbf\xbf\xbd"), + 0x6FFFE => (isASCII) + ? "\xf1\xaf\xbf\xbe" + : I8_to_native("\xf8\xad\xbf\xbf\xbe"), + 0x6FFFF => (isASCII) + ? "\xf1\xaf\xbf\xbf" + : I8_to_native("\xf8\xad\xbf\xbf\xbf"), + 0x70000 => (isASCII) + ? "\xf1\xb0\x80\x80" + : I8_to_native("\xf8\xae\xa0\xa0\xa0"), + 0x7FFFD => (isASCII) + ? "\xf1\xbf\xbf\xbd" + : I8_to_native("\xf8\xaf\xbf\xbf\xbd"), + 0x7FFFE => (isASCII) + ? "\xf1\xbf\xbf\xbe" + : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), + 0x7FFFF => (isASCII) + ? "\xf1\xbf\xbf\xbf" + : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), + 0x80000 => (isASCII) + ? "\xf2\x80\x80\x80" + : I8_to_native("\xf8\xb0\xa0\xa0\xa0"), + 0x8FFFD => (isASCII) + ? "\xf2\x8f\xbf\xbd" + : I8_to_native("\xf8\xb1\xbf\xbf\xbd"), + 0x8FFFE => (isASCII) + ? "\xf2\x8f\xbf\xbe" + : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), + 0x8FFFF => (isASCII) + ? "\xf2\x8f\xbf\xbf" + : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), + 0x90000 => (isASCII) + ? "\xf2\x90\x80\x80" + : I8_to_native("\xf8\xb2\xa0\xa0\xa0"), + 0x9FFFD => (isASCII) + ? "\xf2\x9f\xbf\xbd" + : I8_to_native("\xf8\xb3\xbf\xbf\xbd"), + 0x9FFFE => (isASCII) + ? "\xf2\x9f\xbf\xbe" + : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), + 0x9FFFF => (isASCII) + ? "\xf2\x9f\xbf\xbf" + : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), + 0xA0000 => (isASCII) + ? "\xf2\xa0\x80\x80" + : I8_to_native("\xf8\xb4\xa0\xa0\xa0"), + 0xAFFFD => (isASCII) + ? "\xf2\xaf\xbf\xbd" + : I8_to_native("\xf8\xb5\xbf\xbf\xbd"), + 0xAFFFE => (isASCII) + ? "\xf2\xaf\xbf\xbe" + : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), + 0xAFFFF => (isASCII) + ? "\xf2\xaf\xbf\xbf" + : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), + 0xB0000 => (isASCII) + ? "\xf2\xb0\x80\x80" + : I8_to_native("\xf8\xb6\xa0\xa0\xa0"), + 0xBFFFD => (isASCII) + ? "\xf2\xbf\xbf\xbd" + : I8_to_native("\xf8\xb7\xbf\xbf\xbd"), + 0xBFFFE => (isASCII) + ? "\xf2\xbf\xbf\xbe" + : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), + 0xBFFFF => (isASCII) + ? "\xf2\xbf\xbf\xbf" + : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), + 0xC0000 => (isASCII) + ? "\xf3\x80\x80\x80" + : I8_to_native("\xf8\xb8\xa0\xa0\xa0"), + 0xCFFFD => (isASCII) + ? "\xf3\x8f\xbf\xbd" + : I8_to_native("\xf8\xb9\xbf\xbf\xbd"), + 0xCFFFE => (isASCII) + ? "\xf3\x8f\xbf\xbe" + : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), + 0xCFFFF => (isASCII) + ? "\xf3\x8f\xbf\xbf" + : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), + 0xD0000 => (isASCII) + ? "\xf3\x90\x80\x80" + : I8_to_native("\xf8\xba\xa0\xa0\xa0"), + 0xDFFFD => (isASCII) + ? "\xf3\x9f\xbf\xbd" + : I8_to_native("\xf8\xbb\xbf\xbf\xbd"), + 0xDFFFE => (isASCII) + ? "\xf3\x9f\xbf\xbe" + : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), + 0xDFFFF => (isASCII) + ? "\xf3\x9f\xbf\xbf" + : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), + 0xE0000 => (isASCII) + ? "\xf3\xa0\x80\x80" + : I8_to_native("\xf8\xbc\xa0\xa0\xa0"), + 0xEFFFD => (isASCII) + ? "\xf3\xaf\xbf\xbd" + : I8_to_native("\xf8\xbd\xbf\xbf\xbd"), + 0xEFFFE => (isASCII) + ? "\xf3\xaf\xbf\xbe" + : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), + 0xEFFFF => (isASCII) + ? "\xf3\xaf\xbf\xbf" + : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), + 0xF0000 => (isASCII) + ? "\xf3\xb0\x80\x80" + : I8_to_native("\xf8\xbe\xa0\xa0\xa0"), + 0xFFFFD => (isASCII) + ? "\xf3\xbf\xbf\xbd" + : I8_to_native("\xf8\xbf\xbf\xbf\xbd"), + 0xFFFFE => (isASCII) + ? "\xf3\xbf\xbf\xbe" + : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), + 0xFFFFF => (isASCII) + ? "\xf3\xbf\xbf\xbf" + : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), + 0x100000 => (isASCII) + ? "\xf4\x80\x80\x80" + : I8_to_native("\xf9\xa0\xa0\xa0\xa0"), + 0x10FFFD => (isASCII) + ? "\xf4\x8f\xbf\xbd" + : I8_to_native("\xf9\xa1\xbf\xbf\xbd"), + 0x10FFFE => (isASCII) + ? "\xf4\x8f\xbf\xbe" + : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), + 0x10FFFF => (isASCII) + ? "\xf4\x8f\xbf\xbf" + : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), + 0x110000 => (isASCII) + ? "\xf4\x90\x80\x80" + : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), # Things that would be noncharacters if they were in Unicode, and might be # mistaken, if the C code is bad, to be nonchars - 0x11FFFE => (isASCII) ? "\xf4\x9f\xbf\xbe" : I8_to_native("\xf9\xa3\xbf\xbf\xbe"), - 0x11FFFF => (isASCII) ? "\xf4\x9f\xbf\xbf" : I8_to_native("\xf9\xa3\xbf\xbf\xbf"), - 0x20FFFE => (isASCII) ? "\xf8\x88\x8f\xbf\xbe" : I8_to_native("\xfa\xa1\xbf\xbf\xbe"), - 0x20FFFF => (isASCII) ? "\xf8\x88\x8f\xbf\xbf" : I8_to_native("\xfa\xa1\xbf\xbf\xbf"), - - 0x200000 - 1 => (isASCII) ? "\xf7\xbf\xbf\xbf" : I8_to_native("\xf9\xbf\xbf\xbf\xbf"), - 0x200000 => (isASCII) ? "\xf8\x88\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), - 0x400000 - 1 => (isASCII) ? "\xf8\x8f\xbf\xbf\xbf" : I8_to_native("\xfb\xbf\xbf\xbf\xbf"), - 0x400000 => (isASCII) ? "\xf8\x90\x80\x80\x80" : I8_to_native("\xfc\xa4\xa0\xa0\xa0\xa0"), - 0x4000000 - 1 => (isASCII) ? "\xfb\xbf\xbf\xbf\xbf" : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), - 0x4000000 => (isASCII) ? "\xfc\x84\x80\x80\x80\x80" : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), - 0x4000000 - 1 => (isASCII) ? "\xfb\xbf\xbf\xbf\xbf" : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), - 0x4000000 => (isASCII) ? "\xfc\x84\x80\x80\x80\x80" : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), - 0x40000000 - 1 => (isASCII) ? "\xfc\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xfe\xbf\xbf\xbf\xbf\xbf\xbf"), - 0x40000000 => (isASCII) ? "\xfd\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0"), - 0x80000000 - 1 => (isASCII) ? "\xfd\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf"), - 0x80000000 => (isASCII) ? "\xfe\x82\x80\x80\x80\x80\x80" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), - 0xFFFFFFFF => (isASCII) ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), + 0x11FFFE => (isASCII) + ? "\xf4\x9f\xbf\xbe" + : I8_to_native("\xf9\xa3\xbf\xbf\xbe"), + 0x11FFFF => (isASCII) + ? "\xf4\x9f\xbf\xbf" + : I8_to_native("\xf9\xa3\xbf\xbf\xbf"), + 0x20FFFE => (isASCII) + ? "\xf8\x88\x8f\xbf\xbe" + : I8_to_native("\xfa\xa1\xbf\xbf\xbe"), + 0x20FFFF => (isASCII) + ? "\xf8\x88\x8f\xbf\xbf" + : I8_to_native("\xfa\xa1\xbf\xbf\xbf"), + + 0x200000 - 1 => (isASCII) + ? "\xf7\xbf\xbf\xbf" + : I8_to_native("\xf9\xbf\xbf\xbf\xbf"), + 0x200000 => (isASCII) + ? "\xf8\x88\x80\x80\x80" + : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), + 0x400000 - 1 => (isASCII) + ? "\xf8\x8f\xbf\xbf\xbf" + : I8_to_native("\xfb\xbf\xbf\xbf\xbf"), + 0x400000 => (isASCII) + ? "\xf8\x90\x80\x80\x80" + : I8_to_native("\xfc\xa4\xa0\xa0\xa0\xa0"), + 0x4000000 - 1 => (isASCII) + ? "\xfb\xbf\xbf\xbf\xbf" + : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), + 0x4000000 => (isASCII) + ? "\xfc\x84\x80\x80\x80\x80" + : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), + 0x4000000 - 1 => (isASCII) + ? "\xfb\xbf\xbf\xbf\xbf" + : I8_to_native("\xfd\xbf\xbf\xbf\xbf\xbf"), + 0x4000000 => (isASCII) + ? "\xfc\x84\x80\x80\x80\x80" + : I8_to_native("\xfe\xa2\xa0\xa0\xa0\xa0\xa0"), + 0x40000000 - 1 => (isASCII) + ? "\xfc\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xfe\xbf\xbf\xbf\xbf\xbf\xbf"), + 0x40000000 => + (isASCII) ? "\xfd\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0"), + 0x80000000 - 1 => + (isASCII) ? "\xfd\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf"), + 0x80000000 => + (isASCII) ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + 0xFFFFFFFF => + (isASCII) ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), ); if ($is64bit) { no warnings qw(overflow portable); - $code_points{0x100000000} = (isASCII) - ? "\xfe\x84\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"); - $code_points{0x1000000000 - 1} = (isASCII) - ? "\xfe\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); - $code_points{0x1000000000} = (isASCII) - ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"); - $code_points{0xFFFFFFFFFFFFFFFF} = (isASCII) - ? "\xff\x80\x8f\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xff\xaf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); - if (isASCII) { # These could falsely show as overlongs in a naive implementation - $code_points{0x40000000000} = "\xff\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80"; - $code_points{0x1000000000000} = "\xff\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80"; - $code_points{0x40000000000000} = "\xff\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80"; - $code_points{0x1000000000000000} = "\xff\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"; + $code_points{0x100000000} + = (isASCII) + ? "\xfe\x84\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"); + $code_points{0x1000000000 - 1} + = (isASCII) + ? "\xfe\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); + $code_points{0x1000000000} + = (isASCII) + ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" + : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"); + $code_points{0xFFFFFFFFFFFFFFFF} + = (isASCII) + ? "\xff\x80\x8f\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native("\xff\xaf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"); + if (isASCII) { # These could falsely show as overlongs in a naive + # implementation + $code_points{0x40000000000} + = "\xff\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80"; + $code_points{0x1000000000000} + = "\xff\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80"; + $code_points{0x40000000000000} + = "\xff\x80\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80"; + $code_points{0x1000000000000000} + = "\xff\x80\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"; # overflows - #$code_points{0xfoo} = "\xff\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"; + #$code_points{0xfoo} + # = "\xff\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"; } } -elsif (! isASCII) { # 32-bit EBCDIC. 64-bit is clearer to handle, so doesn't need this test case +elsif (! isASCII) { # 32-bit EBCDIC. 64-bit is clearer to handle, so doesn't + # need this test case no warnings qw(overflow portable); - $code_points{0x40000000} = I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0"); + $code_points{0x40000000} = I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0"); } # Now add in entries for each of code points 0-255, which require special @@ -470,20 +650,24 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; if ($j == $byte_length - 1) { - my $ret = test_is_utf8_valid_partial_char_flags($n_chr, $byte_length, 0); - is($ret, 0, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($n_chr) . ") returns 0 for full character"); + my $ret + = test_is_utf8_valid_partial_char_flags($n_chr, $byte_length, 0); + is($ret, 0, " Verify is_utf8_valid_partial_char_flags(" + . display_bytes($n_chr) + . ") returns 0 for full character"); } else { my $bytes_so_far = substr($n_chr, 0, $j + 1); - my $ret = test_is_utf8_valid_partial_char_flags($bytes_so_far, $j + 1, 0); - is($ret, 1, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($bytes_so_far) . ") returns 1"); + my $ret + = test_is_utf8_valid_partial_char_flags($bytes_so_far, $j + 1, 0); + is($ret, 1, " Verify is_utf8_valid_partial_char_flags(" + . display_bytes($bytes_so_far) + . ") returns 1"); } - unless (is(scalar @warnings, 0, - " Verify is_utf8_valid_partial_char_flags generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, " Verify is_utf8_valid_partial_char_flags" + . " generated no warnings") + or output_warnings(@warnings); my $b = substr($n_chr, $j, 1); my $hex_b = sprintf("\"\\x%02x\"", ord $b); @@ -554,7 +738,7 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $valid_under_c9strict = 0; if ($n > 2 ** 31 - 1) { $this_utf8_flags &= - ~($UTF8_DISALLOW_ABOVE_31_BIT|$UTF8_WARN_ABOVE_31_BIT); + ~($UTF8_DISALLOW_ABOVE_31_BIT|$UTF8_WARN_ABOVE_31_BIT); $valid_for_fits_in_31_bits = 0; } } @@ -593,7 +777,8 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; my $ret = test_isUTF8_CHAR($bytes, $len); - is($ret, $len, "Verify isUTF8_CHAR($display_bytes) returns expected length: $len"); + is($ret, $len, + "Verify isUTF8_CHAR($display_bytes) returns expected length: $len"); unless (is(scalar @warnings, 0, "Verify isUTF8_CHAR() for $hex_n generated no warnings")) @@ -604,115 +789,107 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; $ret = test_isUTF8_CHAR($bytes, $len - 1); - is($ret, 0, "Verify isUTF8_CHAR() with too short length parameter returns 0"); + is($ret, 0, + "Verify isUTF8_CHAR() with too short length parameter returns 0"); - unless (is(scalar @warnings, 0, - "Verify isUTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, "Verify isUTF8_CHAR() generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isUTF8_CHAR_flags($bytes, $len, 0); - is($ret, $len, "Verify isUTF8_CHAR_flags($display_bytes, 0) returns expected length: $len"); + is($ret, $len, "Verify isUTF8_CHAR_flags($display_bytes, 0)" + . " returns expected length: $len"); - unless (is(scalar @warnings, 0, - "Verify isUTF8_CHAR_flags() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isUTF8_CHAR_flags() for $hex_n generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isUTF8_CHAR_flags($bytes, $len - 1, 0); - is($ret, 0, "Verify isUTF8_CHAR_flags() with too short length parameter returns 0"); + is($ret, 0, + "Verify isUTF8_CHAR_flags() with too short length parameter returns 0"); - unless (is(scalar @warnings, 0, - "Verify isUTF8_CHAR_flags() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, "Verify isUTF8_CHAR_flags() generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isSTRICT_UTF8_CHAR($bytes, $len); my $expected_len = ($valid_under_strict) ? $len : 0; - is($ret, $expected_len, "Verify isSTRICT_UTF8_CHAR($display_bytes) returns expected length: $expected_len"); + is($ret, $expected_len, "Verify isSTRICT_UTF8_CHAR($display_bytes)" + . " returns expected length: $expected_len"); - unless (is(scalar @warnings, 0, - "Verify isSTRICT_UTF8_CHAR() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isSTRICT_UTF8_CHAR() for $hex_n generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isSTRICT_UTF8_CHAR($bytes, $len - 1); - is($ret, 0, "Verify isSTRICT_UTF8_CHAR() with too short length parameter returns 0"); + is($ret, 0, + "Verify isSTRICT_UTF8_CHAR() with too short length parameter returns 0"); - unless (is(scalar @warnings, 0, - "Verify isSTRICT_UTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, "Verify isSTRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); undef @warnings; - $ret = test_isUTF8_CHAR_flags($bytes, $len, $UTF8_DISALLOW_ILLEGAL_INTERCHANGE); - is($ret, $expected_len, "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE') acts like isSTRICT_UTF8_CHAR"); + $ret = test_isUTF8_CHAR_flags($bytes, $len, + $UTF8_DISALLOW_ILLEGAL_INTERCHANGE); + is($ret, $expected_len, + "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" + . " acts like isSTRICT_UTF8_CHAR"); - unless (is(scalar @warnings, 0, - "Verify isUTF8_CHAR() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isUTF8_CHAR() for $hex_n generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $len); $expected_len = ($valid_under_c9strict) ? $len : 0; - is($ret, $expected_len, "Verify isC9_STRICT_UTF8_CHAR($display_bytes) returns expected length: $len"); + is($ret, $expected_len, "Verify isC9_STRICT_UTF8_CHAR($display_bytes)" + . " returns expected length: $len"); - unless (is(scalar @warnings, 0, - "Verify isC9_STRICT_UTF8_CHAR() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isC9_STRICT_UTF8_CHAR() for $hex_n generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $len - 1); - is($ret, 0, "Verify isC9_STRICT_UTF8_CHAR() with too short length parameter returns 0"); + is($ret, 0, + "Verify isC9_STRICT_UTF8_CHAR() with too short length parameter returns 0"); - unless (is(scalar @warnings, 0, - "Verify isC9_STRICT_UTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isC9_STRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); undef @warnings; - $ret = test_isUTF8_CHAR_flags($bytes, $len, $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); - is($ret, $expected_len, "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE') acts like isC9_STRICT_UTF8_CHAR"); + $ret = test_isUTF8_CHAR_flags($bytes, $len, + $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); + is($ret, $expected_len, + "Verify isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" + ." acts like isC9_STRICT_UTF8_CHAR"); - unless (is(scalar @warnings, 0, - "Verify isUTF8_CHAR() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify isUTF8_CHAR() for $hex_n generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret_ref = test_valid_utf8_to_uvchr($bytes); - is($ret_ref->[0], $n, "Verify valid_utf8_to_uvchr($display_bytes) returns $hex_n"); - is($ret_ref->[1], $len, "Verify valid_utf8_to_uvchr() for $hex_n returns expected length: $len"); + is($ret_ref->[0], $n, + "Verify valid_utf8_to_uvchr($display_bytes) returns $hex_n"); + is($ret_ref->[1], $len, + "Verify valid_utf8_to_uvchr() for $hex_n returns expected length: $len"); - unless (is(scalar @warnings, 0, - "Verify valid_utf8_to_uvchr() for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify valid_utf8_to_uvchr() for $hex_n generated no warnings") + or output_warnings(@warnings); # Similarly for uvchr_to_utf8 my $this_uvchr_flags = $look_for_everything_uvchr_to; @@ -727,21 +904,23 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $this_uvchr_flags &= ~($UNICODE_DISALLOW_NONCHAR|$UNICODE_WARN_NONCHAR); } elsif ($n >= 0xD800 && $n <= 0xDFFF) { - $this_uvchr_flags &= ~($UNICODE_DISALLOW_SURROGATE|$UNICODE_WARN_SURROGATE); + $this_uvchr_flags + &= ~($UNICODE_DISALLOW_SURROGATE|$UNICODE_WARN_SURROGATE); } $display_flags = sprintf "0x%x", $this_uvchr_flags; undef @warnings; $ret = test_uvchr_to_utf8_flags($n, $this_uvchr_flags); - ok(defined $ret, "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returned success"); - is($ret, $bytes, "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returns correct bytes"); + ok(defined $ret, + "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returned success"); + is($ret, $bytes, + "Verify uvchr_to_utf8_flags($hex_n, $display_flags) returns correct bytes"); - unless (is(scalar @warnings, 0, - "Verify uvchr_to_utf8_flags($hex_n, $display_flags) for $hex_n generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "Verify uvchr_to_utf8_flags($hex_n, $display_flags) for $hex_n" + . " generated no warnings") + or output_warnings(@warnings); # Now append this code point to a string that we will test various # versions of is_foo_utf8_string_bar on, and keep a count of how many code @@ -862,7 +1041,9 @@ for my $restriction (sort keys %restriction_types) { # a continuation byte makes it invalid; appending a # partial character makes the 'string' form invalid, # but not the 'fixed_width_buf' form. - if ($this_error_type eq $cont_byte || $this_error_type eq $p) { + if ( $this_error_type eq $cont_byte + || $this_error_type eq $p) + { $bytes .= $this_error_type; if ($this_error_type eq $cont_byte) { $test_name_suffix @@ -1188,8 +1369,9 @@ else { # 64-bit ASCII, or EBCDIC of any size. push @malformations, [ "overlong malformation, lowest max-byte", (isASCII) - ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $max_bytes, $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, 0, # NUL @@ -1198,8 +1380,9 @@ else { # 64-bit ASCII, or EBCDIC of any size. ], [ "overlong malformation, highest max-byte", (isASCII) # 2**36-1 on ASCII; 2**30-1 on EBCDIC - ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"), + ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"), $max_bytes, $UTF8_ALLOW_LONG, $UTF8_GOT_LONG, (isASCII) ? 0xFFFFFFFFF : 0x3FFFFFFF, @@ -1210,7 +1393,8 @@ else { # 64-bit ASCII, or EBCDIC of any size. if (! $is64bit) { # 32-bit EBCDIC push @malformations, [ "overflow malformation", - I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"), $max_bytes, 0, # There is no way to allow this malformation $UTF8_GOT_OVERFLOW, @@ -1223,8 +1407,9 @@ else { # 64-bit ASCII, or EBCDIC of any size. push @malformations, [ "overflow malformation", (isASCII) - ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" - : I8_to_native("\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + ? "\xff\x80\x90\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0" + : I8_to_native( + "\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $max_bytes, 0, # There is no way to allow this malformation $UTF8_GOT_OVERFLOW, @@ -1251,37 +1436,28 @@ foreach my $test (@malformations) { my $ret = test_isUTF8_CHAR($bytes, $length); is($ret, 0, "$testname: isUTF8_CHAR returns 0"); - unless (is(scalar @warnings, 0, - "$testname: isUTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, "$testname: isUTF8_CHAR() generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isUTF8_CHAR_flags($bytes, $length, 0); is($ret, 0, "$testname: isUTF8_CHAR_flags returns 0"); - unless (is(scalar @warnings, 0, - "$testname: isUTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, "$testname: isUTF8_CHAR_flags() generated no" + . " warnings") + or output_warnings(@warnings); $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); is($ret, 0, "$testname: isSTRICT_UTF8_CHAR returns 0"); - unless (is(scalar @warnings, 0, - "$testname: isSTRICT_UTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "$testname: isSTRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); is($ret, 0, "$testname: isC9_STRICT_UTF8_CHAR returns 0"); - unless (is(scalar @warnings, 0, - "$testname: isC9_STRICT_UTF8_CHAR() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "$testname: isC9_STRICT_UTF8_CHAR() generated no warnings") + or output_warnings(@warnings); for my $j (1 .. $length - 1) { my $partial = substr($bytes, 0, $j); @@ -1300,11 +1476,10 @@ foreach my $test (@malformations) { is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" . display_bytes($partial) . ")$comment returns $ret_should_be"); - unless (is(scalar @warnings, 0, - "$testname: is_utf8_valid_partial_char_flags() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags() generated" + . " no warnings") + or output_warnings(@warnings); } @@ -1356,7 +1531,9 @@ foreach my $test (@malformations) { $ret_ref = test_utf8n_to_uvchr_error($bytes, $length, $UTF8_CHECK_ONLY); is($ret_ref->[0], 0, "$testname: CHECK_ONLY: Returns 0"); is($ret_ref->[1], -1, "$testname: CHECK_ONLY: returns -1 for length"); - if (! is(scalar @warnings, 0, "$testname: CHECK_ONLY: no warnings generated")) { + if (! is(scalar @warnings, 0, + "$testname: CHECK_ONLY: no warnings generated")) + { output_warnings(@warnings); } is($ret_ref->[2], $expected_error_flags, @@ -1740,8 +1917,9 @@ my @tests = ( ], [ "requires at least 32 bits", (isASCII) - ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + ? "\xfe\x82\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), # This code point is chosen so that it is representable in a UV on # 32-bit machines $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, @@ -1753,8 +1931,9 @@ my @tests = ( ], [ "highest 32 bit code point", (isASCII) - ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), + ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0xFFFFFFFF, @@ -1762,10 +1941,12 @@ my @tests = ( (isASCII) ? 1 : 8, nonportable_regex(0xffffffff) ], - [ "requires at least 32 bits, and use SUPER-type flags, instead of ABOVE_31_BIT", + [ "requires at least 32 bits, and use SUPER-type flags, instead of" + . " ABOVE_31_BIT", (isASCII) ? "\xfe\x82\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_SUPER, $UTF8_DISALLOW_SUPER, $UTF8_GOT_SUPER, 'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes, @@ -1783,11 +1964,13 @@ my @tests = ( # since we have no reports of failures with it. (($is64bit) ? ((isASCII) - ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" - : I8_to_native("\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) + ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" + : I8_to_native( + "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) : ((isASCII) - ? "\xfe\x86\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), + ? "\xfe\x86\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, @@ -1817,8 +2000,9 @@ else { push @tests, [ "More than 32 bits", (isASCII) - ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" - : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" + : I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x1000000000, @@ -1826,9 +2010,11 @@ else { qr/and( is)? not portable/ ]; if (! isASCII) { - push @tests, # These could falsely show wrongly in a naive implementation + push @tests, # These could falsely show wrongly in a naive + # implementation [ "requires at least 32 bits", - I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x800000000, @@ -1836,7 +2022,8 @@ else { nonportable_regex(0x80000000) ], [ "requires at least 32 bits", - I8_to_native("\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x10000000000, @@ -1844,7 +2031,8 @@ else { nonportable_regex(0x10000000000) ], [ "requires at least 32 bits", - I8_to_native("\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x200000000000, @@ -1852,7 +2040,8 @@ else { nonportable_regex(0x20000000000) ], [ "requires at least 32 bits", - I8_to_native("\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x4000000000000, @@ -1860,7 +2049,8 @@ else { nonportable_regex(0x4000000000000) ], [ "requires at least 32 bits", - I8_to_native("\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), + I8_to_native( + "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x80000000000000, @@ -1868,8 +2058,8 @@ else { nonportable_regex(0x80000000000000) ], [ "requires at least 32 bits", - I8_to_native("\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), - #IBM-1047 \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41 + I8_to_native( + "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), $UTF8_WARN_ABOVE_31_BIT,$UTF8_DISALLOW_ABOVE_31_BIT, $UTF8_GOT_ABOVE_31_BIT, 'utf8', 0x1000000000000000, @@ -1881,7 +2071,8 @@ else { foreach my $test (@tests) { my ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, - $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message ) = @$test; + $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message + ) = @$test; my $length = length $bytes; my $will_overflow = $testname =~ /overflow/ ? 'overflow' : ""; @@ -1898,14 +2089,13 @@ foreach my $test (@tests) { else { is($ret, $length, "isUTF8_CHAR() $testname: returns expected length: $length"); - is($ret_flags, $length, - "isUTF8_CHAR_flags(...,0) $testname: returns expected length: $length"); - } - unless (is(scalar @warnings, 0, - "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated no warnings")) - { - output_warnings(@warnings); + is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" + . " returns expected length: $length"); } + is(scalar @warnings, 0, + "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" + . " no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); @@ -1917,18 +2107,18 @@ foreach my $test (@tests) { || $allowed_uv > 0x10FFFF) ? 0 : $length; - is($ret, $expected_ret, - "isSTRICT_UTF8_CHAR() $testname: returns expected length: $expected_ret"); + is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" + . " expected length: $expected_ret"); $ret = test_isUTF8_CHAR_flags($bytes, $length, $UTF8_DISALLOW_ILLEGAL_INTERCHANGE); is($ret, $expected_ret, - "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE') acts like isSTRICT_UTF8_CHAR"); - } - unless (is(scalar @warnings, 0, - "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname: generated no warnings")) - { - output_warnings(@warnings); + "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" + . " acts like isSTRICT_UTF8_CHAR"); } + is(scalar @warnings, 0, + "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" + . " generated no warnings") + or output_warnings(@warnings); undef @warnings; $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); @@ -1940,18 +2130,18 @@ foreach my $test (@tests) { || $allowed_uv > 0x10FFFF) ? 0 : $length; - is($ret, $expected_ret, - "isC9_STRICT_UTF8_CHAR() $testname: returns expected length: $expected_ret"); + is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" + ." returns expected length: $expected_ret"); $ret = test_isUTF8_CHAR_flags($bytes, $length, $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); is($ret, $expected_ret, - "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE') acts like isC9_STRICT_UTF8_CHAR"); - } - unless (is(scalar @warnings, 0, - "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname: generated no warnings")) - { - output_warnings(@warnings); + "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" + . " acts like isC9_STRICT_UTF8_CHAR"); } + is(scalar @warnings, 0, + "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" + . " generated no warnings") + or output_warnings(@warnings); # Test partial character handling, for each byte not a full character for my $j (1.. $length - 1) { @@ -1971,7 +2161,8 @@ foreach my $test (@tests) { $comment = "disallowed"; if ($j < $needed_to_discern_len) { $ret_should_be = 1; - $comment .= ", but need $needed_to_discern_len bytes to discern:"; + $comment .= ", but need $needed_to_discern_len bytes" + . " to discern:"; } } else { @@ -1981,15 +2172,16 @@ foreach my $test (@tests) { undef @warnings; - $ret = test_is_utf8_valid_partial_char_flags($partial, $j, $disallow_flag); - is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" + $ret = test_is_utf8_valid_partial_char_flags($partial, $j, + $disallow_flag); + is($ret, $ret_should_be, + "$testname: is_utf8_valid_partial_char_flags(" . display_bytes($partial) . "), $comment: returns $ret_should_be"); - unless (is(scalar @warnings, 0, - "$testname: is_utf8_valid_partial_char_flags() generated no warnings")) - { - output_warnings(@warnings); - } + is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags()" + . " generated no warnings") + or output_warnings(@warnings); } } } @@ -2136,26 +2328,19 @@ foreach my $test (@tests) { next; } if ($disallowed) { - unless (is($ret_ref->[0], 0, - "$this_name: Returns 0")) - { - diag $call; - } + is($ret_ref->[0], 0, "$this_name: Returns 0") + or diag $call; } else { - unless (is($ret_ref->[0], $expected_uv, + is($ret_ref->[0], $expected_uv, "$this_name: Returns expected uv: " - . sprintf("0x%04X", $expected_uv))) - { - diag $call; - } - } - unless (is($ret_ref->[1], $this_expected_len, - "$this_name: Returns expected length:" - . " $this_expected_len")) - { - diag $call; + . sprintf("0x%04X", $expected_uv)) + or diag $call; } + is($ret_ref->[1], $this_expected_len, + "$this_name: Returns expected length:" + . " $this_expected_len") + or diag $call; my $errors = $ret_ref->[2]; @@ -2168,20 +2353,16 @@ foreach my $test (@tests) { } splice @expected_errors, $i, 1; } - unless (is(scalar @expected_errors, 0, - "Got all the expected malformation errors")) - { - diag Dumper \@expected_errors; - } + is(scalar @expected_errors, 0, + "Got all the expected malformation errors") + or diag Dumper \@expected_errors; if ( $this_expected_len >= $this_needed_to_discern_len && ($warn_flag || $disallow_flag)) { - unless (is($errors, $expected_error_flags, - "Got the correct error flag")) - { - diag $call; - } + is($errors, $expected_error_flags, + "Got the correct error flag") + or diag $call; } else { is($errors, 0, "Got no other error flag"); @@ -2226,11 +2407,9 @@ foreach my $test (@tests) { if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { - unless (like($warnings[0], $message, - "$this_name: Got expected warning")) - { - diag $call; - } + like($warnings[0], $message, + "$this_name: Got expected warning") + or diag $call; } else { diag $call; @@ -2257,17 +2436,12 @@ foreach my $test (@tests) { $ret_ref = test_utf8n_to_uvchr_error( $this_bytes, $this_length, $disallow_flag|$UTF8_CHECK_ONLY); - unless (is($ret_ref->[0], 0, - "$this_name, CHECK_ONLY: Returns 0")) - { - diag $call; - } - unless (is($ret_ref->[1], -1, - "$this_name: CHECK_ONLY: returns -1 for" - . " length")) - { - diag $call; - } + is($ret_ref->[0], 0, + "$this_name, CHECK_ONLY: Returns 0") + or diag $call; + is($ret_ref->[1], -1, + "$this_name: CHECK_ONLY: returns -1 for length") + or diag $call; if (! is(scalar @warnings, 0, "$this_name, CHECK_ONLY: no warnings" . " generated")) @@ -2369,18 +2543,12 @@ foreach my $test (@tests) { next; } if ($disallowed) { - unless (is($ret, undef, - "$this_name: Returns undef")) - { - diag $call; - } + is($ret, undef, "$this_name: Returns undef") + or diag $call; } else { - unless (is($ret, $bytes, - "$this_name: Returns expected string")) - { - diag $call; - } + is($ret, $bytes, "$this_name: Returns expected string") + or diag $call; } if (! $do_warning && ($warning eq 'utf8' || $warning eq $category)) @@ -2399,11 +2567,9 @@ foreach my $test (@tests) { if (is(scalar @warnings, 1, "$this_name: Got a single warning ")) { - unless (like($warnings[0], $message, - "$this_name: Got expected warning")) - { - diag $call; - } + like($warnings[0], $message, + "$this_name: Got expected warning") + or diag $call; } else { diag $call; diff --git a/handy.h b/handy.h index 5e31d1e0ad..1eb88923bf 100644 --- a/handy.h +++ b/handy.h @@ -549,7 +549,8 @@ ASCII character in the named class based on platform, Unicode, and Perl rules. If the input is a number that doesn't fit in an octet, FALSE is returned. Variant C<isFOO_A> (e.g., C<isALPHA_A()>) is identical to the base function -with no suffix C<"_A">. +with no suffix C<"_A">. This variant is used to emphasize by its name that +only ASCII-range characters can return TRUE. Variant C<isFOO_L1> imposes the Latin-1 (or EBCDIC equivlalent) character set onto the platform. That is, the code points that are ASCII are unaffected, diff --git a/locale.c b/locale.c index a871b9eb2b..07f599c032 100644 --- a/locale.c +++ b/locale.c @@ -2540,9 +2540,9 @@ Perl_my_strerror(pTHX_ const int errnum) dVAR; # ifdef USE_THREAD_SAFE_LOCALE - locale_t save_locale; + locale_t save_locale = NULL; # else - char * save_locale; + char * save_locale = NULL; bool locale_is_C = FALSE; /* We have a critical section to prevent another thread from changing the diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 73cee596a1..5848856a79 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -102,6 +102,12 @@ There may well be none in a stable release. We use a different hash function for short and long keys. This should improve performance and security, especially for long keys. +=item * readline is faster + +Reading from a file line-by-line with c<readline()> or C<< <> >> should +now typically be faster due to a better implementation of the code that +searches for the next newline character. + =back =head1 Modules and Pragmata diff --git a/proto.h b/proto.h index 925a7b4be1..bd14ff79f8 100644 --- a/proto.h +++ b/proto.h @@ -521,8 +521,14 @@ PERL_CALLCONV void Perl_ck_warner_d(pTHX_ U32 err, const char* pat, ...) #define PERL_ARGS_ASSERT_CK_WARNER_D \ assert(pat) -PERL_CALLCONV bool Perl_ckwarn(pTHX_ U32 w); -PERL_CALLCONV bool Perl_ckwarn_d(pTHX_ U32 w); +PERL_CALLCONV bool Perl_ckwarn(pTHX_ U32 w) + __attribute__warn_unused_result__ + __attribute__pure__; + +PERL_CALLCONV bool Perl_ckwarn_d(pTHX_ U32 w) + __attribute__warn_unused_result__ + __attribute__pure__; + PERL_CALLCONV void Perl_clear_defarray(pTHX_ AV* av, bool abandon); #define PERL_ARGS_ASSERT_CLEAR_DEFARRAY \ assert(av) @@ -591,10 +597,7 @@ PERL_CALLCONV const char * Perl_custom_op_name(pTHX_ const OP *o) PERL_CALLCONV void Perl_custom_op_register(pTHX_ Perl_ppaddr_t ppaddr, const XOP *xop); #define PERL_ARGS_ASSERT_CUSTOM_OP_REGISTER \ assert(ppaddr); assert(xop) -/* PERL_CALLCONV const XOP * Perl_custom_op_xop(pTHX_ const OP *o) - __attribute__warn_unused_result__ - __attribute__pure__; */ - +/* PERL_CALLCONV const XOP * Perl_custom_op_xop(pTHX_ const OP *o); */ PERL_CALLCONV void Perl_cv_ckproto_len_flags(pTHX_ const CV* cv, const GV* gv, const char* p, const STRLEN len, const U32 flags); #define PERL_ARGS_ASSERT_CV_CKPROTO_LEN_FLAGS \ assert(cv) @@ -1284,8 +1287,14 @@ PERL_CALLCONV SV** Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, #endif /* PERL_CALLCONV void hv_undef(pTHX_ HV *hv); */ PERL_CALLCONV void Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags); -/* PERL_CALLCONV I32 ibcmp(pTHX_ const char* a, const char* b, I32 len); */ -/* PERL_CALLCONV I32 ibcmp_locale(pTHX_ const char* a, const char* b, I32 len); */ +/* PERL_CALLCONV I32 ibcmp(pTHX_ const char* a, const char* b, I32 len) + __attribute__warn_unused_result__ + __attribute__pure__; */ + +/* PERL_CALLCONV I32 ibcmp_locale(pTHX_ const char* a, const char* b, I32 len) + __attribute__warn_unused_result__ + __attribute__pure__; */ + /* PERL_CALLCONV I32 ibcmp_utf8(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2); */ PERL_CALLCONV void Perl_init_argv_symbols(pTHX_ int argc, char **argv); #define PERL_ARGS_ASSERT_INIT_ARGV_SYMBOLS \ @@ -1331,8 +1340,7 @@ PERL_CALLCONV bool Perl_isIDFIRST_lazy(pTHX_ const char* p) __attribute__pure__; */ PERL_STATIC_INLINE bool S_is_c9strict_utf8_string(const U8 *s, const STRLEN len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING \ assert(s) @@ -1348,8 +1356,7 @@ PERL_CALLCONV I32 Perl_is_lvalue_sub(pTHX) __attribute__warn_unused_result__; PERL_STATIC_INLINE bool S_is_strict_utf8_string(const U8 *s, const STRLEN len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING \ assert(s) @@ -1563,8 +1570,7 @@ PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ const U8 *p) assert(p) PERL_STATIC_INLINE bool S_is_utf8_invariant_string(const U8* const s, STRLEN const len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING \ assert(s) @@ -1617,14 +1623,12 @@ PERL_CALLCONV bool Perl_is_utf8_space(pTHX_ const U8 *p) assert(p) PERL_STATIC_INLINE bool Perl_is_utf8_string(const U8 *s, const STRLEN len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_STRING \ assert(s) PERL_STATIC_INLINE bool S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS \ assert(s) @@ -1651,8 +1655,7 @@ PERL_CALLCONV bool Perl_is_utf8_upper(pTHX_ const U8 *p) __attribute__pure__; */ PERL_STATIC_INLINE bool S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \ assert(s); assert(e) @@ -1674,7 +1677,10 @@ PERL_CALLCONV bool Perl_is_utf8_xidfirst(pTHX_ const U8 *p) #define PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST \ assert(p) -PERL_CALLCONV bool Perl_isinfnan(NV nv); +PERL_CALLCONV bool Perl_isinfnan(NV nv) + __attribute__warn_unused_result__ + __attribute__pure__; + PERL_CALLCONV bool Perl_isinfnansv(pTHX_ SV *sv); #define PERL_ARGS_ASSERT_ISINFNANSV \ assert(sv) @@ -3077,12 +3083,9 @@ PERL_CALLCONV void Perl_sv_free2(pTHX_ SV *const sv, const U32 refcnt); #define PERL_ARGS_ASSERT_SV_FREE2 \ assert(sv) PERL_CALLCONV void Perl_sv_free_arenas(pTHX); -PERL_CALLCONV SV* Perl_sv_get_backrefs(SV *const sv) - __attribute__warn_unused_result__ - __attribute__pure__; +PERL_CALLCONV SV* Perl_sv_get_backrefs(SV *const sv); #define PERL_ARGS_ASSERT_SV_GET_BACKREFS \ assert(sv) - PERL_CALLCONV char* Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append); #define PERL_ARGS_ASSERT_SV_GETS \ assert(sv); assert(fp) @@ -3492,32 +3495,27 @@ PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I #define PERL_ARGS_ASSERT_UTF16_TO_UTF8_REVERSED \ assert(p); assert(d); assert(newlen) PERL_STATIC_INLINE IV Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_DISTANCE \ assert(a); assert(b) PERL_STATIC_INLINE U8* Perl_utf8_hop(const U8 *s, SSize_t off) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_HOP \ assert(s) PERL_STATIC_INLINE U8* Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_HOP_BACK \ assert(s); assert(start) PERL_STATIC_INLINE U8* Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_HOP_FORWARD \ assert(s); assert(end) PERL_STATIC_INLINE U8* Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_HOP_SAFE \ assert(s); assert(start); assert(end) @@ -4904,8 +4902,7 @@ PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_s #define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT \ assert(pRExC_state); assert(node); assert(flagp) STATIC const char * S_cntrl_to_mnemonic(const U8 c) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; PERL_STATIC_INLINE U8 S_compute_EXACTish(RExC_state_t *pRExC_state); #define PERL_ARGS_ASSERT_COMPUTE_EXACTISH \ @@ -4914,8 +4911,7 @@ STATIC regnode * S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_sta #define PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE \ assert(pRExC_state); assert(source) STATIC int S_edit_distance(const UV *src, const UV *tgt, const STRLEN x, const STRLEN y, const SSize_t maxDistance) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_EDIT_DISTANCE \ assert(src); assert(tgt) @@ -5027,8 +5023,7 @@ STATIC regnode* S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 dept #define PERL_ARGS_ASSERT_REGCLASS \ assert(pRExC_state); assert(flagp) STATIC unsigned int S_regex_set_precedence(const U8 my_operator) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth); #define PERL_ARGS_ASSERT_REGINSERT \ @@ -5184,8 +5179,7 @@ PERL_CALLCONV bool Perl_grok_bslash_x(pTHX_ char** s, UV* uv, const char** error assert(s); assert(uv); assert(error_msg) PERL_STATIC_INLINE I32 S_regcurly(const char *s) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_REGCURLY \ assert(s) @@ -5613,14 +5607,12 @@ STATIC UV S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV res assert(p); assert(ustrp); assert(lenp) PERL_STATIC_INLINE bool S_does_utf8_overflow(const U8 * const s, const U8 * e) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW \ assert(s); assert(e) PERL_STATIC_INLINE bool S_isFF_OVERLONG(const U8 * const s, const STRLEN len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_ISFF_OVERLONG \ assert(s) @@ -5630,14 +5622,12 @@ PERL_STATIC_INLINE bool S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, co assert(p); assert(swash); assert(swashname) PERL_STATIC_INLINE bool S_is_utf8_cp_above_31_bits(const U8 * const s, const U8 * const e) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_CP_ABOVE_31_BITS \ assert(s); assert(e) PERL_STATIC_INLINE bool S_is_utf8_overlong_given_start_byte_ok(const U8 * const s, const STRLEN len) - __attribute__warn_unused_result__ - __attribute__pure__; + __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_OVERLONG_GIVEN_START_BYTE_OK \ assert(s) diff --git a/regen/embed.pl b/regen/embed.pl index 9b432ba825..01bcc78d15 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -39,6 +39,13 @@ my $unflagged_pointers; # implicit interpreter context argument. # +my $error_count = 0; +sub die_at_end ($) { # Keeps going for now, but makes sure the regen doesn't + # succeed. + warn shift; + $error_count++; +} + sub full_name ($$) { # Returns the function name with potentially the # prefixes 'S_' or 'Perl_' my ($func, $flags) = @_; @@ -75,7 +82,7 @@ my ($embed, $core, $ext, $api) = setup_embed(); my ($flags,$retval,$plain_func,@args) = @$_; if ($flags =~ / ( [^AabDdEfiMmnOoPpRrsUWXx] ) /x) { - warn "flag $1 is not legal (for function $plain_func)"; + die_at_end "flag $1 is not legal (for function $plain_func)"; } my @nonnull; my $has_depth = ( $flags =~ /W/ ); @@ -92,7 +99,7 @@ my ($embed, $core, $ext, $api) = setup_embed(); warn "It is nonsensical to require the return value of a void function ($plain_func) to be checked"; } - warn "$plain_func: s flag is mutually exclusive from the i and p plags" + die_at_end "$plain_func: s flag is mutually exclusive from the i and p plags" if $flags =~ /s/ && $flags =~ /[ip]/; if ($flags =~ /([si])/) { @@ -104,6 +111,7 @@ my ($embed, $core, $ext, $api) = setup_embed(); $type = $1 eq 's' ? "STATIC" : "PERL_STATIC_INLINE"; } $retval = "$type $retval"; + die_at_end "Don't declare static function '$plain_func' pure" if $flags =~ /P/; } else { if ($never_returns) { @@ -140,7 +148,7 @@ my ($embed, $core, $ext, $api) = setup_embed(); $temp_arg =~ s/\s*\bstruct\b\s*/ /g; if ( ($temp_arg ne "...") && ($temp_arg !~ /\w+\s+(\w+)(?:\[\d+\])?\s*$/) ) { - warn "$func: $arg ($n) doesn't have a name\n"; + die_at_end "$func: $arg ($n) doesn't have a name\n"; } if (defined $1 && $nn && !($commented_out && !$binarycompat)) { push @names_of_nn, $1; @@ -221,10 +229,10 @@ my ($embed, $core, $ext, $api) = setup_embed(); END_EXTERN_C EOF - read_only_bottom_close_and_rename($pr); + read_only_bottom_close_and_rename($pr) if ! $error_count; } -warn "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers; +die_at_end "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers; sub readvars { my ($file, $pre) = @_; @@ -235,7 +243,7 @@ sub readvars { while (<FILE>) { s/[ \t]*#.*//; # Delete comments. if (/PERLVARA?I?C?\($pre,\s*(\w+)/) { - warn "duplicate symbol $1 while processing $file line $.\n" + die_at_end "duplicate symbol $1 while processing $file line $.\n" if $seen{$1}++; } } @@ -427,7 +435,7 @@ print $em <<'END'; #endif END -read_only_bottom_close_and_rename($em); +read_only_bottom_close_and_rename($em) if ! $error_count; $em = open_print_header('embedvar.h'); @@ -492,7 +500,7 @@ print $em <<'END'; #endif /* PERL_GLOBAL_STRUCT */ END -read_only_bottom_close_and_rename($em); +read_only_bottom_close_and_rename($em) if ! $error_count; my $capih = open_print_header('perlapi.h'); @@ -595,7 +603,7 @@ print $capih <<'EOT'; #endif /* __perlapi_h__ */ EOT -read_only_bottom_close_and_rename($capih); +read_only_bottom_close_and_rename($capih) if ! $error_count; my $capi = open_print_header('perlapi.c', <<'EOQ'); * @@ -644,6 +652,8 @@ END_EXTERN_C #endif /* MULTIPLICITY && PERL_GLOBAL_STRUCT */ EOT -read_only_bottom_close_and_rename($capi); +read_only_bottom_close_and_rename($capi) if ! $error_count; + +die "$error_count errors found" if $error_count; # ex: set ts=8 sts=4 sw=4 noet: diff --git a/regen/warnings.pl b/regen/warnings.pl index 739a7b4894..83bf8bc3c4 100644 --- a/regen/warnings.pl +++ b/regen/warnings.pl @@ -373,6 +373,64 @@ EOM : (STRLEN*)CopyD(p, PerlMemShared_malloc(sizeof(*p)+*p), sizeof(*p)+*p, \ char)) +/* + +=head1 Warning and Dieing + +=for apidoc Am|bool|ckWARN|U32 w + +Returns a boolean as to whether or not warnings are enabled for the warning +category C<w>. If the category is by default enabled even if not within the +scope of S<C<use warnings>>, instead use the L</ckWARN_d> macro. + +=for apidoc Am|bool|ckWARN_d|U32 w + +Like C<L</ckWARN>>, but for use if and only if the warning category is by +default enabled even if not within the scope of S<C<use warnings>>. + +=for apidoc Am|bool|ckWARN2|U32 w1|U32 w2 + +Like C<L</ckWARN>>, but takes two warnings categories as input, and returns +TRUE if either is enabled. If either category is by default enabled even if +not within the scope of S<C<use warnings>>, instead use the L</ckWARN2_d> +macro. The categories must be completely independent, one may not be +subclassed from the other. + +=for apidoc Am|bool|ckWARN2_d|U32 w1|U32 w2 + +Like C<L</ckWARN2>>, but for use if and only if either warning category is by +default enabled even if not within the scope of S<C<use warnings>>. + +=for apidoc Am|bool|ckWARN3|U32 w1|U32 w2|U32 w3 + +Like C<L</ckWARN2>>, but takes three warnings categories as input, and returns +TRUE if any is enabled. If any of the categories is by default enabled even +if not within the scope of S<C<use warnings>>, instead use the L</ckWARN3_d> +macro. The categories must be completely independent, one may not be +subclassed from any other. + +=for apidoc Am|bool|ckWARN3_d|U32 w1|U32 w2|U32 w3 + +Like C<L</ckWARN3>>, but for use if and only if any of the warning categories +is by default enabled even if not within the scope of S<C<use warnings>>. + +=for apidoc Am|bool|ckWARN4|U32 w1|U32 w2|U32 w3|U32 w4 + +Like C<L</ckWARN3>>, but takes four warnings categories as input, and returns +TRUE if any is enabled. If any of the categories is by default enabled even +if not within the scope of S<C<use warnings>>, instead use the L</ckWARN4_d> +macro. The categories must be completely independent, one may not be +subclassed from any other. + +=for apidoc Am|bool|ckWARN4_d|U32 w1|U32 w2|U32 w3|U32 w4 + +Like C<L</ckWARN4>>, but for use if and only if any of the warning categories +is by default enabled even if not within the scope of S<C<use warnings>>. + +=cut + +*/ + #define ckWARN(w) Perl_ckwarn(aTHX_ packWARN(w)) /* The w1, w2 ... should be independent warnings categories; one shouldn't be diff --git a/regexec.c b/regexec.c index 2b7a200d1e..013ccc54a8 100644 --- a/regexec.c +++ b/regexec.c @@ -7496,6 +7496,7 @@ NULL DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_ "whilem: (cache) already tried at this position...\n", depth) ); + cur_curlyx->u.curlyx.count--; sayNO; /* cache records failure */ } ST.cache_offset = offset; diff --git a/t/re/re_tests b/t/re/re_tests index 1797ddc09d..e8a7fa9f34 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1975,6 +1975,7 @@ AB\s+\x{100} AB \x{100}X y - - \b\z0*\x{100} .\x{100} n - - # [perl #129350] crashed in intuit_start (.*?(a(a)|i(i))n) riiaan y $2-$3-$4-$1 aa-a--riiaan # Jump trie capture buffer issue [perl #129897] (^(?:(\d)x)?\d$) 1 y [$1-$2] [1-] # make sure that we reset capture buffers properly (from regtry) **** PATCH TRUNCATED AT 2000 LINES -- 99 NOT SHOWN **** -- Perl5 Master Repository