In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/bb1b88dd7be03975ef001e007081e75d83f8cb6f?hp=c7f61edaeb9bf126c6c222500d3350ccfced3e3f>
- Log ----------------------------------------------------------------- commit bb1b88dd7be03975ef001e007081e75d83f8cb6f Author: Karl Williamson <k...@cpan.org> Date: Fri Nov 24 21:56:09 2017 -0700 sv_utf8_decode: Reverse order of tests for speed Now that we have a fast is_utf8_invariant_string_loc(), use it first to quickly find any variants. Then use is_utf8_string() from that point on. This is the reverse of the order used before this commit. This speeds things up in two ways: 1) we use the faster function first, and 2) we use the information it returns to avoid reparsing the string starting at the beginning. commit 8cd29efcae02781bf1c9843e66b7b08105659d6b Author: Karl Williamson <k...@cpan.org> Date: Sat Nov 18 16:36:45 2017 -0700 pp_sys.c: Avoid reparsing string By using is_utf8_invariant_string_loc() instead of plain is_utf8_invariant_string(), we can start parsing at the first variant (if any is found) instead of the previous behavior of starting again at the beginning of the string. ----------------------------------------------------------------------- Summary of changes: pp_sys.c | 8 ++++++-- sv.c | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pp_sys.c b/pp_sys.c index a66cb4ec94..a3caf01c65 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -3499,6 +3499,7 @@ PP(pp_fttext) SV *sv = NULL; GV *gv; PerlIO *fp; + const U8 * first_variant; tryAMAGICftest_MG(PL_op->op_type == OP_FTTEXT ? 'T' : 'B'); @@ -3632,11 +3633,14 @@ PP(pp_fttext) #endif assert(len); - if (! is_utf8_invariant_string((U8 *) s, len)) { + if (! is_utf8_invariant_string_loc((U8 *) s, len, &first_variant)) { /* Here contains a variant under UTF-8 . See if the entire string is * UTF-8. 
*/ - if (is_utf8_fixed_width_buf_flags((U8 *) s, len, 0)) { + if (is_utf8_fixed_width_buf_flags(first_variant, + len - ((char *) first_variant - s), + 0)) + { if (PL_op->op_type == OP_FTTEXT) { FT_RETURNYES; } diff --git a/sv.c b/sv.c index bf0b153359..225a743d14 100644 --- a/sv.c +++ b/sv.c @@ -3784,7 +3784,7 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) PERL_ARGS_ASSERT_SV_UTF8_DECODE; if (SvPOKp(sv)) { - const U8 *start, *c; + const U8 *start, *c, *first_variant; /* The octets may have got themselves encoded - get them back as * bytes @@ -3796,9 +3796,9 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) * we want to make sure everything inside is valid utf8 first. */ c = start = (const U8 *) SvPVX_const(sv); - if (!is_utf8_string(c, SvCUR(sv))) - return FALSE; - if (! is_utf8_invariant_string(c, SvCUR(sv))) { + if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) { + if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c))) + return FALSE; SvUTF8_on(sv); } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { -- Perl5 Master Repository