In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/e8ea8356d847d9e52f20d12b33fbd589beced868?hp=0acfb02fed689f2745813114bae77c22a2211cc7>

- Log -----------------------------------------------------------------
commit e8ea8356d847d9e52f20d12b33fbd589beced868
Author: Jarkko Hietaniemi <j...@iki.fi>
Date:   Wed May 7 09:19:00 2014 -0400

    Off-by-one in PL_fold_locale use.
    
    Fix for Coverity perl5 CID 29033: Out-of-bounds read
     (OVERRUN) overrun-local: Overrunning array PL_fold_locale of 256 bytes at
     byte offset 256 using index c1 (which evaluates to 256).
    
    - the "c1 > 256" was off-by-one, it needed to be "c1 > 255",
      it could have caused the PL_fold_locale to be accessed one past the end,
      at offset 256, but we have dodged the bullet thanks to the regex engine
      optimizing the bad case away before we hit it (analysis by Karl 
Williamson):
      regexec.c
    - comment fixes (pointed out by Karl Williamson): regexec.c
    - add tests to nail down the behaviour of fold matching
      for the last of Latin-1 (0xFF, lowercase which curiously does not have
      uppercase within Latin-1). and the first pure Unicode: t/re/pat.t

M       regexec.c
M       t/re/pat.t

commit 3f49e765225af085951605f8b1c60eadd98ef39f
Author: Jarkko Hietaniemi <j...@iki.fi>
Date:   Tue May 13 08:18:05 2014 -0400

    Cannot rotl u32 (hek_hash) by 64 bits.
    
    Fix for Coverity perl5 CID 28935:
    Operands don't affect result (CONSTANT_EXPRESSION_RESULT)
    result_independent_of_operands: (unsigned long)entry->hent_hek->hek_hash >> 
47 /* 64 - 17 */ is 0 regardless of the values of its operands. This occurs as 
the bitwise second operand of '|'.

M       hv.c

commit 461a975bd8196c522aeb2e0a06eb6980c25ffa2e
Author: Daniel Dragan <bul...@hotmail.com>
Date:   Wed May 14 04:08:13 2014 -0400

    fix sv_usepvn_flags's docs
    
    Newx != malloc, mixing Newx and malloc leads to heap corruption on some
    builds like Win32, use the official Perl API for allocating memory.

M       sv.c

commit c3caa5c3bdbd0ad0bc7ce5e7cd1a8eb5b7ca6a69
Author: Jarkko Hietaniemi <j...@iki.fi>
Date:   Mon May 19 06:52:24 2014 -0400

    Use the C_ARRAY_LENGTH.
    
    Use the C_ARRAY_LENGTH instead of sizeof(c_array)/sizeof(c_array[0])
    or sizeof(c_array)/sizeof(type_of_element_in_c_array), and C_ARRAY_END
    for c_array + C_ARRAY_LENGTH(c_array).
    
    While doing this found potential off-by-one error in sv.c:Perl_sv_magic:
    how > C_ARRAY_LENGTH(PL_magic_data)
    should probably have been
    how >= C_ARRAY_LENGTH(PL_magic_data)
    No tests fail, but this seems to be more of an internal sanity check.

M       dump.c
M       handy.h
M       patchlevel.h
M       perl.c
M       sv.c
M       universal.c
M       util.c

commit 40b5a549d4793cde8b4d93ccdd03c16e039440c9
Author: Jarkko Hietaniemi <j...@iki.fi>
Date:   Tue May 6 10:40:21 2014 -0400

    UCHARAT unnecessary with isSPACE().
    
    UCHARAT is not only unnecessary here (and no other spot in the core
    seems to use UCHARAT with isFOO()), but it also narrows the type so
    that some of the code in isSPACE() becomes unreachable.
    
    Fix for Coverity perl5 CIDs 28937, 28938.

M       regcomp.c

commit e76fdebf5815ffaf53ebcfd2c7b78b0e9eacbfd2
Author: Jarkko Hietaniemi <j...@iki.fi>
Date:   Fri May 9 11:05:30 2014 -0400

    Do not invert a NULL cp_list.
    
    Fix for Coverity perl5 CID 28966.

M       regcomp.c
-----------------------------------------------------------------------

Summary of changes:
 dump.c       |  5 ++---
 handy.h      |  7 ++++++-
 hv.c         |  2 +-
 patchlevel.h |  2 +-
 perl.c       |  2 +-
 regcomp.c    |  9 +++++----
 regexec.c    |  8 ++++----
 sv.c         | 15 ++++++++-------
 t/re/pat.t   | 26 +++++++++++++++++++++++++-
 universal.c  |  3 +--
 util.c       |  4 ++--
 11 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/dump.c b/dump.c
index 354cd57..59be3e0 100644
--- a/dump.c
+++ b/dump.c
@@ -868,8 +868,7 @@ const struct op_private_by_op op_private_names[] = {
 static bool
 S_op_private_to_names(pTHX_ SV *tmpsv, U32 optype, U32 op_private) {
     const struct op_private_by_op *start = op_private_names;
-    const struct op_private_by_op *const end
-       = op_private_names + C_ARRAY_LENGTH(op_private_names);
+    const struct op_private_by_op *const end = C_ARRAY_END(op_private_names);
 
     /* This is a linear search, but no worse than the code that it replaced.
        It's debugging code - size is more important than speed.  */
@@ -1894,7 +1893,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, 
I32 nest, I32 maxnest, bo
        if (HvARRAY(sv) && usedkeys) {
            /* Show distribution of HEs in the ARRAY */
            int freq[200];
-#define FREQ_MAX ((int)(sizeof freq / sizeof freq[0] - 1))
+#define FREQ_MAX ((int)(C_ARRAY_LENGTH(freq) - 1))
            int i;
            int max = 0;
            U32 pow2 = 2, keys = usedkeys;
diff --git a/handy.h b/handy.h
index 3f84eff..9332f8d 100644
--- a/handy.h
+++ b/handy.h
@@ -1936,8 +1936,13 @@ void Perl_mem_log_del_sv(const SV *sv, const char 
*filename, const int linenumbe
 #define StructCopy(s,d,t) Copy(s,d,1,t)
 #endif
 
+/* C_ARRAY_LENGTH is the number of elements in the C array (so you
+ * want your zero-based indices to be less than but not equal to).
+ *
+ * C_ARRAY_END is one past the last: half-open/half-closed range,
+ * not last-inclusive range. */
 #define C_ARRAY_LENGTH(a)      (sizeof(a)/sizeof((a)[0]))
-#define C_ARRAY_END(a)         (a) + (sizeof(a)/sizeof((a)[0]))
+#define C_ARRAY_END(a)         ((a) + C_ARRAY_LENGTH(a))
 
 #ifdef NEED_VA_COPY
 # ifdef va_copy
diff --git a/hv.c b/hv.c
index ef686ab..4577363 100644
--- a/hv.c
+++ b/hv.c
@@ -1259,7 +1259,7 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN 
newsize)
                  * and use the new low bit to decide if we insert at top,
                  * or next from top. IOW, we only rotate on a collision.*/
                 if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
-                    PL_hash_rand_bits+= ROTL_UV(HeHASH(entry), 17);
+                    PL_hash_rand_bits+= ROTL32(HeHASH(entry), 17);
                     PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
                     if (PL_hash_rand_bits & 1) {
                         HeNEXT(entry)= HeNEXT(aep[j]);
diff --git a/patchlevel.h b/patchlevel.h
index 3235cc3..3101f08 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -144,7 +144,7 @@ static const char * const local_patches[] = {
 
 /* Initial space prevents this variable from being inserted in config.sh  */
 #  define      LOCAL_PATCH_COUNT       \
-       ((int)(sizeof(local_patches)/sizeof(local_patches[0])-2))
+       ((int)(C_ARRAY_LENGTH(local_patches)-2))
 
 /* the old terms of reference, add them only when explicitly included */
 #define PATCHLEVEL             PERL_VERSION
diff --git a/perl.c b/perl.c
index 1b9b429..86fb8d9 100644
--- a/perl.c
+++ b/perl.c
@@ -677,7 +677,7 @@ perl_destruct(pTHXx)
                msg.msg_name = NULL;
                msg.msg_namelen = 0;
                msg.msg_iov = vec;
-               msg.msg_iovlen = sizeof(vec)/sizeof(vec[0]);
+               msg.msg_iovlen = C_ARRAY_LENGTH(vec);
 
                vec[0].iov_base = (void*)&target;
                vec[0].iov_len = sizeof(target);
diff --git a/regcomp.c b/regcomp.c
index eaee604..5cc4105 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -13443,12 +13443,12 @@ parseit:
                    e = strchr(RExC_parse++, '}');
                     if (!e)
                         vFAIL2("Missing right brace on \\%c{}", c);
-                   while (isSPACE(UCHARAT(RExC_parse)))
+                   while (isSPACE(*RExC_parse))
                        RExC_parse++;
                     if (e == RExC_parse)
                         vFAIL2("Empty \\%c{}", c);
                    n = e - RExC_parse;
-                   while (isSPACE(UCHARAT(RExC_parse + n - 1)))
+                   while (isSPACE(*(RExC_parse + n - 1)))
                        n--;
                }
                else {
@@ -13468,7 +13468,7 @@ parseit:
                           * that bit) */
                          value ^= 'P' ^ 'p';
 
-                        while (isSPACE(UCHARAT(RExC_parse))) {
+                        while (isSPACE(*RExC_parse)) {
                              RExC_parse++;
                              n--;
                         }
@@ -14662,7 +14662,8 @@ parseit:
      * at compile time.  Besides not inverting folded locale now, we can't
      * invert if there are things such as \w, which aren't known until runtime
      * */
-    if (invert
+    if (cp_list
+        && invert
         && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
        && ! depends_list
        && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
diff --git a/regexec.c b/regexec.c
index 362390b..7d6827a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3695,7 +3695,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const 
text_node, int *c1p,
         }
         else { /* an EXACTFish node which doesn't begin with a multi-char fold 
*/
             c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
-            if (c1 > 256) {
+            if (c1 > 255) {
                 /* Load the folds hash, if not already done */
                 SV** listp;
                 if (! PL_utf8_foldclosures) {
@@ -3748,10 +3748,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const 
text_node, int *c1p,
                         /* Folds that cross the 255/256 boundary are forbidden
                          * if EXACTFL (and isnt a UTF8 locale), or EXACTFA and
                          * one is ASCIII.  Since the pattern character is above
-                         * 256, and its only other match is below 256, the only
+                         * 255, and its only other match is below 256, the only
                          * legal match will be to itself.  We have thrown away
                          * the original, so have to compute which is the one
-                         * above 255 */
+                         * above 255. */
                         if ((c1 < 256) != (c2 < 256)) {
                             if ((OP(text_node) == EXACTFL
                                  && ! IN_UTF8_CTYPE_LOCALE)
@@ -3770,7 +3770,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const 
text_node, int *c1p,
                     }
                 }
             }
-            else /* Here, c1 is < 255 */
+            else /* Here, c1 is <= 255 */
                 if (utf8_target
                     && HAS_NONLATIN1_FOLD_CLOSURE(c1)
                     && ( ! (OP(text_node) == EXACTFL && ! 
IN_UTF8_CTYPE_LOCALE))
diff --git a/sv.c b/sv.c
index 1dbd3fe..c213919 100644
--- a/sv.c
+++ b/sv.c
@@ -4839,10 +4839,12 @@ Perl_sv_sethek(pTHX_ SV *const sv, const HEK *const hek)
 Tells an SV to use C<ptr> to find its string value.  Normally the
 string is stored inside the SV but sv_usepvn allows the SV to use an
 outside string.  The C<ptr> should point to memory that was allocated
-by C<malloc>.  It must be the start of a mallocked block
-of memory, and not a pointer to the middle of it.  The
-string length, C<len>, must be supplied.  By default
-this function will realloc (i.e. move) the memory pointed to by C<ptr>,
+by L<Newx|perlclib/Memory Management and String Handling>. It must be
+the start of a Newx-ed block of memory, and not a pointer to the
+middle of it (beware of L<OOK|perlguts/Offsets> and copy-on-write),
+and not be from a non-Newx memory allocator like C<malloc>. The
+string length, C<len>, must be supplied.  By default this function
+will C<Renew> (i.e. realloc, move) the memory pointed to by C<ptr>,
 so that pointer should not be freed or used by the programmer after
 giving it to sv_usepvn, and neither should any pointers from "behind"
 that pointer (e.g. ptr + 1) be used.
@@ -5574,7 +5576,7 @@ Perl_sv_magic(pTHX_ SV *const sv, SV *const obj, const 
int how,
 
     PERL_ARGS_ASSERT_SV_MAGIC;
 
-    if (how < 0 || (unsigned)how > C_ARRAY_LENGTH(PL_magic_data)
+    if (how < 0 || (unsigned)how >= C_ARRAY_LENGTH(PL_magic_data)
        || ((flags = PL_magic_data[how]),
            (vtable_index = flags & PERL_MAGIC_VTABLE_MASK)
            > magic_vtable_max))
@@ -12248,8 +12250,7 @@ Perl_ptr_table_store(pTHX_ PTR_TBL_t *const tbl, const 
void *const oldsv, void *
            new_arena->next = tbl->tbl_arena;
            tbl->tbl_arena = new_arena;
            tbl->tbl_arena_next = new_arena->array;
-           tbl->tbl_arena_end = new_arena->array
-               + sizeof(new_arena->array) / sizeof(new_arena->array[0]);
+           tbl->tbl_arena_end = C_ARRAY_END(new_arena->array);
        }
 
        tblent = tbl->tbl_arena_next++;
diff --git a/t/re/pat.t b/t/re/pat.t
index 04f8b84..81cb64b 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 721;  # Update this when adding/deleting tests.
+plan tests => 733;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1580,7 +1580,31 @@ EOP
         like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
     }
 
+    {
+        # Verify that the very last Latin-1 U+00FF
+        # (LATIN SMALL LETTER Y WITH DIAERESIS)
+        # and its UPPER counterpart (U+0178 which is pure Unicode),
+        # and likewise for the very first pure Unicode
+        # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly,
+        # and there are no off-by-one logic errors in the transition zone.
+
+        ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l");
+        ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u");
+        ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l");
+        ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u");
 
+        # U+00FF with U+05D0 (non-casing Hebrew letter).
+        ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l");
+        ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u");
+        ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l");
+        ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u");
+
+        # U+0100.
+        ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u");
+        ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l");
+        ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u");
+        ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l");
+    }
 
 } # End of sub run_tests
 
diff --git a/universal.c b/universal.c
index bccc8fb..a29696d 100644
--- a/universal.c
+++ b/universal.c
@@ -1059,8 +1059,7 @@ Perl_boot_core_UNIVERSAL(pTHX)
     dVAR;
     static const char file[] = __FILE__;
     const struct xsub_details *xsub = details;
-    const struct xsub_details *end
-       = details + sizeof(details) / sizeof(details[0]);
+    const struct xsub_details *end = C_ARRAY_END(details);
 
     do {
        newXS_flags(xsub->name, xsub->xsub, file, xsub->proto, 0);
diff --git a/util.c b/util.c
index 0a0ee40..b90abe5 100644
--- a/util.c
+++ b/util.c
@@ -4576,8 +4576,8 @@ Perl_init_global_struct(pTHX)
 {
     struct perl_vars *plvarsp = NULL;
 # ifdef PERL_GLOBAL_STRUCT
-    const IV nppaddr = sizeof(Gppaddr)/sizeof(Perl_ppaddr_t);
-    const IV ncheck  = sizeof(Gcheck) /sizeof(Perl_check_t);
+    const IV nppaddr = C_ARRAY_LENGTH(Gppaddr);
+    const IV ncheck  = C_ARRAY_LENGTH(Gcheck);
 #  ifdef PERL_GLOBAL_STRUCT_PRIVATE
     /* PerlMem_malloc() because can't use even safesysmalloc() this early. */
     plvarsp = (struct perl_vars*)PerlMem_malloc(sizeof(struct perl_vars));

--
Perl5 Master Repository

Reply via email to