In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/0275405c4fd00a8e74de313c3fd5a82c0a1ecbea?hp=6ef7fe531911e0b41ffcc04c1d6b6ec25a8b1bc9>

- Log -----------------------------------------------------------------
commit 0275405c4fd00a8e74de313c3fd5a82c0a1ecbea
Author: David Mitchell <da...@iabyn.com>
Date:   Tue Mar 12 11:05:48 2019 +0000

    fix paren buffer leak in (?|...)
    
    The (?| ... | ... |... ) alternation resets the paren index for each
    branch. This can trick the code in S_reg() into re-mallocing the open
    and close paren index buffers during every branch, leaking the old ones.
    
    This commit fixes the leak by introducing a new compilation variable,
    RExC_parens_buf_size, which records the actual number of slots malloced.
    
    By decoupling RExC_parens_buf_size from RExC_npar, it also means that
    the code can allocate a buffer larger than currently needed. This means
    that
        /(.)(.)(.)(.)/
    
    no longer does a malloc() and 3 reallocs(), but instead initially
    mallocs 10 slots, and if more than 10 captures are seen during
    compilation, reallocs() to 20, 40, 80 ....

-----------------------------------------------------------------------

Summary of changes:
 regcomp.c | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 816d735f5b..658896f615 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -163,6 +163,7 @@ struct RExC_state_t {
     I32                seen_zerolen;
     regnode_offset *open_parens;       /* offsets to open parens */
     regnode_offset *close_parens;      /* offsets to close parens */
+    I32      parens_buf_size;           /* #slots malloced open/close_parens */
     regnode     *end_op;                /* END node in program */
     I32                utf8;           /* whether the pattern is utf8 or not */
     I32                orig_utf8;      /* whether the pattern was originally 
in utf8 */
@@ -253,6 +254,7 @@ struct RExC_state_t {
 #define RExC_maxlen        (pRExC_state->maxlen)
 #define RExC_npar      (pRExC_state->npar)
 #define RExC_total_parens      (pRExC_state->total_par)
+#define RExC_parens_buf_size   (pRExC_state->parens_buf_size)
 #define RExC_nestroot   (pRExC_state->nestroot)
 #define RExC_seen_zerolen      (pRExC_state->seen_zerolen)
 #define RExC_utf8      (pRExC_state->utf8)
@@ -7666,6 +7668,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int 
pat_count,
 
     RExC_naughty = 0;
     RExC_npar = 1;
+    RExC_parens_buf_size = 0;
     RExC_emit_start = RExC_rxi->program;
     pRExC_state->code_index = 0;
 
@@ -11975,31 +11978,44 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp, U32 depth)
             if (! ALL_PARENS_COUNTED) {
                 /* If we are in our first pass through (and maybe only pass),
                  * we  need to allocate memory for the capturing parentheses
-                 * data structures.  Since we start at npar=1, when it reaches
-                 * 2, for the first time it has something to put in it.  Above
-                 * 2 means we extend what we already have */
-                if (RExC_npar == 2) {
+                 * data structures.
+                 */
+
+                if (!RExC_parens_buf_size) {
+                    /* first guess at number of parens we might encounter */
+                    RExC_parens_buf_size = 10;
+
                     /* setup RExC_open_parens, which holds the address of each
                      * OPEN tag, and to make things simpler for the 0 index the
                      * start of the program - this is used later for offsets */
-                    Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
                     RExC_open_parens[0] = 1;    /* +1 for REG_MAGIC */
 
                     /* setup RExC_close_parens, which holds the address of each
                      * CLOSE tag, and to make things simpler for the 0 index
                      * the end of the program - this is used later for offsets
                      * */
-                    Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+                    Newxz(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
                     /* we dont know where end op starts yet, so we dont need to
                      * set RExC_close_parens[0] like we do RExC_open_parens[0]
                      * above */
                 }
-                else {
-                    Renew(RExC_open_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+                else if (RExC_npar > RExC_parens_buf_size) {
+                    I32 old_size = RExC_parens_buf_size;
+
+                    RExC_parens_buf_size *= 2;
+
+                    Renew(RExC_open_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_open_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
 
-                    Renew(RExC_close_parens, RExC_npar, regnode_offset);
-                    Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+                    Renew(RExC_close_parens, RExC_parens_buf_size,
+                            regnode_offset);
+                    Zero(RExC_close_parens + old_size,
+                            RExC_parens_buf_size - old_size, regnode_offset);
                 }
             }
 

-- 
Perl5 Master Repository

Reply via email to