Control: tags -1 + patch

Attached is a patch which I tested with the demo program at the end of
sipXportLib/src/utl/UtlRegex.cpp.
Description: Port to PCRE2.
Bug-Debian: https://bugs.debian.org/999960
Author: Yavor Doganov <ya...@gnu.org>
Forwarded: no
Last-Update: 2023-12-18
---

--- sipxtapi-3.3.0~test18+dfsg.1.orig/configure.ac
+++ sipxtapi-3.3.0~test18+dfsg.1/configure.ac
@@ -59,7 +59,7 @@
 CHECK_OPUS
 
 # PCRE is mandatory
-PKG_CHECK_MODULES([DEPS_PCRE], [libpcre >= 4.5])
+PKG_CHECK_MODULES([DEPS_PCRE], [libpcre2-8])
 
 # OpenSSL is mandatory, find it or configure fails:
 CHECK_SSL
--- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/include/utl/UtlRegex.h
+++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/include/utl/UtlRegex.h
@@ -12,7 +12,8 @@
 #define _REGEX_H
 
 #include <string.h>
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 #include "utl/UtlString.h"
 
 /**
@@ -61,8 +62,8 @@
    
    /// Compile a regular expression to create the matching object.
   RegEx( const char * regex, //< the regular expression
-         int               options = 0, //< any sum of PCRE options bits
-         unsigned long int maxDepth = MAX_RECURSION // see MAX_RECURSION
+         uint32_t          options = 0, //< any sum of PCRE options bits
+         uint32_t maxDepth = MAX_RECURSION // see MAX_RECURSION
          );
   /**<
    * If compiling the regular expression fails, an error message string is
@@ -71,7 +72,7 @@
    */
 
   /// Default maximum for the recursion depth in searches.
-  static const unsigned long int MAX_RECURSION;
+  static const uint32_t MAX_RECURSION;
   /**<
    * The PCRE internal match() function implements some searches by recursion.
    * This value is the default maximumm allowed depth for that recursion.  It can
@@ -115,7 +116,7 @@
   ~RegEx();
 
   /// Count the number of possible substrings returned by this expression
-  int SubStrings(void) const;
+  uint32_t SubStrings(void) const;
   /**<
    *   SubStrings() @returns the number of substrings defined by
    *   the regular expression.
@@ -142,8 +143,8 @@
 
   /// Search a string for matches to this regular expression
   bool Search( const char * subject,  ///< the string to be searched for a match
-               int len = -1,          ///< the length of the subject string
-               int options = 0        ///< sum of any PCRE options flags
+               PCRE2_SIZE len = -1,   ///< the length of the subject string
+               uint32_t options = 0   ///< sum of any PCRE options flags
                );
   /**<
    *    Apply the regular expression to the subject string.
@@ -156,9 +157,9 @@
 
   /// Search a string starting at some offset for matches to this regular expression
   bool SearchAt(const char* subject,  ///< the string to be searched for a match
-                int offset,           ///< offset to begin search in subject string
-                int len = -1,         ///< the length of the subject string
-                int options = 0       ///< sum of any PCRE options flags
+                PCRE2_SIZE offset,    ///< offset to begin search in subject string
+                PCRE2_SIZE len = 0,   ///< the length of the subject string
+                uint32_t options = 0  ///< sum of any PCRE options flags
                 );
   /**<
    *    Apply the regular expression to the subject string, starting at the given offset.
@@ -175,7 +176,7 @@
 
 
   /// Repeat the last search operation, starting immediately after the previous match
-  bool SearchAgain( int options = 0        ///< sum of any PCRE options flags
+  bool SearchAgain( uint32_t options = 0   ///< sum of any PCRE options flags
                    );
   /**<
    *    SearchAgain() applies the regular expression to the same
@@ -432,17 +433,17 @@
 
   void ClearMatchList(void);
 
-  pcre * re;
+  pcre2_code * re;
+  pcre2_general_context * gen_ctxt;
+  pcre2_match_context * match_ctxt;
   size_t re_size;
-  pcre_extra * pe;
-  bool allocated_study;
-  size_t  study_size;
-  int substrcount;         // maximum substrings in pattern
+  uint32_t substrcount;    // maximum substrings in pattern
   const char * subjectStr; // original subject
-  int subjectLen;          // original length
-  int lastStart;           // offset of start for most recent Search or SearchAgain
-  int lastMatches;         // pcre_exec return for most recent Search or SearchAgain
-  int * ovector;           // results from (and workspace for) pcre_exec
+  PCRE2_SIZE subjectLen;   // original length
+  PCRE2_SIZE lastStart;    // offset of start for most recent Search or SearchAgain
+  int lastMatches;         // pcre2_match return for most recent Search or SearchAgain
+  PCRE2_SIZE * ovector;    // results from (and workspace for) pcre2_match
+  pcre2_match_data * md;   // PCRE match data block
   const char * * matchlist;// string cache for Match
 };
 
--- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/src/utl/UtlRegex.cpp
+++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/src/utl/UtlRegex.cpp
@@ -25,59 +25,43 @@
 #   define SIPX_MAX_REGEX_RECURSION 800
 #endif
 
-const unsigned long int RegEx::MAX_RECURSION = SIPX_MAX_REGEX_RECURSION;
-
-// Some versions do not separately define a limit for stack recursion;
-// for those, we must just limit the number of matches.
-#ifndef   PCRE_EXTRA_MATCH_LIMIT_RECURSION
-#  define PCRE_EXTRA_MATCH_LIMIT_RECURSION PCRE_EXTRA_MATCH_LIMIT
-#  define match_limit_recursion match_limit
-#endif
+const uint32_t RegEx::MAX_RECURSION = SIPX_MAX_REGEX_RECURSION;
 
 /////////////////////////////////
-RegEx::RegEx(const char * regex, int options, unsigned long int maxDepth)
+RegEx::RegEx(const char * regex, uint32_t options, uint32_t maxDepth)
 {
-   const char*  pcre_error;
-   int          erroffset;
+   int          pcre_error;
+   PCRE2_SIZE   erroffset;
+
+   gen_ctxt = pcre2_general_context_create(NULL, NULL, NULL);
+   match_ctxt = pcre2_match_context_create(gen_ctxt);
 
    // compile and study the expression
-   re = pcre_compile(regex, options, &pcre_error, &erroffset, NULL);
+   re = pcre2_compile((PCRE2_SPTR)regex, strlen(regex), options,
+                      &pcre_error, &erroffset, NULL);
    if (re == NULL)
    {
       assert(!"Regular expression failed to compile!");
       abort();
    }
-   pe = pcre_study(re, 0, &pcre_error);
-   if ( pcre_error == NULL )
+   if ( pcre_error >= 0 )
    {
       // save the compilation block sizes for the copy constructor.
-      pcre_fullinfo(re, pe, PCRE_INFO_SIZE, &re_size);
-      pcre_fullinfo(re, pe, PCRE_INFO_STUDYSIZE, &study_size);
-      allocated_study = false;
+      pcre2_pattern_info(re, PCRE2_INFO_SIZE, &re_size);
    }
    else
    {
       re_size = 0;
-      study_size = 0;
    }
    
-   if (!pe)
-   {
-      // pcre_study didn't return any study data,
-      // but we need the pcre_extra block anyway for the recursion limit,
-      // so get one
-      pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
-      memset(pe, 0, sizeof(pcre_extra));
-   }
-   // set the maximum recursion depth option in the pcre_extra (pe) block
-   pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-   pe->match_limit_recursion = maxDepth;
-      
+   // set the maximum recursion depth option
+   pcre2_set_depth_limit(match_ctxt, maxDepth);
+
    // allocate space for match results based on how many substrings
    // there are in the expression (+1 for the entire match)
-   pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount);
+   pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &substrcount);
    substrcount++;
-   ovector = new int[3*substrcount];
+   md = pcre2_match_data_create(substrcount*3, gen_ctxt);
    matchlist = NULL;
 };
 
@@ -85,47 +69,14 @@
 RegEx::RegEx(const RegEx& regex)
 {
    // allocate memory for the compiled regular expression information
-   re = (pcre*)pcre_malloc(regex.re_size); 
+   re = pcre2_code_copy(regex.re);
    if (re)
    {
       // copy the compiled regular expression information
-      memcpy(re, regex.re, regex.re_size);
       re_size = regex.re_size;
          
-      pe = NULL;
-      study_size = 0;
-      allocated_study = false;
-      if (regex.pe) // should always be true, because constructor allocates it
-      {
-         // allocate memory for the extra study information and recursion limit
-         pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
-         if (pe)
-         {
-            // copy the extra information
-            memcpy(pe, regex.pe, sizeof(pcre_extra)) ;
-
-            // copy any study information
-            if (regex.study_size > 0)
-            {
-               void* copied_study_data = pcre_malloc(regex.study_size);
-
-               if (copied_study_data)
-               {
-                  pe->study_data = copied_study_data;
-                  memcpy(pe->study_data, regex.pe->study_data, regex.study_size) ;
-                  study_size = regex.study_size;
-                  allocated_study = true;
-               }
-            }
-         }
-      }
-      else
-      {
-         // no extra or study data to copy
-         // this should not happen because we always want the recursion limit
-      }
       substrcount = regex.substrcount;
-      ovector = new int[3*substrcount];
+      md = pcre2_match_data_create(3*substrcount, gen_ctxt);
       matchlist = NULL;
    }
    else
@@ -139,45 +90,37 @@
 RegEx::~RegEx()
 {
   ClearMatchList();
-  if (ovector != NULL)
-  {
-     delete [] ovector;
-  }
-  if (pe)
-  {
-     if (allocated_study && study_size)
-     {
-        pcre_free(pe->study_data);
-     }
-     pcre_free(pe);
-  }
-  pcre_free(re);
+  pcre2_match_data_free(md);
+  pcre2_code_free(re);
+  pcre2_match_context_free(match_ctxt);
+  pcre2_general_context_free(gen_ctxt);
 }
 
 /////////////////////////////////
-int RegEx::SubStrings(void) const
+uint32_t RegEx::SubStrings(void) const
 {
   return substrcount;
 }
 
 /////////////////////////////////
-bool RegEx::Search(const char * subject, int len, int options)
+bool RegEx::Search(const char * subject, PCRE2_SIZE len, uint32_t options)
 {
   ClearMatchList();
 
   subjectStr  = subject;
   lastStart   = 0;
   subjectLen  = (len >= 0) ? len : strlen(subject);
-  lastMatches = pcre_exec(re, pe, subjectStr, subjectLen, 0, options, ovector, 3*substrcount);
+  lastMatches = pcre2_match(re, (PCRE2_SPTR)subjectStr, subjectLen,
+                            0, options, md, match_ctxt);
 
   return lastMatches > 0;
 }
 
 /////////////////////////////////
 bool RegEx::SearchAt(const char* subject,  ///< the string to be searched for a match
-                     int offset,           ///< offset to begin search in subject string
-                     int len,              ///< offset to begin search in subject string
-                     int options           ///< sum of any PCRE options flags
+                     PCRE2_SIZE offset,    ///< offset to begin search in subject string
+                     PCRE2_SIZE len,       ///< offset to begin search in subject string
+                     uint32_t options      ///< sum of any PCRE options flags
                      )
 {
    /*
@@ -189,21 +132,23 @@
    subjectStr  = subject;
    lastStart   = 0;
    subjectLen  = (len >= 0) ? len : strlen(subject);
-   lastMatches = pcre_exec(re, pe, subject, subjectLen, offset, options, ovector, 3*substrcount);
+   lastMatches = pcre2_match(re, (PCRE2_SPTR)subject, subjectLen,
+                             offset, options, md, match_ctxt);
 
    return lastMatches > 0;
 }
 
 /////////////////////////////////
-bool RegEx::SearchAgain(int options)
+bool RegEx::SearchAgain(uint32_t options)
 {
   ClearMatchList();
   bool matched;
+  ovector = pcre2_get_ovector_pointer(md);
   lastStart = ovector[1];
   if (lastStart < subjectLen)
   {
-     lastMatches = pcre_exec(re, pe, subjectStr, subjectLen, lastStart, options,
-                             ovector, 3*substrcount);
+     lastMatches = pcre2_match(re, (PCRE2_SPTR)subjectStr, subjectLen,
+                               lastStart, options, md, match_ctxt);
      matched = lastMatches > 0;
   }
   else
@@ -248,8 +193,9 @@
    
    if (lastMatches) // any matches in the last search?
    {
-      int startOffset = ovector[0]; // start of all of most recent match
-      if (lastStart < startOffset) // anything before the last match?
+      ovector = pcre2_get_ovector_pointer(md);
+      int startOffset = (int)ovector[0]; // start of all of most recent match
+      if ((int)lastStart < startOffset) // anything before the last match?
       {
          int length = startOffset - lastStart;
          if (NULL!=before)
@@ -272,6 +218,7 @@
     * May only be called after a successful call to Search() or SearchAgain() and applies to
     * the results of that call. 
     */
+   ovector = pcre2_get_ovector_pointer(md);
    return (  i < lastMatches
            ? ovector[(2*i)+1]
            : -1
@@ -284,8 +231,9 @@
    
    if (lastMatches) // any matches in the last search?
    {
-      int endOffset = ovector[1]; // end of all of most recent match
-      if (endOffset < subjectLen) // anything after the last match?
+      ovector = pcre2_get_ovector_pointer(md);
+      int endOffset = (int)ovector[1]; // end of all of most recent match
+      if (endOffset < (int)subjectLen) // anything after the last match?
       {
          int length = subjectLen - endOffset;
          if (NULL!=after)
@@ -302,6 +250,7 @@
 {
    bool hadMatch = false; // assume no match
    
+   ovector = pcre2_get_ovector_pointer(md);
    if (i < lastMatches) // enough matches in the last search?
    {
       if (-1 == i) // return entire subject string
@@ -314,10 +263,10 @@
       }
       else
       {
-         int startOffset = ovector[i*2];
+         int startOffset = (int)ovector[i*2];
          if (0 <= startOffset) // did ith string match?
          {
-            int length = ovector[(i*2)+1] - startOffset;
+            int length = (int)ovector[(i*2)+1] - startOffset;
             if (0<length)
             {
                if (NULL!=matched)
@@ -356,10 +305,11 @@
  * @returns true if the last search had an n'th match, false if not
  */
    assert(i < lastMatches);
+   ovector = pcre2_get_ovector_pointer(md);
    if (i <= lastMatches)
    {
-      offset = ovector[(2*i)];
-      length = ovector[(2*i)+1] - ovector[(2*i)];
+      offset = (int)ovector[(2*i)];
+      length = (int)(ovector[(2*i)+1] - ovector[(2*i)]);
 
       i_matched = offset != -1;
    }
@@ -376,8 +326,9 @@
                )
 {
    assert(i < lastMatches);
+   ovector = pcre2_get_ovector_pointer(md);
    return (  (i <= lastMatches)
-           ? ovector[(2*i)]
+           ? (int)ovector[(2*i)]
            : -1
            );
 }
@@ -390,7 +341,7 @@
    {
       if (matchlist == NULL)
       {
-         pcre_get_substring_list(subjectStr, ovector, substrcount, &matchlist);
+         pcre2_substring_list_get(md, (PCRE2_UCHAR ***)&matchlist, NULL);
       }
       return matchlist[i];
    }
@@ -406,7 +357,7 @@
 {
    if (matchlist)
    {
-      pcre_free_substring_list(matchlist);
+      pcre2_substring_list_free((PCRE2_SPTR *)matchlist);
       matchlist = NULL;
    }
 }
--- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/src/Makefile.am
+++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/src/Makefile.am
@@ -22,7 +22,7 @@
 
 libsipXport_la_LIBADD = \
        -lssl -lcrypto \
-       -lpcre \
+       -lpcre2-8 \
        -ldl \
        -lpthread  \
        -lstdc++

Reply via email to