Control: tags -1 + patch

Attached is a patch, which I tested with the demo program at the end of sipXportLib/src/utl/UtlRegex.cpp.
Description: Port to PCRE2.
Bug-Debian: https://bugs.debian.org/999960
Author: Yavor Doganov <ya...@gnu.org>
Forwarded: no
Last-Update: 2023-12-18
---
--- sipxtapi-3.3.0~test18+dfsg.1.orig/configure.ac +++ sipxtapi-3.3.0~test18+dfsg.1/configure.ac @@ -59,7 +59,7 @@ CHECK_OPUS # PCRE is mandatory -PKG_CHECK_MODULES([DEPS_PCRE], [libpcre >= 4.5]) +PKG_CHECK_MODULES([DEPS_PCRE], [libpcre2-8]) # OpenSSL is mandatory, find it or configure fails: CHECK_SSL --- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/include/utl/UtlRegex.h +++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/include/utl/UtlRegex.h @@ -12,7 +12,8 @@ #define _REGEX_H #include <string.h> -#include <pcre.h> +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> #include "utl/UtlString.h" /** @@ -61,8 +62,8 @@ /// Compile a regular expression to create the matching object. RegEx( const char * regex, //< the regular expression - int options = 0, //< any sum of PCRE options bits - unsigned long int maxDepth = MAX_RECURSION // see MAX_RECURSION + uint32_t options = 0, //< any sum of PCRE options bits + uint32_t maxDepth = MAX_RECURSION // see MAX_RECURSION ); /**< * If compiling the regular expression fails, an error message string is @@ -71,7 +72,7 @@ */ /// Default maximum for the recursion depth in searches. - static const unsigned long int MAX_RECURSION; + static const uint32_t MAX_RECURSION; /**< * The PCRE internal match() function implements some searches by recursion. * This value is the default maximumm allowed depth for that recursion. It can @@ -115,7 +116,7 @@ ~RegEx(); /// Count the number of possible substrings returned by this expression - int SubStrings(void) const; + uint32_t SubStrings(void) const; /**< * SubStrings() @returns the number of substrings defined by * the regular expression. 
@@ -142,8 +143,8 @@ /// Search a string for matches to this regular expression bool Search( const char * subject, ///< the string to be searched for a match - int len = -1, ///< the length of the subject string - int options = 0 ///< sum of any PCRE options flags + PCRE2_SIZE len = -1, ///< the length of the subject string + uint32_t options = 0 ///< sum of any PCRE options flags ); /**< * Apply the regular expression to the subject string. @@ -156,9 +157,9 @@ /// Search a string starting at some offset for matches to this regular expression bool SearchAt(const char* subject, ///< the string to be searched for a match - int offset, ///< offset to begin search in subject string - int len = -1, ///< the length of the subject string - int options = 0 ///< sum of any PCRE options flags + PCRE2_SIZE offset, ///< offset to begin search in subject string + PCRE2_SIZE len = 0, ///< the length of the subject string + uint32_t options = 0 ///< sum of any PCRE options flags ); /**< * Apply the regular expression to the subject string, starting at the given offset. 
@@ -175,7 +176,7 @@ /// Repeat the last search operation, starting immediately after the previous match - bool SearchAgain( int options = 0 ///< sum of any PCRE options flags + bool SearchAgain( uint32_t options = 0 ///< sum of any PCRE options flags ); /**< * SearchAgain() applies the regular expression to the same @@ -432,17 +433,17 @@ void ClearMatchList(void); - pcre * re; + pcre2_code * re; + pcre2_general_context * gen_ctxt; + pcre2_match_context * match_ctxt; size_t re_size; - pcre_extra * pe; - bool allocated_study; - size_t study_size; - int substrcount; // maximum substrings in pattern + uint32_t substrcount; // maximum substrings in pattern const char * subjectStr; // original subject - int subjectLen; // original length - int lastStart; // offset of start for most recent Search or SearchAgain - int lastMatches; // pcre_exec return for most recent Search or SearchAgain - int * ovector; // results from (and workspace for) pcre_exec + PCRE2_SIZE subjectLen; // original length + PCRE2_SIZE lastStart; // offset of start for most recent Search or SearchAgain + int lastMatches; // pcre2_match return for most recent Search or SearchAgain + PCRE2_SIZE * ovector; // results from (and workspace for) pcre2_match + pcre2_match_data * md; // PCRE match data block const char * * matchlist;// string cache for Match }; --- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/src/utl/UtlRegex.cpp +++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/src/utl/UtlRegex.cpp @@ -25,59 +25,43 @@ # define SIPX_MAX_REGEX_RECURSION 800 #endif -const unsigned long int RegEx::MAX_RECURSION = SIPX_MAX_REGEX_RECURSION; - -// Some versions do not separately define a limit for stack recursion; -// for those, we must just limit the number of matches. 
-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION -# define PCRE_EXTRA_MATCH_LIMIT_RECURSION PCRE_EXTRA_MATCH_LIMIT -# define match_limit_recursion match_limit -#endif +const uint32_t RegEx::MAX_RECURSION = SIPX_MAX_REGEX_RECURSION; ///////////////////////////////// -RegEx::RegEx(const char * regex, int options, unsigned long int maxDepth) +RegEx::RegEx(const char * regex, uint32_t options, uint32_t maxDepth) { - const char* pcre_error; - int erroffset; + int pcre_error; + PCRE2_SIZE erroffset; + + gen_ctxt = pcre2_general_context_create(NULL, NULL, NULL); + match_ctxt = pcre2_match_context_create(gen_ctxt); // compile and study the expression - re = pcre_compile(regex, options, &pcre_error, &erroffset, NULL); + re = pcre2_compile((PCRE2_SPTR)regex, strlen(regex), options, + &pcre_error, &erroffset, NULL); if (re == NULL) { assert(!"Regular expression failed to compile!"); abort(); } - pe = pcre_study(re, 0, &pcre_error); - if ( pcre_error == NULL ) + if ( pcre_error >= 0 ) { // save the compilation block sizes for the copy constructor. 
- pcre_fullinfo(re, pe, PCRE_INFO_SIZE, &re_size); - pcre_fullinfo(re, pe, PCRE_INFO_STUDYSIZE, &study_size); - allocated_study = false; + pcre2_pattern_info(re, PCRE2_INFO_SIZE, &re_size); } else { re_size = 0; - study_size = 0; } - if (!pe) - { - // pcre_study didn't return any study data, - // but we need the pcre_extra block anyway for the recursion limit, - // so get one - pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra)); - memset(pe, 0, sizeof(pcre_extra)); - } - // set the maximum recursion depth option in the pcre_extra (pe) block - pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - pe->match_limit_recursion = maxDepth; - + // set the maximum recursion depth option + pcre2_set_depth_limit(match_ctxt, maxDepth); + // allocate space for match results based on how many substrings // there are in the expression (+1 for the entire match) - pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount); + pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &substrcount); substrcount++; - ovector = new int[3*substrcount]; + md = pcre2_match_data_create(substrcount*3, gen_ctxt); matchlist = NULL; }; @@ -85,47 +69,14 @@ RegEx::RegEx(const RegEx& regex) { // allocate memory for the compiled regular expression information - re = (pcre*)pcre_malloc(regex.re_size); + re = pcre2_code_copy(regex.re); if (re) { // copy the compiled regular expression information - memcpy(re, regex.re, regex.re_size); re_size = regex.re_size; - pe = NULL; - study_size = 0; - allocated_study = false; - if (regex.pe) // should always be true, because constructor allocates it - { - // allocate memory for the extra study information and recursion limit - pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra)); - if (pe) - { - // copy the extra information - memcpy(pe, regex.pe, sizeof(pcre_extra)) ; - - // copy any study information - if (regex.study_size > 0) - { - void* copied_study_data = pcre_malloc(regex.study_size); - - if (copied_study_data) - { - pe->study_data = copied_study_data; - 
memcpy(pe->study_data, regex.pe->study_data, regex.study_size) ; - study_size = regex.study_size; - allocated_study = true; - } - } - } - } - else - { - // no extra or study data to copy - // this should not happen because we always want the recursion limit - } substrcount = regex.substrcount; - ovector = new int[3*substrcount]; + md = pcre2_match_data_create(3*substrcount, gen_ctxt); matchlist = NULL; } else @@ -139,45 +90,37 @@ RegEx::~RegEx() { ClearMatchList(); - if (ovector != NULL) - { - delete [] ovector; - } - if (pe) - { - if (allocated_study && study_size) - { - pcre_free(pe->study_data); - } - pcre_free(pe); - } - pcre_free(re); + pcre2_match_data_free(md); + pcre2_code_free(re); + pcre2_match_context_free(match_ctxt); + pcre2_general_context_free(gen_ctxt); } ///////////////////////////////// -int RegEx::SubStrings(void) const +uint32_t RegEx::SubStrings(void) const { return substrcount; } ///////////////////////////////// -bool RegEx::Search(const char * subject, int len, int options) +bool RegEx::Search(const char * subject, PCRE2_SIZE len, uint32_t options) { ClearMatchList(); subjectStr = subject; lastStart = 0; subjectLen = (len >= 0) ? len : strlen(subject); - lastMatches = pcre_exec(re, pe, subjectStr, subjectLen, 0, options, ovector, 3*substrcount); + lastMatches = pcre2_match(re, (PCRE2_SPTR)subjectStr, subjectLen, + 0, options, md, match_ctxt); return lastMatches > 0; } ///////////////////////////////// bool RegEx::SearchAt(const char* subject, ///< the string to be searched for a match - int offset, ///< offset to begin search in subject string - int len, ///< offset to begin search in subject string - int options ///< sum of any PCRE options flags + PCRE2_SIZE offset, ///< offset to begin search in subject string + PCRE2_SIZE len, ///< offset to begin search in subject string + uint32_t options ///< sum of any PCRE options flags ) { /* @@ -189,21 +132,23 @@ subjectStr = subject; lastStart = 0; subjectLen = (len >= 0) ? 
len : strlen(subject); - lastMatches = pcre_exec(re, pe, subject, subjectLen, offset, options, ovector, 3*substrcount); + lastMatches = pcre2_match(re, (PCRE2_SPTR)subject, subjectLen, + offset, options, md, match_ctxt); return lastMatches > 0; } ///////////////////////////////// -bool RegEx::SearchAgain(int options) +bool RegEx::SearchAgain(uint32_t options) { ClearMatchList(); bool matched; + ovector = pcre2_get_ovector_pointer(md); lastStart = ovector[1]; if (lastStart < subjectLen) { - lastMatches = pcre_exec(re, pe, subjectStr, subjectLen, lastStart, options, - ovector, 3*substrcount); + lastMatches = pcre2_match(re, (PCRE2_SPTR)subjectStr, subjectLen, + lastStart, options, md, match_ctxt); matched = lastMatches > 0; } else @@ -248,8 +193,9 @@ if (lastMatches) // any matches in the last search? { - int startOffset = ovector[0]; // start of all of most recent match - if (lastStart < startOffset) // anything before the last match? + ovector = pcre2_get_ovector_pointer(md); + int startOffset = (int)ovector[0]; // start of all of most recent match + if ((int)lastStart < startOffset) // anything before the last match? { int length = startOffset - lastStart; if (NULL!=before) @@ -272,6 +218,7 @@ * May only be called after a successful call to Search() or SearchAgain() and applies to * the results of that call. */ + ovector = pcre2_get_ovector_pointer(md); return ( i < lastMatches ? ovector[(2*i)+1] : -1 @@ -284,8 +231,9 @@ if (lastMatches) // any matches in the last search? { - int endOffset = ovector[1]; // end of all of most recent match - if (endOffset < subjectLen) // anything after the last match? + ovector = pcre2_get_ovector_pointer(md); + int endOffset = (int)ovector[1]; // end of all of most recent match + if (endOffset < (int)subjectLen) // anything after the last match? 
{ int length = subjectLen - endOffset; if (NULL!=after) @@ -302,6 +250,7 @@ { bool hadMatch = false; // assume no match + ovector = pcre2_get_ovector_pointer(md); if (i < lastMatches) // enough matches in the last search? { if (-1 == i) // return entire subject string @@ -314,10 +263,10 @@ } else { - int startOffset = ovector[i*2]; + int startOffset = (int)ovector[i*2]; if (0 <= startOffset) // did ith string match? { - int length = ovector[(i*2)+1] - startOffset; + int length = (int)ovector[(i*2)+1] - startOffset; if (0<length) { if (NULL!=matched) @@ -356,10 +305,11 @@ * @returns true if the last search had an n'th match, false if not */ assert(i < lastMatches); + ovector = pcre2_get_ovector_pointer(md); if (i <= lastMatches) { - offset = ovector[(2*i)]; - length = ovector[(2*i)+1] - ovector[(2*i)]; + offset = (int)ovector[(2*i)]; + length = (int)(ovector[(2*i)+1] - ovector[(2*i)]); i_matched = offset != -1; } @@ -376,8 +326,9 @@ ) { assert(i < lastMatches); + ovector = pcre2_get_ovector_pointer(md); return ( (i <= lastMatches) - ? ovector[(2*i)] + ? (int)ovector[(2*i)] : -1 ); } @@ -390,7 +341,7 @@ { if (matchlist == NULL) { - pcre_get_substring_list(subjectStr, ovector, substrcount, &matchlist); + pcre2_substring_list_get(md, (PCRE2_UCHAR ***)&matchlist, NULL); } return matchlist[i]; } @@ -406,7 +357,7 @@ { if (matchlist) { - pcre_free_substring_list(matchlist); + pcre2_substring_list_free((PCRE2_SPTR *)matchlist); matchlist = NULL; } } --- sipxtapi-3.3.0~test18+dfsg.1.orig/sipXportLib/src/Makefile.am +++ sipxtapi-3.3.0~test18+dfsg.1/sipXportLib/src/Makefile.am @@ -22,7 +22,7 @@ libsipXport_la_LIBADD = \ -lssl -lcrypto \ - -lpcre \ + -lpcre2-8 \ -ldl \ -lpthread \ -lstdc++