I've written the following patch to trunk to allow us to configure, compile and link against PCRE2 (10.x). The autoconf in particular is streamlined for cross-compilation detection, while retaining the ability to override the path to (and name of) pcre[2]-config.
It isn't in a commit-ready state due to t/TEST t/apache/expr.t failures (among others), and the defects appear to revolve around the way substring patterns are recorded. Attached are the test failure cases (interestingly, many similar test patterns do succeed). One test looks outright wrong. I'd rather not beat my head against these if the answer is blatantly obvious. If anyone has the patience to explore this further, any help is welcome. Philip starts with this assertion: "The original, very widely deployed PCRE library, originally released in 1997, is at version 8.39, and the API and feature set are stable—future releases will be for bugfixes only. All new future features will be to PCRE2, not the original PCRE 8.x series." But he has gone on to state that many fuzzing error cases which are handled correctly in PCRE2 cannot realistically be fixed in PCRE 8.x. I've placed this up there with other parsing rewrites in httpd, where starting over is simply the correct answer, and I'd like to see if we can have httpd 3.0 choose PCRE2 over PCRE in the near future (and perhaps backport this if we determine the behavior is consistent). Cheers, Bill
Index: configure.in =================================================================== --- configure.in (revision 1772810) +++ configure.in (working copy) @@ -223,18 +223,18 @@ AC_ARG_WITH(pcre, APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library)) -AC_PATH_PROG(PCRE_CONFIG, pcre-config, false) -if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then - PCRE_CONFIG=$with_pcre/bin/pcre-config -elif test -x "$with_pcre"; then - PCRE_CONFIG=$with_pcre -fi +AC_CHECK_TARGET_TOOLS(PCRE_CONFIG, [pcre2-config pcre-config], + [`which $with_pcre 2>/dev/null`], + [$with_pcre/bin:$with_pcre]) -if test "$PCRE_CONFIG" != "false"; then +if test "x$PCRE_CONFIG" != "x"; then if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else - AC_MSG_ERROR([Did not find pcre-config script at $PCRE_CONFIG]) + AC_MSG_ERROR([Did not find working script at $PCRE_CONFIG]) fi case `$PCRE_CONFIG --version` in + [1[0-9].*]) + AC_DEFINE(HAVE_PCRE2, 1, [Detected PCRE2]) + ;; [[1-5].*]) AC_MSG_ERROR([Need at least pcre version 6.7]) ;; @@ -244,10 +244,10 @@ esac AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG]) APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`]) - APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs`]) + APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs8 2>/dev/null || $PCRE_CONFIG --libs`]) APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)]) else - AC_MSG_ERROR([pcre-config for libpcre not found. PCRE is required and available from http://pcre.org/]) + AC_MSG_ERROR([pcre(2)-config for libpcre not found. 
PCRE is required and available from http://pcre.org/]) fi APACHE_SUBST(PCRE_LIBS) Index: server/util_pcre.c =================================================================== --- server/util_pcre.c (revision 1772810) +++ server/util_pcre.c (working copy) @@ -46,10 +46,18 @@ #include "httpd.h" #include "apr_strings.h" #include "apr_tables.h" + +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" +#define PCREn(x) PCRE2_ ## x +#else #include "pcre.h" +#define PCREn(x) PCRE_ ## x +#endif /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ -#ifndef PCRE_DUPNAMES +#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2) #error PCRE Version 6.7 or later required! #else @@ -74,11 +82,19 @@ AP_DECLARE(const char *) ap_pcre_version_string(int which) { +#ifdef HAVE_PCRE2 + static char buf[80]; +#endif switch (which) { case AP_REG_PCRE_COMPILED: - return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE); + return APR_STRINGIFY(PCREn(MAJOR)) "." 
APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE)); case AP_REG_PCRE_LOADED: +#ifdef HAVE_PCRE2 + pcre2_config(PCRE2_CONFIG_VERSION, buf); + return buf; +#else return pcre_version(); +#endif default: return "Unknown"; } @@ -118,7 +134,11 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg) { - (pcre_free)(preg->re_pcre); +#ifdef HAVE_PCRE2 + pcre2_code_free(preg->re_pcre); +#else + pcre_free(preg->re_pcre); +#endif } @@ -139,34 +159,46 @@ */ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags) { +#ifdef HAVE_PCRE2 + size_t erroffset; +#else const char *errorptr; int erroffset; +#endif int errcode = 0; - int options = PCRE_DUPNAMES; + int options = PCREn(DUPNAMES); if ((cflags & AP_REG_ICASE) != 0) - options |= PCRE_CASELESS; + options |= PCREn(CASELESS); if ((cflags & AP_REG_NEWLINE) != 0) - options |= PCRE_MULTILINE; + options |= PCREn(MULTILINE); if ((cflags & AP_REG_DOTALL) != 0) - options |= PCRE_DOTALL; + options |= PCREn(DOTALL); - preg->re_pcre = - pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL); +#ifdef HAVE_PCRE2 + preg->re_pcre = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, NULL); +#else + preg->re_pcre = pcre_compile2(pattern, options, &errcode, + &errorptr, &erroffset, NULL); +#endif + preg->re_erroffset = erroffset; - if (preg->re_pcre == NULL) { - /* - * There doesn't seem to be constants defined for compile time error - * codes. 21 is "failed to get memory" according to pcreapi(3). 
- */ + /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */ if (errcode == 21) return AP_REG_ESPACE; return AP_REG_INVARG; } +#ifdef HAVE_PCRE2 + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_CAPTURECOUNT, &(preg->re_nsub)); +#else pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); + PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); +#endif return 0; } @@ -198,18 +230,27 @@ { int rc; int options = 0; - int *ovector = NULL; +#ifdef HAVE_PCRE2 + pcre2_match_data *matchdata = NULL; + size_t *ovector = NULL; +#else int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; int allocated_ovector = 0; + int *ovector = NULL; +#endif if ((eflags & AP_REG_NOTBOL) != 0) - options |= PCRE_NOTBOL; + options |= PCREn(NOTBOL); if ((eflags & AP_REG_NOTEOL) != 0) - options |= PCRE_NOTEOL; + options |= PCREn(NOTEOL); ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ if (nmatch > 0) { +#ifdef HAVE_PCRE2 + matchdata = pcre2_match_data_create(nmatch, NULL); + ovector = pcre2_get_ovector_pointer(matchdata); +#else if (nmatch <= POSIX_MALLOC_THRESHOLD) { ovector = &(small_ovector[0]); } @@ -219,10 +260,17 @@ return AP_REG_ESPACE; allocated_ovector = 1; } +#endif } +#ifdef HAVE_PCRE2 + rc = pcre2_match((const pcre2_code *)preg->re_pcre, + (const unsigned char *)buff, len, + 0, options, matchdata, NULL); +#else rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, 0, options, ovector, nmatch * 3); +#endif if (rc == 0) rc = nmatch; /* All captured slots were filled in */ @@ -233,39 +281,51 @@ pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_eo = ovector[i * 2 + 1]; } - if (allocated_ovector) - free(ovector); for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + } + +#ifdef HAVE_PCRE2 + if (matchdata) + pcre2_match_data_free(matchdata); +#else + if (allocated_ovector) + free(ovector); +#endif + + if (rc >= 0) { return 0; } - else { - if (allocated_ovector) - 
free(ovector); +#ifdef HAVE_PCRE2 + if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return AP_REG_INVARG; +#endif switch (rc) { - case PCRE_ERROR_NOMATCH: + case PCREn(ERROR_NOMATCH): return AP_REG_NOMATCH; - case PCRE_ERROR_NULL: + case PCREn(ERROR_NULL): return AP_REG_INVARG; - case PCRE_ERROR_BADOPTION: + case PCREn(ERROR_BADOPTION): return AP_REG_INVARG; - case PCRE_ERROR_BADMAGIC: + case PCREn(ERROR_BADMAGIC): return AP_REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: - return AP_REG_ASSERT; - case PCRE_ERROR_NOMEMORY: + case PCREn(ERROR_NOMEMORY): return AP_REG_ESPACE; -#ifdef PCRE_ERROR_MATCHLIMIT - case PCRE_ERROR_MATCHLIMIT: +#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT) + case PCREn(ERROR_MATCHLIMIT): return AP_REG_ESPACE; #endif -#ifdef PCRE_ERROR_BADUTF8 - case PCRE_ERROR_BADUTF8: +#if defined(PCRE_ERROR_UNKNOWN_NODE) + case PCRE_ERROR_UNKNOWN_NODE: + return AP_REG_ASSERT; +#endif +#if defined(PCRE_ERROR_BADUTF8) + case PCREn(ERROR_BADUTF8): return AP_REG_INVARG; #endif -#ifdef PCRE_ERROR_BADUTF8_OFFSET - case PCRE_ERROR_BADUTF8_OFFSET: +#if defined(PCRE_ERROR_BADUTF8_OFFSET) + case PCREn(ERROR_BADUTF8_OFFSET): return AP_REG_INVARG; #endif default: @@ -283,12 +343,21 @@ int i; char *nametable; - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, +#ifdef HAVE_PCRE2 + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMECOUNT, &namecount); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMETABLE, &nametable); +#else + pcren(fullinfo)((const pcre *)preg->re_pcre, NULL, PCRE_INFO_NAMECOUNT, &namecount); - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, + pcren(fullinfo)((const pcre *)preg->re_pcre, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, + pcren(fullinfo)((const pcre *)preg->re_pcre, NULL, PCRE_INFO_NAMETABLE, &nametable); +#endif for 
(i = 0; i < namecount; i++) { const char *offset = nametable + i * nameentrysize;
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' =~ /bc/ " should evaluate to true, got false not ok 44 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' =~ /BC/i " should evaluate to true, got false not ok 45 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' !~ m!bc! " should evaluate to false, got true not ok 46 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' !~ m!BC!i " should evaluate to false, got true not ok 47 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' =~ /bc/ && $0 == '' " should evaluate to true, got false not ok 55 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess " 'abc' =~ /(bc)/ && 'xy' =~ /x/ && $0 == 'bc' " should evaluate to true, got false not ok 56 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess "%{TIME_YEAR} =~ /^\d{4}$/" should evaluate to true, got false not ok 57 # writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess "%{TIME} =~ /^\d{14}$/" should evaluate to true, got false not ok 64