I've written the following patch to trunk to allow us to configure, compile
and link against PCRE2 (10.x). The autoconf in particular is streamlined
for cross-compilation detection, while retaining the ability to override
the path to (and name of) pcre[2]-config.

It isn't in a commit-ready state because of failures in t/apache/expr.t
(among others) when run under t/TEST; the defects appear to revolve around
the way substring patterns are recorded.

I've attached the failing test cases (interestingly, many similar test
patterns do succeed). One test looks outright wrong. I'd rather not beat my
head against these if the answer is blatantly obvious.

If anyone has the patience to explore this further, any help is welcome.

Philip starts with this assertion: "The original, very widely deployed PCRE
library, originally released in 1997, is at version 8.39, and the API and
feature set are stable—future releases will be for bugfixes only. All new
future features will be to PCRE2, not the original PCRE 8.x series." But he
has gone on to state that many fuzzing error cases which are handled
correctly in PCRE2 cannot realistically be fixed in PCRE 8.x. I put this in
the same category as the other parsing rewrites in httpd: starting over is
simply the correct answer. I'd like to see whether httpd 3.0 can choose
PCRE2 over PCRE in the near future (and perhaps backport this if we
determine the behavior is consistent.)

Cheers,

Bill
Index: configure.in
===================================================================
--- configure.in	(revision 1772810)
+++ configure.in	(working copy)
@@ -223,18 +223,18 @@
 AC_ARG_WITH(pcre,
 APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library))
 
-AC_PATH_PROG(PCRE_CONFIG, pcre-config, false)
-if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then
-   PCRE_CONFIG=$with_pcre/bin/pcre-config
-elif test -x "$with_pcre"; then
-   PCRE_CONFIG=$with_pcre
-fi
+AC_CHECK_TARGET_TOOLS(PCRE_CONFIG, [pcre2-config pcre-config],
+                      [`which $with_pcre 2>/dev/null`],
+                      [$with_pcre/bin:$with_pcre])
 
-if test "$PCRE_CONFIG" != "false"; then
+if test "x$PCRE_CONFIG" != "x"; then
   if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else
-    AC_MSG_ERROR([Did not find pcre-config script at $PCRE_CONFIG])
+    AC_MSG_ERROR([Did not find working script at $PCRE_CONFIG])
   fi
   case `$PCRE_CONFIG --version` in
+  [1[0-9].*])
+    AC_DEFINE(HAVE_PCRE2, 1, [Detected PCRE2]) 
+    ;;
   [[1-5].*])
     AC_MSG_ERROR([Need at least pcre version 6.7])
     ;;
@@ -244,10 +244,10 @@
   esac
   AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG])
   APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`])
-  APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs`])
+  APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs8 2>/dev/null || $PCRE_CONFIG --libs`])
   APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)])
 else
-  AC_MSG_ERROR([pcre-config for libpcre not found. PCRE is required and available from http://pcre.org/])
+  AC_MSG_ERROR([pcre(2)-config for libpcre not found. PCRE is required and available from http://pcre.org/])
 fi
 APACHE_SUBST(PCRE_LIBS)
 
Index: server/util_pcre.c
===================================================================
--- server/util_pcre.c	(revision 1772810)
+++ server/util_pcre.c	(working copy)
@@ -46,10 +46,18 @@
 #include "httpd.h"
 #include "apr_strings.h"
 #include "apr_tables.h"
+
+#ifdef HAVE_PCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include "pcre2.h"
+#define PCREn(x) PCRE2_ ## x
+#else
 #include "pcre.h"
+#define PCREn(x) PCRE_ ## x
+#endif
 
 /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */
-#ifndef PCRE_DUPNAMES
+#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2)
 #error PCRE Version 6.7 or later required!
 #else
 
@@ -74,11 +82,19 @@
 
 AP_DECLARE(const char *) ap_pcre_version_string(int which)
 {
+#ifdef HAVE_PCRE2
+    static char buf[80];
+#endif
     switch (which) {
     case AP_REG_PCRE_COMPILED:
-        return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE);
+        return APR_STRINGIFY(PCREn(MAJOR)) "." APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE));
     case AP_REG_PCRE_LOADED:
+#ifdef HAVE_PCRE2
+        pcre2_config(PCRE2_CONFIG_VERSION, buf);
+        return buf;
+#else
         return pcre_version();
+#endif
     default:
         return "Unknown";
     }
@@ -118,7 +134,11 @@
 
 AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
 {
-    (pcre_free)(preg->re_pcre);
+#ifdef HAVE_PCRE2
+    pcre2_code_free(preg->re_pcre);
+#else
+    pcre_free(preg->re_pcre);
+#endif
 }
 
 
@@ -139,34 +159,46 @@
 */
 AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
 {
+#ifdef HAVE_PCRE2
+    size_t erroffset;
+#else
     const char *errorptr;
     int erroffset;
+#endif
     int errcode = 0;
-    int options = PCRE_DUPNAMES;
+    int options = PCREn(DUPNAMES);
 
     if ((cflags & AP_REG_ICASE) != 0)
-        options |= PCRE_CASELESS;
+        options |= PCREn(CASELESS);
     if ((cflags & AP_REG_NEWLINE) != 0)
-        options |= PCRE_MULTILINE;
+        options |= PCREn(MULTILINE);
     if ((cflags & AP_REG_DOTALL) != 0)
-        options |= PCRE_DOTALL;
+        options |= PCREn(DOTALL);
 
-    preg->re_pcre =
-        pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
+#ifdef HAVE_PCRE2
+    preg->re_pcre = pcre2_compile((const unsigned char *)pattern,
+                                  PCRE2_ZERO_TERMINATED, options, &errcode,
+                                  &erroffset, NULL);
+#else
+    preg->re_pcre = pcre_compile2(pattern, options, &errcode,
+                                  &errorptr, &erroffset, NULL);
+#endif
+
     preg->re_erroffset = erroffset;
-
     if (preg->re_pcre == NULL) {
-        /*
-         * There doesn't seem to be constants defined for compile time error
-         * codes. 21 is "failed to get memory" according to pcreapi(3).
-         */
+        /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */
         if (errcode == 21)
             return AP_REG_ESPACE;
         return AP_REG_INVARG;
     }
 
+#ifdef HAVE_PCRE2
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_CAPTURECOUNT, &(preg->re_nsub));
+#else
     pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
-                   PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+                  PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub));
+#endif
     return 0;
 }
 
@@ -198,18 +230,27 @@
 {
     int rc;
     int options = 0;
-    int *ovector = NULL;
+#ifdef HAVE_PCRE2
+    pcre2_match_data *matchdata = NULL;
+    size_t *ovector = NULL;
+#else
     int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
     int allocated_ovector = 0;
+    int *ovector = NULL;
+#endif
 
     if ((eflags & AP_REG_NOTBOL) != 0)
-        options |= PCRE_NOTBOL;
+        options |= PCREn(NOTBOL);
     if ((eflags & AP_REG_NOTEOL) != 0)
-        options |= PCRE_NOTEOL;
+        options |= PCREn(NOTEOL);
 
     ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);    /* Only has meaning after compile */
 
     if (nmatch > 0) {
+#ifdef HAVE_PCRE2
+        matchdata = pcre2_match_data_create(nmatch, NULL);
+        ovector = pcre2_get_ovector_pointer(matchdata);
+#else
         if (nmatch <= POSIX_MALLOC_THRESHOLD) {
             ovector = &(small_ovector[0]);
         }
@@ -219,10 +260,17 @@
                 return AP_REG_ESPACE;
             allocated_ovector = 1;
         }
+#endif
     }
 
+#ifdef HAVE_PCRE2
+    rc = pcre2_match((const pcre2_code *)preg->re_pcre,
+                     (const unsigned char *)buff, len,
+                     0, options, matchdata, NULL);
+#else
     rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len,
                    0, options, ovector, nmatch * 3);
+#endif
 
     if (rc == 0)
         rc = nmatch;            /* All captured slots were filled in */
@@ -233,39 +281,51 @@
             pmatch[i].rm_so = ovector[i * 2];
             pmatch[i].rm_eo = ovector[i * 2 + 1];
         }
-        if (allocated_ovector)
-            free(ovector);
         for (; i < nmatch; i++)
             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+    }
+
+#ifdef HAVE_PCRE2
+    if (matchdata)
+        pcre2_match_data_free(matchdata);
+#else
+    if (allocated_ovector)
+        free(ovector);
+#endif
+
+    if (rc >= 0) {
         return 0;
     }
-
     else {
-        if (allocated_ovector)
-            free(ovector);
+#ifdef HAVE_PCRE2
+        if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
+            return AP_REG_INVARG;
+#endif
         switch (rc) {
-        case PCRE_ERROR_NOMATCH:
+        case PCREn(ERROR_NOMATCH):
             return AP_REG_NOMATCH;
-        case PCRE_ERROR_NULL:
+        case PCREn(ERROR_NULL):
             return AP_REG_INVARG;
-        case PCRE_ERROR_BADOPTION:
+        case PCREn(ERROR_BADOPTION):
             return AP_REG_INVARG;
-        case PCRE_ERROR_BADMAGIC:
+        case PCREn(ERROR_BADMAGIC):
             return AP_REG_INVARG;
-        case PCRE_ERROR_UNKNOWN_NODE:
-            return AP_REG_ASSERT;
-        case PCRE_ERROR_NOMEMORY:
+        case PCREn(ERROR_NOMEMORY):
             return AP_REG_ESPACE;
-#ifdef PCRE_ERROR_MATCHLIMIT
-        case PCRE_ERROR_MATCHLIMIT:
+#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT)
+        case PCREn(ERROR_MATCHLIMIT):
             return AP_REG_ESPACE;
 #endif
-#ifdef PCRE_ERROR_BADUTF8
-        case PCRE_ERROR_BADUTF8:
+#if defined(PCRE_ERROR_UNKNOWN_NODE)
+        case PCRE_ERROR_UNKNOWN_NODE:
+            return AP_REG_ASSERT;
+#endif
+#if defined(PCRE_ERROR_BADUTF8)
+        case PCREn(ERROR_BADUTF8):
             return AP_REG_INVARG;
 #endif
-#ifdef PCRE_ERROR_BADUTF8_OFFSET
-        case PCRE_ERROR_BADUTF8_OFFSET:
+#if defined(PCRE_ERROR_BADUTF8_OFFSET)
+        case PCREn(ERROR_BADUTF8_OFFSET):
             return AP_REG_INVARG;
 #endif
         default:
@@ -283,12 +343,21 @@
     int i;
     char *nametable;
 
-    pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
+#ifdef HAVE_PCRE2
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMECOUNT, &namecount);
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize);
+    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
+                       PCRE2_INFO_NAMETABLE, &nametable);
+#else
+    pcren(fullinfo)((const pcre *)preg->re_pcre, NULL,
                        PCRE_INFO_NAMECOUNT, &namecount);
-    pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
+    pcren(fullinfo)((const pcre *)preg->re_pcre, NULL,
                        PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
-    pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
+    pcren(fullinfo)((const pcre *)preg->re_pcre, NULL,
                        PCRE_INFO_NAMETABLE, &nametable);
+#endif
 
     for (i = 0; i < namecount; i++) {
         const char *offset = nametable + i * nameentrysize;
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' =~ /bc/ " should evaluate to true, got false
not ok 44
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' =~ /BC/i " should evaluate to true, got false
not ok 45
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' !~ m!bc! " should evaluate to false, got true
not ok 46
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' !~ m!BC!i " should evaluate to false, got true
not ok 47
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' =~ /bc/ && $0 == '' " should evaluate to true, got false
not ok 55
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
" 'abc' =~ /(bc)/ && 'xy' =~ /x/ && $0 == 'bc' " should evaluate to true, got false
not ok 56
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
"%{TIME_YEAR} =~ /^\d{4}$/" should evaluate to true, got false
not ok 57
# writing file: /home/wrowe/dev/test/test2x-apr20-ossl110/t/htdocs/apache/expr/.htaccess
"%{TIME} =~ /^\d{14}$/" should evaluate to true, got false
not ok 64

Reply via email to