Currently PHP ships with two regular expression libraries that are both
installed by default: PCRE and regex. The regex library, which is responsible
for the ereg_* functions, is fairly old and offers very limited functionality
compared to the PCRE library. In most cases the PCRE functions are also much
faster than the old ereg functions.
I would like to propose that we drop the old ereg library and use only
a single regular expression library, PCRE. For backward compatibility (BC)
I've written a patch (see attached file) that uses PCRE to emulate the old
ereg_* functions for people who still rely on them.

This cleanup would mean we'd only need to maintain one set of regular
expression code (which, as far as code goes, is pretty complex), and it would
also give a speed-up to people still using ereg.
Perhaps at some future point this would allow us to drop the ereg_* functions
altogether.

Ilia
Index: pcre/php_pcre.c
===================================================================
RCS file: /repository/php4/ext/pcre/php_pcre.c,v
retrieving revision 1.130
diff -u -3 -p -r1.130 php_pcre.c
--- pcre/php_pcre.c     24 Oct 2002 19:06:19 -0000      1.130
+++ pcre/php_pcre.c     31 Oct 2002 13:57:58 -0000
@@ -553,6 +553,110 @@ static void php_pcre_match(INTERNAL_FUNC
 }
 /* }}} */
 
+/* {{{ ereg_to_pcre_convert
+ * Wraps a POSIX ereg pattern as the PCRE pattern "/<pattern>/[i]sD" inside a
+ * newly allocated string zval. A non-zero case_sens appends the 'i' modifier,
+ * i.e. despite its name the flag requests a case-INsensitive match.
+ * The returned zval is heap-allocated here; callers release it themselves. */
+static inline zval *ereg_to_pcre_convert(zval **reg_expr, int case_sens)
+{
+       char *p, *pp;
+       zval *new_reg;
+
+       MAKE_STD_ZVAL(new_reg);
+       /* worst case: every byte doubles, plus 2 delimiters, "isD" and the NUL */
+       Z_STRVAL_P(new_reg) = emalloc(Z_STRLEN_PP(reg_expr) * 2 + 6);
+       Z_TYPE_P(new_reg) = IS_STRING;
+
+       pp = Z_STRVAL_PP(reg_expr);
+       p = Z_STRVAL_P(new_reg);
+
+       *p++ = '/';
+       while (*pp) {
+               if (*pp == '\\' && pp[1]) {
+                       /* keep an existing escape pair intact: re-escaping "\/" would
+                        * give "\\/", an escaped backslash plus a bare delimiter */
+                       *p++ = *pp++;
+               } else if (*pp == '/') {
+                       /* an unescaped '/' would terminate the pattern early */
+                       *p++ = '\\';
+               }
+               *p++ = *pp++;
+       }
+
+       *p++ = '/';
+       if (case_sens) {
+               *p++ = 'i';
+       }
+       *p++ = 's'; /* '.' also matches newline */
+       *p++ = 'D'; /* '$' matches only at the very end, not before a final "\n" */
+       *p = '\0';
+
+       /* take the length from the write cursor rather than counting escapes by hand */
+       Z_STRLEN_P(new_reg) = p - Z_STRVAL_P(new_reg);
+       return new_reg;
+}
+/* }}} */
+
+/* {{{ php_pcre_ereg_match
+ * Shared body of ereg() and eregi(): converts the POSIX pattern with
+ * ereg_to_pcre_convert() and forwards the call to preg_match(). A non-zero
+ * case_sens selects case-insensitive matching. return_value becomes TRUE
+ * when preg_match() reports a non-zero result and FALSE otherwise.
+ * NOTE(review): the ereg() proto advertises an int result while this wrapper
+ * yields a boolean - confirm that no caller relies on a numeric return. */
+static void php_pcre_ereg_match(INTERNAL_FUNCTION_PARAMETERS, int case_sens)
+{
+       zval **old_regex, **m_string, **subpats = NULL;
+       zval **args[3];
+       zval *retval = NULL, *pcre_func, *new_regx; /* retval: NULL until the call stores a result */
+       int argc = ZEND_NUM_ARGS();
+
+       if ((argc != 2 && argc != 3) || (zend_get_parameters_ex(argc, &old_regex, &m_string, &subpats) == FAILURE)) {
+               WRONG_PARAM_COUNT;
+       }
+
+       MAKE_STD_ZVAL(pcre_func);
+       ZVAL_STRING(pcre_func, "preg_match", 1);
+       convert_to_string_ex(old_regex);
+       new_regx = ereg_to_pcre_convert(old_regex, case_sens);
+
+       args[0] = &new_regx;
+       args[1] = m_string;
+       args[2] = subpats; /* NULL for two arguments; presumably unread, as argc is the count passed on */
+
+       RETVAL_FALSE;
+       if (call_user_function_ex(EG(function_table), NULL, pcre_func, &retval, argc, args, 0, NULL TSRMLS_CC) == SUCCESS && retval && Z_LVAL_P(retval)) {
+               RETVAL_TRUE;
+       }
+
+       zval_dtor(new_regx);
+       FREE_ZVAL(new_regx);
+       zval_dtor(pcre_func);
+       FREE_ZVAL(pcre_func);
+       if (retval) {
+               zval_dtor(retval);
+               FREE_ZVAL(retval);
+       }
+}
+/* }}} */
+
+/* {{{ proto int ereg(string pattern, string string [, array registers])
+   Regular expression match; forwards to php_pcre_ereg_match() with flag 0 (no 'i' modifier) */
+PHP_FUNCTION(ereg)
+{
+       php_pcre_ereg_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+/* {{{ proto int eregi(string pattern, string string [, array registers])
+   Case-insensitive regular expression match; forwards to php_pcre_ereg_match() with flag 1 */
+PHP_FUNCTION(eregi)
+{
+       php_pcre_ereg_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
 /* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, 
int flags]])
    Perform a Perl-style regular expression match */
 PHP_FUNCTION(preg_match)
@@ -1102,6 +1206,62 @@ static void preg_replace_impl(INTERNAL_F
 }
 /* }}} */
 
+/* {{{ php_pcre_ereg_replace
+ * Shared body of ereg_replace()/eregi_replace(): converts the POSIX pattern
+ * with ereg_to_pcre_convert() and forwards the call to preg_replace(); a
+ * non-zero case_sens means case-insensitive. return_value receives a copy of
+ * the resulting string, or FALSE when the call fails or yields a non-string. */
+static void php_pcre_ereg_replace(INTERNAL_FUNCTION_PARAMETERS, int case_sens)
+{
+       zval **old_regex, **repl, **src_str;
+       zval **args[3];
+       zval *retval = NULL, *pcre_func, *new_regx; /* retval: NULL until the call stores a result */
+       int argc = ZEND_NUM_ARGS();
+
+       if (argc != 3 || (zend_get_parameters_ex(argc, &old_regex, &repl, &src_str) == FAILURE)) {
+               WRONG_PARAM_COUNT;
+       }
+
+       MAKE_STD_ZVAL(pcre_func);
+       ZVAL_STRING(pcre_func, "preg_replace", 1);
+       convert_to_string_ex(old_regex);
+       new_regx = ereg_to_pcre_convert(old_regex, case_sens);
+
+       args[0] = &new_regx;
+       args[1] = repl;
+       args[2] = src_str;
+       RETVAL_FALSE; /* default; replaced below only by a genuine string result */
+       if (call_user_function_ex(EG(function_table), NULL, pcre_func, &retval, argc, args, 0, NULL TSRMLS_CC) == SUCCESS && retval && Z_TYPE_P(retval) == IS_STRING) {
+               RETVAL_STRINGL(Z_STRVAL_P(retval), Z_STRLEN_P(retval), 1);
+       }
+
+       zval_dtor(new_regx);
+       FREE_ZVAL(new_regx);
+       zval_dtor(pcre_func);
+       FREE_ZVAL(pcre_func);
+       if (retval) {
+               zval_dtor(retval);
+               FREE_ZVAL(retval);
+       }
+}
+/* }}} */
+
+/* {{{ proto string ereg_replace(string pattern, string replacement, string string)
+   Replace regular expression; forwards to php_pcre_ereg_replace() with flag 0 (no 'i' modifier) */
+PHP_FUNCTION(ereg_replace)
+{
+       php_pcre_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+/* {{{ proto string eregi_replace(string pattern, string replacement, string string)
+   Case-insensitive replace regular expression; forwards to php_pcre_ereg_replace() with flag 1 */
+PHP_FUNCTION(eregi_replace)
+{
+       php_pcre_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
 /* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int 
limit])
    Perform Perl-style regular expression replacement. */
 PHP_FUNCTION(preg_replace)
@@ -1279,6 +1439,68 @@ PHP_FUNCTION(preg_split)
 }
 /* }}} */
 
+/* {{{ php_pcre_ereg_split
+ * Shared body of split()/spliti(): converts the POSIX pattern with
+ * ereg_to_pcre_convert() and forwards the call to preg_split(); a non-zero
+ * case_sens means case-insensitive. return_value receives a copy of the
+ * resulting array, or FALSE when the call fails or yields a non-array. */
+static void php_pcre_ereg_split(INTERNAL_FUNCTION_PARAMETERS, int case_sens)
+{
+       /* limit is optional: keep it NULL rather than indeterminate when absent */
+       zval **old_regex, **str, **limit = NULL;
+       zval **args[3];
+       zval *retval = NULL, *pcre_func, *new_regx; /* retval: NULL until the call stores a result */
+       int argc = ZEND_NUM_ARGS();
+
+       if ((argc != 3 && argc != 2) || (zend_get_parameters_ex(argc, &old_regex, &str, &limit) == FAILURE)) {
+               WRONG_PARAM_COUNT;
+       }
+
+       MAKE_STD_ZVAL(pcre_func);
+       ZVAL_STRING(pcre_func, "preg_split", 1);
+
+       convert_to_string_ex(old_regex);
+       new_regx = ereg_to_pcre_convert(old_regex, case_sens);
+
+       args[0] = &new_regx;
+       args[1] = str;
+       args[2] = limit;
+
+       RETVAL_FALSE;
+       if (call_user_function_ex(EG(function_table), NULL, pcre_func, &retval, argc, args, 0, NULL TSRMLS_CC) == SUCCESS && retval && Z_TYPE_P(retval) == IS_ARRAY) {
+               /* duplicate the array; the original is destroyed during cleanup */
+               *return_value = *retval;
+               zval_copy_ctor(return_value);
+       }
+
+       zval_dtor(new_regx);
+       FREE_ZVAL(new_regx);
+       zval_dtor(pcre_func);
+       FREE_ZVAL(pcre_func);
+       if (retval) {
+               zval_dtor(retval);
+               FREE_ZVAL(retval);
+       }
+}
+/* }}} */
+
+/* {{{ proto array split(string pattern, string string [, int limit])
+   Split string into array by regular expression; forwards to php_pcre_ereg_split() with flag 0 */
+PHP_FUNCTION(split)
+{
+       php_pcre_ereg_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+/* {{{ proto array spliti(string pattern, string string [, int limit])
+   Case-insensitive split of a string into an array; forwards to php_pcre_ereg_split() with flag 1 */
+
+PHP_FUNCTION(spliti)
+{
+       php_pcre_ereg_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
 /* {{{ proto string preg_quote(string str, string delim_char)
    Quote regular expression characters plus an optional character */
 PHP_FUNCTION(preg_quote)
@@ -1471,6 +1693,14 @@ function_entry pcre_functions[] = {
        PHP_FE(preg_split,                              NULL)
        PHP_FE(preg_quote,                              NULL)
        PHP_FE(preg_grep,                               NULL)
+/* the following are wrappers for old style regular expressions */     
+       PHP_FE(ereg,                                    third_arg_force_ref)
+       PHP_FE(eregi,                                   third_arg_force_ref)
+       PHP_FE(ereg_replace,                            NULL)
+       PHP_FE(eregi_replace,                           NULL)
+       PHP_FE(split,                                   NULL)
+       PHP_FE(spliti,                                  NULL)
+/* end of wrappers */  
        {NULL,          NULL,                           NULL}
 };
 
Index: pcre/php_pcre.h
===================================================================
RCS file: /repository/php4/ext/pcre/php_pcre.h,v
retrieving revision 1.33
diff -u -3 -p -r1.33 php_pcre.h
--- pcre/php_pcre.h     13 May 2002 17:28:36 -0000      1.33
+++ pcre/php_pcre.h     31 Oct 2002 13:57:58 -0000
@@ -40,6 +40,12 @@ PHP_FUNCTION(preg_replace_callback);
 PHP_FUNCTION(preg_split);
 PHP_FUNCTION(preg_quote);
 PHP_FUNCTION(preg_grep);
+PHP_FUNCTION(ereg);
+PHP_FUNCTION(eregi);
+PHP_FUNCTION(ereg_replace);
+PHP_FUNCTION(eregi_replace);
+PHP_FUNCTION(split);
+PHP_FUNCTION(spliti);
 
 PHPAPI char *php_pcre_replace(char *regex,   int regex_len, char *subject, int 
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int limit 
TSRMLS_DC);
 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *options);
Index: standard/basic_functions.c
===================================================================
RCS file: /repository/php4/ext/standard/basic_functions.c,v
retrieving revision 1.539
diff -u -3 -p -r1.539 basic_functions.c
--- standard/basic_functions.c  29 Oct 2002 23:35:49 -0000      1.539
+++ standard/basic_functions.c  31 Oct 2002 13:57:59 -0000
@@ -591,12 +591,6 @@ function_entry basic_functions[] = {
        PHP_FE(is_callable,                             third_arg_force_ref)
 
        /* functions from reg.c */
-       PHP_FE(ereg,                                    third_arg_force_ref)
-       PHP_FE(ereg_replace,                                                           
                                         NULL)
-       PHP_FE(eregi,                                   third_arg_force_ref)
-       PHP_FE(eregi_replace,                                                          
                                         NULL)
-       PHP_FE(split,                                                                  
                                                 NULL)
-       PHP_FE(spliti,                                                                 
                                                 NULL)
        PHP_FALIAS(join,                                implode,                       
                                         NULL)
        PHP_FE(sql_regcase,                                                            
                                                 NULL)
 
Index: standard/reg.c
===================================================================
RCS file: /repository/php4/ext/standard/reg.c,v
retrieving revision 1.66
diff -u -3 -p -r1.66 reg.c
--- standard/reg.c      25 Sep 2002 14:02:34 -0000      1.66
+++ standard/reg.c      31 Oct 2002 13:57:59 -0000
@@ -253,21 +253,17 @@ static void php_ereg(INTERNAL_FUNCTION_P
 }
 /* }}} */
 
-/* {{{ proto int ereg(string pattern, string string [, array registers])
-   Regular expression match */
+/*
 PHP_FUNCTION(ereg)
 {
        php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
 }
-/* }}} */
 
-/* {{{ proto int eregi(string pattern, string string [, array registers])
-   Case-insensitive regular expression match */
 PHP_FUNCTION(eregi)
 {
        php_ereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 }
-/* }}} */
+*/
 
 /* {{{ php_reg_replace
  * this is the meat and potatoes of regex replacement! */
@@ -483,21 +479,16 @@ static void php_ereg_replace(INTERNAL_FU
 }
 /* }}} */
 
-/* {{{ proto string ereg_replace(string pattern, string replacement, string string)
-   Replace regular expression */
+/*
 PHP_FUNCTION(ereg_replace)
 {
        php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
 }
-/* }}} */
-
-/* {{{ proto string eregi_replace(string pattern, string replacement, string string)
-   Case insensitive replace regular expression */
 PHP_FUNCTION(eregi_replace)
 {
        php_ereg_replace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 }
-/* }}} */
+*/
 
 /* {{{ php_split
  */
@@ -592,23 +583,17 @@ static void php_split(INTERNAL_FUNCTION_
 }
 /* }}} */
 
-/* {{{ proto array split(string pattern, string string [, int limit])
-   Split string into array by regular expression */
+/*
 PHP_FUNCTION(split)
 {
        php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
 }
-/* }}} */
-
-/* {{{ proto array spliti(string pattern, string string [, int limit])
-   Split string into array by regular expression case-insensitive */
 
 PHP_FUNCTION(spliti)
 {
        php_split(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 }
-
-/* }}} */
+*/
 
 /* {{{ proto string sql_regcase(string string)
    Make regular expression for case insensitive match */

-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to