helly Thu Jul 20 21:19:05 2006 UTC
Modified files:
/php-src/ext/pcre php_pcre.c php_pcre.h
/php-src/ext/pcre/tests grep2.phpt match_flags3.phpt
preg_replace.phpt
preg_replace_callback2.phpt split.phpt
Log:
- Clean up (after consulting Andrei)
. Change the handlers SPL uses to php_pcre_*_impl(pcre_cache_entry*,....)
. All refactored funcs (match, split, replace, grep) use the above
. Change (zend|php)_error() to php_error_docref()
. Move from the old to the new parameter parsing API
. Fix memory leaks in unicode mode
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.187&r2=1.188&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.187 php-src/ext/pcre/php_pcre.c:1.188
--- php-src/ext/pcre/php_pcre.c:1.187 Sun Jul 16 19:20:05 2006
+++ php-src/ext/pcre/php_pcre.c Thu Jul 20 21:19:05 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.187 2006/07/16 19:20:05 helly Exp $ */
+/* $Id: php_pcre.c,v 1.188 2006/07/20 21:19:05 helly Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -174,19 +174,20 @@
/* {{{ pcre_get_compiled_regex_cache
*/
-PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len, pcre_extra **extra, int *preg_options, int *compile_options
TSRMLS_DC)
+PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len TSRMLS_DC)
{
pcre *re = NULL;
- int coptions = 0;
- int soptions = 0;
- const char *error;
- int erroffset;
- char delimiter;
+ pcre_extra *extra;
+ int coptions = 0;
+ int soptions = 0;
+ const char *error;
+ int erroffset;
+ char delimiter;
char start_delimiter;
char end_delimiter;
- char *p, *pp;
+ char *p, *pp;
char *pattern;
- int do_study = 0;
+ int do_study = 0;
int poptions = 0;
unsigned const char *tables = NULL;
#if HAVE_SETLOCALE
@@ -209,9 +210,6 @@
#if HAVE_SETLOCALE
if (!strcmp(pce->locale, locale)) {
#endif
- *extra = pce->extra;
- *preg_options = pce->preg_options;
- *compile_options = pce->compile_options;
return pce;
#if HAVE_SETLOCALE
}
@@ -285,9 +283,6 @@
/* Move on to the options */
pp++;
- /* Clear out preg options */
- *preg_options = 0;
-
/* Parse through the options, setting appropriate flags. Display
a warning if we encounter an unknown modifier. */
while (*pp != 0) {
@@ -344,18 +339,17 @@
/* If study option was specified, study the pattern and
store the result in extra for passing to pcre_exec. */
if (do_study) {
- *extra = pcre_study(re, soptions, &error);
- if (*extra) {
- (*extra)->flags |= PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ extra = pcre_study(re, soptions, &error);
+ if (extra) {
+ extra->flags |= PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
}
if (error != NULL) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Error while
studying pattern");
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error
while studying pattern");
}
+ } else {
+ extra = NULL;
}
- *preg_options = poptions;
- *compile_options = coptions;
-
efree(pattern);
/*
@@ -370,7 +364,7 @@
/* Store the compiled pattern and extra info in the cache. */
new_entry.re = re;
- new_entry.extra = *extra;
+ new_entry.extra = extra;
new_entry.preg_options = poptions;
new_entry.compile_options = coptions;
#if HAVE_SETLOCALE
@@ -388,8 +382,14 @@
*/
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*preg_options TSRMLS_DC)
{
- int compile_options;
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex), extra, preg_options, &compile_options TSRMLS_CC);
+ pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex) TSRMLS_CC);
+
+ if (extra) {
+ *extra = pce ? pce->extra : NULL;
+ }
+ if (preg_options) {
+ *preg_options = pce ? pce->preg_options : 0;
+ }
return pce ? pce->re : NULL;
}
@@ -399,7 +399,17 @@
*/
PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *compile_options TSRMLS_DC)
{
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex), extra, preg_options, compile_options TSRMLS_CC);
+ pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex) TSRMLS_CC);
+
+ if (extra) {
+ *extra = pce ? pce->extra : NULL;
+ }
+ if (preg_options) {
+ *preg_options = pce ? pce->preg_options : 0;
+ }
+ if (compile_options) {
+ *compile_options = pce ? pce->compile_options : 0;
+ }
return pce ? pce->re : NULL;
}
@@ -429,72 +439,51 @@
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{
*/
{
/* parameters */
- char *regex; /* Regular
expression */
- char *subject; /* String to match
against */
- int regex_len;
- int subject_len;
- pcre *re = NULL; /* Compiled
regular expression */
- pcre_extra *extra = NULL; /* Holds results of
studying pattern */
- zval *subpats = NULL; /* Array for
subpatterns */
- long flags; /* Match
control flags */
- int preg_options = 0; /* Custom preg
options */
- long start_offset = 0; /* Where the new search
starts */
- int subpats_order = 0; /* Order of
subpattern matches */
- int offset_capture = 0;/* Capture match
offsets: yes/no */
+ char *regex; /* Regular
expression */
+ char *subject; /* String to
match against */
+ int regex_len;
+ int subject_len;
+ pcre_cache_entry *pce; /* Compiled regular
expression */
+ zval *subpats = NULL; /* Array for
subpatterns */
+ long flags = 0; /* Match control flags
*/
+ long start_offset = 0; /* Where the new search
starts */
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ?
"ssz|ll" : "ss|zll"), &regex, &regex_len,
&subject,
&subject_len, &subpats, &flags, &start_offset) == FAILURE) {
RETURN_FALSE;
}
- if (global)
- subpats_order = PREG_PATTERN_ORDER;
-
- if (ZEND_NUM_ARGS() > 3) {
- offset_capture = flags & PREG_OFFSET_CAPTURE;
- /*
- * subpats_order is pre-set to pattern mode so we change it
only if
- * necessary.
- */
- if (flags & 0xff) {
- subpats_order = flags & 0xff;
- }
- if ((global && (subpats_order < PREG_PATTERN_ORDER ||
subpats_order > PREG_SET_ORDER)) ||
- (!global && subpats_order != 0)) {
- zend_error(E_WARNING, "Wrong value for parameter 4 in
call to %s()", get_active_function_name(TSRMLS_C));
- return;
- }
- }
-
/* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options
TSRMLS_CC)) == NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
RETURN_FALSE;
}
- php_pcre_match(re, extra, subject, subject_len, return_value, subpats,
- global, preg_options, start_offset, subpats_order,
offset_capture TSRMLS_CC);
+ php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
+ global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
}
-PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
- zval *subpats, int global, int preg_options, long start_offset, int
subpats_order, int offset_capture TSRMLS_DC)
+PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+ zval *subpats, int global, int use_flags, long flags, long start_offset
TSRMLS_DC)
{
-
zval *result_set, /* Holds a set of
subpatterns after
a global match */
**match_sets = NULL; /* An array of sets of
matches for each
subpattern after a global match */
+ pcre_extra *extra = pce->extra;/* Holds results of
studying */
pcre_extra extra_data; /* Used locally for
exec options */
- int exoptions = 0; /* Execution
options */
- int count = 0; /*
Count of matched subpatterns */
- int *offsets; /*
Array of subpattern offsets */
+ int exoptions = 0; /* Execution
options */
+ int count = 0; /*
Count of matched subpatterns */
+ int *offsets; /*
Array of subpattern offsets */
int num_subpats; /* Number of
captured subpatterns */
- int size_offsets; /* Size of the
offsets array */
- int matched; /* Has
anything matched */
+ int size_offsets; /* Size of the
offsets array */
+ int matched; /* Has
anything matched */
int g_notempty = 0; /* If the match
should not be empty */
const char **stringlist; /* Holds list of
subpatterns */
char *match; /* The current
match */
char **subpat_names = NULL;/* Array for named subpatterns
*/
int i, rc;
+ int subpats_order; /* Order of
subpattern matches */
+ int offset_capture; /* Capture match
offsets: yes/no */
/* Overwrite the passed-in value for subpatterns with an empty array. */
if (subpats != NULL) {
@@ -502,6 +491,27 @@
array_init(subpats);
}
+ subpats_order = global ? PREG_PATTERN_ORDER : 0;
+
+ if (use_flags) {
+ offset_capture = flags & PREG_OFFSET_CAPTURE;
+
+ /*
+ * subpats_order is pre-set to pattern mode so we change it
only if
+ * necessary.
+ */
+ if (flags & 0xff) {
+ subpats_order = flags & 0xff;
+ }
+ if ((global && (subpats_order < PREG_PATTERN_ORDER ||
subpats_order > PREG_SET_ORDER)) ||
+ (!global && subpats_order != 0)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid
flags specified");
+ return;
+ }
+ } else {
+ offset_capture = 0;
+ }
+
/* Negative offset counts from the end of the string. */
if (start_offset < 0) {
start_offset = subject_len + start_offset;
@@ -518,10 +528,9 @@
extra->match_limit_recursion = PCRE_G(recursion_limit);
/* Calculate the size of the offsets array, and allocate memory for it.
*/
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT,
&num_subpats);
if (rc < 0) {
- php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d",
- get_active_function_name(TSRMLS_C), rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal
pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
num_subpats++;
@@ -540,10 +549,9 @@
char *name_table;
unsigned short name_idx;
- rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT,
&name_cnt);
if (rc < 0) {
- php_error(E_WARNING, "%s: internal pcre_fullinfo()
error %d",
- get_active_function_name(TSRMLS_C),
rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal
pcre_fullinfo() error %d", rc);
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
@@ -552,12 +560,11 @@
int rc1, rc2;
long dummy_l;
double dummy_d;
- rc1 = pcre_fullinfo(re, extra, PCRE_INFO_NAMETABLE,
&name_table);
- rc2 = pcre_fullinfo(re, extra, PCRE_INFO_NAMEENTRYSIZE,
&name_size);
+ rc1 = pcre_fullinfo(pce->re, extra,
PCRE_INFO_NAMETABLE, &name_table);
+ rc2 = pcre_fullinfo(pce->re, extra,
PCRE_INFO_NAMEENTRYSIZE, &name_size);
rc = rc2 ? rc2 : rc1;
if (rc < 0) {
- php_error(E_WARNING, "%s: internal
pcre_fullinfo() error %d",
-
get_active_function_name(TSRMLS_C), rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING,
"Internal pcre_fullinfo() error %d", rc);
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
@@ -567,7 +574,7 @@
name_idx = 0xff * name_table[0] + name_table[1];
subpat_names[name_idx] = name_table + 2;
if (is_numeric_string(subpat_names[name_idx],
strlen(subpat_names[name_idx]), &dummy_l, &dummy_d, 0) > 0) {
- php_error(E_WARNING, "%s: numeric named
subpatterns are not allowed", get_active_function_name(TSRMLS_C));
+ php_error_docref(NULL TSRMLS_CC,
E_WARNING, "Numeric named subpatterns are not allowed");
efree(offsets);
efree(subpat_names);
RETURN_FALSE;
@@ -593,12 +600,12 @@
do {
/* Execute the regular expression. */
- count = pcre_exec(re, extra, subject, subject_len, start_offset,
+ count = pcre_exec(pce->re, extra, subject, subject_len,
start_offset,
exoptions|g_notempty,
offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
- zend_error(E_NOTICE, "Matched, but too many
substrings");
+ php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched,
but too many substrings");
count = size_offsets/3;
}
@@ -613,8 +620,7 @@
if (pcre_get_substring_list(subject, offsets,
count, &stringlist) < 0) {
efree(subpat_names);
efree(offsets);
- efree(re);
- zend_error(E_WARNING, "Get subpatterns
list failed");
+ php_error_docref(NULL TSRMLS_CC,
E_WARNING, "Get subpatterns list failed");
RETURN_FALSE;
}
@@ -843,7 +849,7 @@
/* If found a backreference.. */
if ('\\' == *walk || '$' == *walk) {
smart_str_appendl(&code, segment, walk - segment);
- if (walk_last == '\\') {
+ if (walk_last == '\\') {
code.c[code.len-1] = *walk++;
segment = walk;
walk_last = 0;
@@ -912,14 +918,26 @@
zval *replace_val,
int is_callable_replace,
int *result_len, int
limit, int *replace_count TSRMLS_DC)
{
- pcre *re = NULL; /* Compiled
regular expression */
- pcre_extra *extra = NULL; /* Holds results of
studying */
+ pcre_cache_entry *pce; /* Compiled regular
expression */
+
+ /* Compile regex or get it from cache. */
+ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
+ return NULL;
+ }
+
+ return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
+ is_callable_replace, result_len, limit, replace_count
TSRMLS_DC);
+}
+
+PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *replace_val,
+ int is_callable_replace, int *result_len, int limit, int *replace_count
TSRMLS_DC)
+{
+ pcre_extra *extra = pce->extra;/* Holds results of
studying */
pcre_extra extra_data; /* Used locally for
exec options */
- int exoptions = 0; /* Execution
options */
- int preg_options = 0; /* Custom preg
options */
- int count = 0; /*
Count of matched subpatterns */
- int *offsets; /*
Array of subpattern offsets */
- int size_offsets; /* Size of the
offsets array */
+ int exoptions = 0; /* Execution
options */
+ int count = 0; /*
Count of matched subpatterns */
+ int *offsets; /*
Array of subpattern offsets */
+ int size_offsets; /* Size of the
offsets array */
int new_len; /*
Length of needed storage */
int alloc_len; /*
Actual allocated length */
int eval_result_len=0; /* Length of
the eval'ed or
@@ -942,11 +960,6 @@
walk_last; /* Last
walked character */
int rc;
- /* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(regex, &extra, &preg_options
TSRMLS_CC)) == NULL) {
- return NULL;
- }
-
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra = &extra_data;
@@ -954,10 +967,10 @@
extra->match_limit = PCRE_G(backtrack_limit);
extra->match_limit_recursion = PCRE_G(recursion_limit);
- eval = preg_options & PREG_REPLACE_EVAL;
+ eval = pce->preg_options & PREG_REPLACE_EVAL;
if (is_callable_replace) {
if (eval) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "/e
modifier cannot be used with replacement callback");
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier
/e cannot be used with replacement callback");
return NULL;
}
} else {
@@ -967,10 +980,9 @@
}
/* Calculate the size of the offsets array, and allocate memory for it.
*/
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT,
&size_offsets);
if (rc < 0) {
- php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d",
- get_active_function_name(TSRMLS_C), rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal
pcre_fullinfo() error %d", rc);
return NULL;
}
size_offsets = (size_offsets + 1) * 3;
@@ -987,7 +999,7 @@
while (1) {
/* Execute the regular expression. */
- count = pcre_exec(re, extra, subject, subject_len, start_offset,
+ count = pcre_exec(pce->re, extra, subject, subject_len,
start_offset,
exoptions|g_notempty,
offsets, size_offsets);
/* Check for too many substrings condition. */
@@ -1004,7 +1016,7 @@
}
/* Set the match location in subject */
match = subject + offsets[0];
-
+
new_len = *result_len + offsets[0] - start_offset; /*
part before the match */
/* If evaluating, do it and add the return string's
length */
@@ -1210,13 +1222,13 @@
} else {
result = php_pcre_replace(Z_STRVAL_P(regex),
Z_STRLEN_P(regex),
-
Z_STRVAL_PP(subject),
+
Z_STRVAL_PP(subject),
Z_STRLEN_PP(subject),
replace,
is_callable_replace,
result_len,
limit,
- replace_count TSRMLS_CC);
+ replace_count
TSRMLS_CC);
return result;
}
}
@@ -1256,7 +1268,7 @@
convert_to_string_ex(replace);
if (is_callable_replace) {
if (!zend_is_callable(*replace, 0, &callback_name)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "requires
argument 2, '%R', to be a valid callback", Z_TYPE(callback_name),
Z_UNIVAL(callback_name));
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires
argument 2, '%R', to be a valid callback", Z_TYPE(callback_name),
Z_UNIVAL(callback_name));
zval_dtor(&callback_name);
*return_value = **subject;
zval_copy_ctor(return_value);
@@ -1292,6 +1304,10 @@
/* Add to return array */
switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key,
0))
{
+ case HASH_KEY_IS_UNICODE:
+
add_u_assoc_stringl(return_value, IS_UNICODE, string_key, result, result_len,
0);
+ break;
+
case HASH_KEY_IS_STRING:
add_assoc_stringl(return_value,
string_key.s, result, result_len, 0);
break;
@@ -1337,63 +1353,39 @@
Split string into an array using a perl-style regular expression as a
delimiter */
PHP_FUNCTION(preg_split)
{
- zval **regex, /* Regular
expression to split by */
- **subject, /* Subject
string to split */
- **limit, /*
Number of pieces to return */
- **flags;
- int preg_options = 0; /* Custom preg
options */
- int coptions = 0; /* Custom preg options
*/
- int argc; /*
Argument count */
- int limit_val = -1; /* Integer
value of limit */
- int no_empty = 0; /* If NO_EMPTY
flag is set */
- int delim_capture = 0; /* If delimiters
should be captured */
- int offset_capture = 0;/* If offsets
should be captured */
- pcre *re; /* Compiled
regular expression */
- pcre_extra *extra = NULL; /* Holds results of
studying */
+ char *regex; /* Regular
expression */
+ char *subject; /* String to
match against */
+ int regex_len;
+ int subject_len;
+ long limit_val = -1;/* Integer value of
limit */
+ long flags = 0; /* Match
control flags */
+ pcre_cache_entry *pce; /* Compiled regular
expression */
/* Get function parameters and do error checking */
- argc = ZEND_NUM_ARGS();
- if (argc < 2 || argc > 4 || zend_get_parameters_ex(argc, &regex,
&subject, &limit, &flags) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
-
- if (argc > 2) {
- convert_to_long_ex(limit);
- limit_val = Z_LVAL_PP(limit);
- if (limit_val == 0)
- limit_val = -1;
-
- if (argc > 3) {
- convert_to_long_ex(flags);
- no_empty = Z_LVAL_PP(flags) & PREG_SPLIT_NO_EMPTY;
- delim_capture = Z_LVAL_PP(flags) &
PREG_SPLIT_DELIM_CAPTURE;
- offset_capture = Z_LVAL_PP(flags) &
PREG_SPLIT_OFFSET_CAPTURE;
- }
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex,
&regex_len,
+ &subject,
&subject_len, &limit_val, &flags) == FAILURE) {
+ RETURN_FALSE;
}
- /* Make sure we're dealing with strings */
- convert_to_string_ex(regex);
- convert_to_string_ex(subject);
-
/* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex_ex(Z_STRVAL_PP(regex), &extra,
&preg_options, &coptions TSRMLS_CC)) == NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
RETURN_FALSE;
}
- php_pcre_split(re, extra, Z_STRVAL_PP(subject), Z_STRLEN_PP(subject),
return_value,
- coptions, limit_val, no_empty, delim_capture, offset_capture
TSRMLS_CC);
+ php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val,
flags TSRMLS_CC);
}
/* {{{ php_pcre_split
*/
-PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
- int coptions, int limit_val, int no_empty, int delim_capture, int
offset_capture TSRMLS_DC)
+PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+ long limit_val, long flags TSRMLS_DC)
{
+ pcre_extra *extra = NULL; /* Holds results of
studying */
pcre *re_bump = NULL; /* Regex instance for
empty matches */
pcre_extra *extra_bump = NULL; /* Almost dummy */
pcre_extra extra_data; /* Used locally for
exec options */
- int *offsets; /*
Array of subpattern offsets */
- int size_offsets; /* Size of the
offsets array */
+ int *offsets; /*
Array of subpattern offsets */
+ int size_offsets; /* Size of the
offsets array */
int exoptions = 0; /* Execution
options */
int count = 0; /*
Count of matched subpatterns */
int start_offset; /* Where the
new search starts */
@@ -1402,6 +1394,17 @@
char *match, /* The current
match */
*last_match; /* Location of
last match */
int rc;
+ int no_empty; /* If
NO_EMPTY flag is set */
+ int delim_capture; /* If
delimiters should be captured */
+ int offset_capture; /* If offsets
should be captured */
+
+ no_empty = flags & PREG_SPLIT_NO_EMPTY;
+ delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
+ offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
+
+ if (limit_val == 0) {
+ limit_val = -1;
+ }
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
@@ -1414,10 +1417,9 @@
array_init(return_value);
/* Calculate the size of the offsets array, and allocate memory for it.
*/
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT,
&size_offsets);
if (rc < 0) {
- php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d",
- get_active_function_name(TSRMLS_C), rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal
pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
size_offsets = (size_offsets + 1) * 3;
@@ -1432,7 +1434,7 @@
/* Get next piece if no limit or limit not yet reached and something
matched*/
while ((limit_val == -1 || limit_val > 1)) {
- count = pcre_exec(re, extra, subject,
+ count = pcre_exec(pce->re, extra, subject,
subject_len, start_offset,
exoptions|g_notempty,
offsets, size_offsets);
@@ -1452,7 +1454,7 @@
/* Add (match, offset) pair to the
return value */
add_offset_pair(return_value,
last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
} else {
- /* Add the piece to the return value */
+ /* Add the piece to the return value */
add_next_index_stringl(return_value,
last_match,
&subject[offsets[0]]-last_match, 1);
}
@@ -1463,7 +1465,7 @@
}
last_match = &subject[offsets[1]];
- next_offset = offsets[1];
+ next_offset = offsets[1];
if (delim_capture) {
int i, match_len;
@@ -1487,7 +1489,7 @@
the start offset, and continue. Fudge the offset
values
to achieve this, unless we're already at the end of
the string. */
if (g_notempty != 0 && start_offset < subject_len) {
- if (coptions & PCRE_UTF8) {
+ if (pce->compile_options & PCRE_UTF8) {
if (re_bump == NULL) {
int dummy;
@@ -1542,15 +1544,16 @@
}
/* }}} */
-/* {{{ proto string preg_quote(string str, string delim_char)
+/* {{{ proto string preg_quote(string str [, string delim_char])
Quote regular expression characters plus an optional character */
PHP_FUNCTION(preg_quote)
{
- zval **in_str_arg; /* Input string argument */
- zval **delim; /* Additional delimiter argument */
- char *in_str, /* Input string */
- *in_str_end, /* End of the input string */
- *out_str, /* Output string with quoted
characters */
+ int in_str_len;
+ char *in_str; /* Input string argument */
+ char *in_str_end; /* End of the input string */
+ int delim_len;
+ char *delim = NULL; /* Additional delimiter argument */
+ char *out_str, /* Output string with quoted characters
*/
*p, /* Iterator for input
string */
*q, /* Iterator for output
string */
delim_char=0, /* Delimiter character to be quoted */
@@ -1558,32 +1561,26 @@
zend_bool quote_delim = 0; /* Whether to quote additional delim char */
/* Get the arguments and check for errors */
- if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2 ||
- zend_get_parameters_ex(ZEND_NUM_ARGS(), &in_str_arg, &delim) ==
FAILURE) {
- WRONG_PARAM_COUNT;
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str,
&in_str_len,
+ &delim, &delim_len)
== FAILURE) {
+ return;
}
- /* Make sure we're working with strings */
- convert_to_string_ex(in_str_arg);
- in_str = Z_STRVAL_PP(in_str_arg);
- in_str_end = Z_STRVAL_PP(in_str_arg) + Z_STRLEN_PP(in_str_arg);
+ in_str_end = in_str + in_str_len;
/* Nothing to do if we got an empty string */
if (in_str == in_str_end) {
RETURN_EMPTY_STRING();
}
- if (ZEND_NUM_ARGS() == 2) {
- convert_to_string_ex(delim);
- if (Z_STRLEN_PP(delim) > 0) {
- delim_char = Z_STRVAL_PP(delim)[0];
- quote_delim = 1;
- }
+ if (delim && *delim) {
+ delim_char = delim[0];
+ quote_delim = 1;
}
/* Allocate enough memory so that even if each character
is quoted, we won't run out of room */
- out_str = safe_emalloc(4, Z_STRLEN_PP(in_str_arg), 1);
+ out_str = safe_emalloc(4, in_str_len, 1);
/* Go through the string and quote necessary characters */
for(p = in_str, q = out_str; p != in_str_end; p++) {
@@ -1633,54 +1630,47 @@
}
/* }}} */
-/* {{{ proto array preg_grep(string regex, array input)
+/* {{{ proto array preg_grep(string regex, array input [, int flags])
Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)
{
- zval **regex, /* Regular
expression */
- **input, /*
Input array */
- **flags,
- **entry; /* An
entry in the input array */
- pcre *re = NULL; /* Compiled
regular expression */
- pcre_extra *extra = NULL; /* Holds results of
studying */
+ char *regex; /* Regular
expression */
+ int regex_len;
+ zval *input; /* Input array
*/
+ long flags = 0; /* Match
control flags */
+ pcre_cache_entry *pce; /* Compiled regular
expression */
+
+ /* Get arguments and do error checking */
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex,
&regex_len,
+ &input, &flags) ==
FAILURE) {
+ return;
+ }
+
+ /* Compile regex or get it from cache. */
+ if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
+ RETURN_FALSE;
+ }
+
+ php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
+}
+
+PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval
*return_value,
+ long flags TSRMLS_DC)
+{
+ zval **entry; /* An entry in
the input array */
+ pcre_extra *extra = pce->extra;/* Holds results of
studying */
pcre_extra extra_data; /* Used locally for
exec options */
- int preg_options = 0; /* Custom preg
options */
int *offsets; /*
Array of subpattern offsets */
int size_offsets; /* Size of the
offsets array */
int count = 0; /*
Count of matched subpatterns */
zstr string_key;
ulong num_key;
- zend_bool invert = 0; /* Whether to return
non-matching
+ zend_bool invert; /* Whether to
return non-matching
entries */
int rc;
- /* Get arguments and do error checking */
-
- if (ZEND_NUM_ARGS() < 2 || ZEND_NUM_ARGS() > 3 ||
- zend_get_parameters_ex(ZEND_NUM_ARGS(), &regex, &input, &flags)
== FAILURE) {
- WRONG_PARAM_COUNT;
- }
-
- if (Z_TYPE_PP(input) != IS_ARRAY) {
- php_error_docref(NULL TSRMLS_CC,E_WARNING, "Second argument to
preg_grep() should be an array");
- return;
- }
-
- SEPARATE_ZVAL(input);
+ invert = flags & PREG_GREP_INVERT ? 1 : 0;
- /* Make sure regex is a string */
- convert_to_string_ex(regex);
-
- if (ZEND_NUM_ARGS() > 2) {
- convert_to_long_ex(flags);
- invert = (Z_LVAL_PP(flags) & PREG_GREP_INVERT) ? 1 : 0;
- }
-
- /* Compile regex or get it from cache. */
- if ((re = pcre_get_compiled_regex(Z_STRVAL_PP(regex), &extra,
&preg_options TSRMLS_CC)) == NULL) {
- RETURN_FALSE;
- }
-
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT |
PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra = &extra_data;
@@ -1689,10 +1679,9 @@
extra->match_limit_recursion = PCRE_G(recursion_limit);
/* Calculate the size of the offsets array, and allocate memory for it.
*/
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT,
&size_offsets);
if (rc < 0) {
- php_error(E_WARNING, "%s: internal pcre_fullinfo() error %d",
- get_active_function_name(TSRMLS_C), rc);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal
pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
size_offsets = (size_offsets + 1) * 3;
@@ -1704,13 +1693,13 @@
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
/* Go through the input array */
- zend_hash_internal_pointer_reset(Z_ARRVAL_PP(input));
- while(zend_hash_get_current_data(Z_ARRVAL_PP(input), (void **)&entry)
== SUCCESS) {
+ zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
+ while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) ==
SUCCESS) {
convert_to_string_ex(entry);
/* Perform the match */
- count = pcre_exec(re, extra, Z_STRVAL_PP(entry),
+ count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry),
Z_STRLEN_PP(entry), 0,
0, offsets, size_offsets);
@@ -1729,8 +1718,12 @@
(*entry)->refcount++;
/* Add to return array */
- switch (zend_hash_get_current_key(Z_ARRVAL_PP(input),
&string_key, &num_key, 0))
+ switch (zend_hash_get_current_key(Z_ARRVAL_P(input),
&string_key, &num_key, 0))
{
+ case HASH_KEY_IS_UNICODE:
+ add_u_assoc_zval(return_value,
IS_UNICODE, string_key, *entry);
+ break;
+
case HASH_KEY_IS_STRING:
zend_hash_update(Z_ARRVAL_P(return_value), string_key.s,
strlen(string_key.s)+1, entry, sizeof(zval *), NULL);
@@ -1743,7 +1736,7 @@
}
}
- zend_hash_move_forward(Z_ARRVAL_PP(input));
+ zend_hash_move_forward(Z_ARRVAL_P(input));
}
/* Clean up */
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.h?r1=1.48&r2=1.49&diff_format=u
Index: php-src/ext/pcre/php_pcre.h
diff -u php-src/ext/pcre/php_pcre.h:1.48 php-src/ext/pcre/php_pcre.h:1.49
--- php-src/ext/pcre/php_pcre.h:1.48 Wed Jul 19 20:30:52 2006
+++ php-src/ext/pcre/php_pcre.h Thu Jul 20 21:19:05 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.h,v 1.48 2006/07/19 20:30:52 helly Exp $ */
+/* $Id: php_pcre.h,v 1.49 2006/07/20 21:19:05 helly Exp $ */
#ifndef PHP_PCRE_H
#define PHP_PCRE_H
@@ -44,10 +44,6 @@
PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int
limit, int *replace_count TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*options TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *coptions TSRMLS_DC);
-PHPAPI void php_pcre_split(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
- int coptions, int limit_val, int no_empty, int delim_capture, int
offset_capture TSRMLS_DC);
-PHPAPI void php_pcre_match(pcre *re, pcre_extra *extra, char *subject, int
subject_len, zval *return_value,
- zval *subpats, int global, int preg_options, long start_offset, int
subpats_order, int offset_capture TSRMLS_DC);
extern zend_module_entry pcre_module_entry;
#define pcre_module_ptr &pcre_module_entry
@@ -64,7 +60,19 @@
int refcount;
} pcre_cache_entry;
-PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len, pcre_extra **extra, int *preg_options, int *compile_options
TSRMLS_DC);
+PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len TSRMLS_DC);
+
+PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+ zval *subpats, int global, int use_flags, long flags, long start_offset
TSRMLS_DC);
+
+PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+ int is_callable_replace, int *result_len, int limit, int *replace_count
TSRMLS_DC);
+
+PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+ long limit_val, long flags TSRMLS_DC);
+
+PHPAPI void php_pcre_grep_impl( pcre_cache_entry *pce, zval *input, zval
*return_value,
+ long flags TSRMLS_DC);
ZEND_BEGIN_MODULE_GLOBALS(pcre)
HashTable pcre_cache;
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/grep2.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/grep2.phpt
diff -u php-src/ext/pcre/tests/grep2.phpt:1.2
php-src/ext/pcre/tests/grep2.phpt:1.3
--- php-src/ext/pcre/tests/grep2.phpt:1.2 Sun Jul 16 19:36:23 2006
+++ php-src/ext/pcre/tests/grep2.phpt Thu Jul 20 21:19:05 2006
@@ -19,10 +19,10 @@
?>
--EXPECTF--
-Warning: Wrong parameter count for preg_grep() in %sgrep2.php on line 3
+Warning: preg_grep() expects at most 3 parameters, 4 given in %sgrep2.php on line 3
NULL
-Warning: preg_grep(): Second argument to preg_grep() should be an array in %sgrep2.php on line 4
+Warning: preg_grep() expects parameter 2 to be array, integer given in %sgrep2.php on line 4
NULL
Warning: preg_grep(): Compilation failed: nothing to repeat at offset 0 in %sgrep2.php on line 5
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/match_flags3.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/match_flags3.phpt
diff -u php-src/ext/pcre/tests/match_flags3.phpt:1.2 php-src/ext/pcre/tests/match_flags3.phpt:1.3
--- php-src/ext/pcre/tests/match_flags3.phpt:1.2 Mon Jul 3 14:55:49 2006
+++ php-src/ext/pcre/tests/match_flags3.phpt Thu Jul 20 21:19:05 2006
@@ -18,8 +18,9 @@
?>
--EXPECTF--
-Warning: Wrong value for parameter 4 in call to preg_match() in %smatch_flags3.php on line 3
-NULL
+
+Warning: preg_match(): Empty regular expression in %smatch_flags3.php on line 3
+bool(false)
int(1)
array(1) {
[0]=>
@@ -41,5 +42,5 @@
}
}
-Warning: preg_match: numeric named subpatterns are not allowed in %smatch_flags3.php on line 14
+Warning: preg_match(): Numeric named subpatterns are not allowed in %smatch_flags3.php on line 14
bool(false)
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/preg_replace.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/preg_replace.phpt
diff -u php-src/ext/pcre/tests/preg_replace.phpt:1.2 php-src/ext/pcre/tests/preg_replace.phpt:1.3
--- php-src/ext/pcre/tests/preg_replace.phpt:1.2 Mon Jul 3 14:55:49 2006
+++ php-src/ext/pcre/tests/preg_replace.phpt Thu Jul 20 21:19:05 2006
@@ -18,18 +18,18 @@
string(4) "abcd"
string(8) "zaab2k3l"
-Warning: preg_replace_callback(): requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8
+Warning: preg_replace_callback(): Requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8
string(0) ""
-Warning: preg_replace_callback(): /e modifier cannot be used with replacement callback in %spreg_replace.php on line 10
+Warning: preg_replace_callback(): Modifier /e cannot be used with replacement callback in %spreg_replace.php on line 10
NULL
---EXPECTUF--
+--UEXPECTF--
string(1) "x"
string(4) "abcd"
string(8) "zaab2k3l"
-Warning: preg_replace_callback(): requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8
+Warning: preg_replace_callback(): Requires argument 2, '', to be a valid callback in %spreg_replace.php on line 8
unicode(0) ""
-Warning: preg_replace_callback(): /e modifier cannot be used with replacement callback in %spreg_replace.php on line 10
+Warning: preg_replace_callback(): Modifier /e cannot be used with replacement callback in %spreg_replace.php on line 10
NULL
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/preg_replace_callback2.phpt?r1=1.3&r2=1.4&diff_format=u
Index: php-src/ext/pcre/tests/preg_replace_callback2.phpt
diff -u php-src/ext/pcre/tests/preg_replace_callback2.phpt:1.3 php-src/ext/pcre/tests/preg_replace_callback2.phpt:1.4
--- php-src/ext/pcre/tests/preg_replace_callback2.phpt:1.3 Mon Jul 3 16:03:39 2006
+++ php-src/ext/pcre/tests/preg_replace_callback2.phpt Thu Jul 20 21:19:05 2006
@@ -38,3 +38,20 @@
string(7) "'A'rray"
}
string(3) "aBC"
+--UEXPECTF--
+Warning: preg_replace_callback(): Unable to call custom replacement function in %spreg_replace_callback2.php on line %d
+array(3) {
+ [0]=>
+ string(12) "'a' 'b3' bcd"
+ [u"v"]=>
+ string(6) "aksfjk"
+ [12]=>
+ string(9) "'aa' 'bb'"
+}
+
+Notice: Array to string conversion in %spreg_replace_callback2.php on line 17
+array(1) {
+ [0]=>
+ string(7) "'A'rray"
+}
+string(3) "aBC"
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/split.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/split.phpt
diff -u php-src/ext/pcre/tests/split.phpt:1.2 php-src/ext/pcre/tests/split.phpt:1.3
--- php-src/ext/pcre/tests/split.phpt:1.2 Sun Jul 16 19:36:23 2006
+++ php-src/ext/pcre/tests/split.phpt Thu Jul 20 21:19:05 2006
@@ -17,8 +17,8 @@
?>
--EXPECTF--
-Warning: Wrong parameter count for preg_split() in %ssplit.php on line 3
-NULL
+Warning: preg_split() expects at least 2 parameters, 0 given in %ssplit.php on line 3
+bool(false)
Warning: preg_split(): Compilation failed: nothing to repeat at offset 0 in %ssplit.php on line 4
bool(false)
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php