dmitry Fri Jul 6 23:06:51 2007 UTC
Modified files:
/php-src/ext/pcre php_pcre.c php_pcre.h
/php-src/ext/pcre/tests bug27103.phpt bug40909.phpt
invalid_utf8.phpt
/php-src/ext/spl spl_iterators.c
/php-src/win32 sendmail.c
Log:
PCRE unicode/binary support
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.213&r2=1.214&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.213 php-src/ext/pcre/php_pcre.c:1.214
--- php-src/ext/pcre/php_pcre.c:1.213 Tue Jun 12 14:07:42 2007
+++ php-src/ext/pcre/php_pcre.c Fri Jul 6 23:06:51 2007
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.c,v 1.213 2007/06/12 14:07:42 scottmac Exp $ */
+/* $Id: php_pcre.c,v 1.214 2007/07/06 23:06:51 dmitry Exp $ */
/* TODO
* php_pcre_replace_impl():
@@ -191,7 +191,7 @@
/* {{{ pcre_get_compiled_regex_cache
*/
-PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len TSRMLS_DC)
+PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_uchar utype, char
*regex, int regex_len TSRMLS_DC)
{
pcre *re = NULL;
pcre_extra *extra;
@@ -333,7 +333,7 @@
}
}
- if (UG(unicode)) {
+ if (utype == IS_UNICODE) {
coptions |= PCRE_UTF8;
}
@@ -405,7 +405,7 @@
*/
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*preg_options TSRMLS_DC)
{
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex) TSRMLS_CC);
+ pcre_cache_entry * pce = pcre_get_compiled_regex_cache(ZEND_STR_TYPE,
regex, strlen(regex) TSRMLS_CC);
if (extra) {
*extra = pce ? pce->extra : NULL;
@@ -422,7 +422,7 @@
*/
PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *compile_options TSRMLS_DC)
{
- pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex,
strlen(regex) TSRMLS_CC);
+ pcre_cache_entry * pce = pcre_get_compiled_regex_cache(ZEND_STR_TYPE,
regex, strlen(regex) TSRMLS_CC);
if (extra) {
*extra = pce ? pce->extra : NULL;
@@ -439,7 +439,7 @@
/* }}} */
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int
offset, char *name, offset_map_t *prev TSRMLS_DC)
+static inline void add_offset_pair(zval *result, zend_uchar utype, char *str,
int len, int offset, char *name, offset_map_t *prev TSRMLS_DC)
{
zval *match_pair;
int tmp;
@@ -449,16 +449,25 @@
INIT_PZVAL(match_pair);
/* Add (match, offset) to the return value */
- add_next_index_utf8_stringl(match_pair, str, len, 1);
+ if (utype == IS_UNICODE) {
+ add_next_index_utf8_stringl(match_pair, str, len, 1);
+ } else {
+ add_next_index_stringl(match_pair, str, len, 1);
+ }
/* Calculate codepoint offset from the previous chunk */
if (offset) {
- tmp = prev->byte_offset;
- while (tmp < offset) {
- U8_FWD_1(prev->str, tmp, offset);
- prev->cp_offset++;
- }
- prev->byte_offset = tmp;
+ if (utype == IS_UNICODE) {
+ tmp = prev->byte_offset;
+ while (tmp < offset) {
+ U8_FWD_1(prev->str, tmp, offset);
+ prev->cp_offset++;
+ }
+ prev->byte_offset = tmp;
+ } else {
+ prev->cp_offset = offset;
+ prev->byte_offset = offset;
+ }
}
add_next_index_long(match_pair, prev->cp_offset);
@@ -511,7 +520,7 @@
}
/* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex.s, regex_len TSRMLS_CC))
== NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len
TSRMLS_CC)) == NULL) {
if (str_type == IS_UNICODE) {
efree(regex_utf8);
efree(subject_utf8);
@@ -519,7 +528,7 @@
RETURN_FALSE;
}
- php_pcre_match_impl(pce, subject.s, subject_len, return_value, subpats,
+ php_pcre_match_impl(pce, str_type, subject.s, subject_len,
return_value, subpats,
global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
if (str_type == IS_UNICODE) {
@@ -530,7 +539,7 @@
/* }}} */
/* {{{ php_pcre_match_impl */
-PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_uchar utype, char
*subject, int subject_len, zval *return_value,
zval *subpats, int global, int use_flags, long flags, long start_offset
TSRMLS_DC)
{
zval *result_set, /* Holds a set of
subpatterns after
@@ -580,7 +589,7 @@
offset_capture = 0;
}
- if (UG(unicode)) {
+ if (utype == IS_UNICODE) {
int k = 0;
/* Calculate byte offset from codepoint offset */
if (start_offset < 0) {
@@ -590,7 +599,6 @@
U8_FWD_N(subject, k, subject_len, start_offset);
}
start_offset = k;
- exoptions |= PCRE_NO_UTF8_CHECK;
} else {
/* Negative offset counts from the end of the string. */
if (start_offset < 0) {
@@ -599,6 +607,9 @@
start_offset = 0;
}
}
+ if (!(pce->compile_options & PCRE_UTF8)) {
+ exoptions |= PCRE_NO_UTF8_CHECK;
+ }
}
if (extra == NULL) {
@@ -712,11 +723,14 @@
/* For each subpattern, insert
it into the appropriate array. */
for (i = 0; i < count; i++) {
if (offset_capture) {
-
add_offset_pair(match_sets[i], (char *)stringlist[i],
+
add_offset_pair(match_sets[i], utype, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, &map
TSRMLS_CC);
- } else {
+ } else if (utype ==
IS_UNICODE) {
add_next_index_utf8_stringl(match_sets[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ } else {
+
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
+
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
/*
@@ -744,16 +758,23 @@
/* Add all the subpatterns to
it */
for (i = 0; i < count; i++) {
if (offset_capture) {
-
add_offset_pair(result_set, (char *)stringlist[i],
+
add_offset_pair(result_set, utype, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1],
subpat_names[i], &map TSRMLS_CC);
- } else {
+ } else if (utype ==
IS_UNICODE) {
if
(subpat_names[i]) {
-
add_assoc_utf8_stringl(result_set, subpat_names[i], (char *)stringlist[i],
+
add_utf8_assoc_utf8_stringl(result_set, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
add_next_index_utf8_stringl(result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ } else {
+ if
(subpat_names[i]) {
+
add_rt_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
+
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
+
add_next_index_stringl(result_set, (char *)stringlist[i],
+
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
/* And add it to the output
array */
@@ -763,16 +784,23 @@
/* For each subpattern, insert it into
the subpatterns array. */
for (i = 0; i < count; i++) {
if (offset_capture) {
-
add_offset_pair(subpats, (char *)stringlist[i],
+
add_offset_pair(subpats, utype, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
offsets[i<<1], subpat_names[i], &map TSRMLS_CC);
- } else {
+ } else if (utype == IS_UNICODE)
{
if (subpat_names[i]) {
-
add_assoc_utf8_stringl(subpats, subpat_names[i], (char *)stringlist[i],
+
add_utf8_assoc_utf8_stringl(subpats, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
add_next_index_utf8_stringl(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ } else {
+ if (subpat_names[i]) {
+
add_rt_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
+
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
+
add_next_index_stringl(subpats, (char *)stringlist[i],
+
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
}
@@ -786,7 +814,7 @@
to achieve this, unless we're already at the end of
the string. */
if (g_notempty != 0 && start_offset < subject_len) {
offsets[0] = start_offset;
- if (UG(unicode) || pce->compile_options &
PCRE_UTF8) {
+ if (utype == IS_UNICODE || pce->compile_options
& PCRE_UTF8) {
offsets[1] = start_offset;
U8_FWD_1(subject, offsets[1],
subject_len);
} else {
@@ -1026,7 +1054,8 @@
/* {{{ php_pcre_replace
*/
-PHPAPI char *php_pcre_replace(char *regex, int regex_len,
+PHPAPI char *php_pcre_replace(zend_uchar utype,
+ char *regex, int
regex_len,
char *subject, int
subject_len,
zval *replace_val,
int is_callable_replace,
int *result_len, int
limit, int *replace_count TSRMLS_DC)
@@ -1034,17 +1063,17 @@
pcre_cache_entry *pce; /* Compiled regular
expression */
/* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(utype, regex, regex_len
TSRMLS_CC)) == NULL) {
return NULL;
}
- return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
+ return php_pcre_replace_impl(pce, utype, subject, subject_len,
replace_val,
is_callable_replace, result_len, limit, replace_count
TSRMLS_CC);
}
/* }}} */
/* {{{ php_pcre_replace_impl() */
-PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *replace_val,
+PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype,
char *subject, int subject_len, zval *replace_val,
int is_callable_replace, int *result_len, int limit, int *replace_count
TSRMLS_DC)
{
pcre_extra *extra = pce->extra;/* Holds results of
studying */
@@ -1112,7 +1141,7 @@
start_offset = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (UG(unicode)) {
+ if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
exoptions |= PCRE_NO_UTF8_CHECK;
}
@@ -1227,7 +1256,7 @@
to achieve this, unless we're already at the end of
the string. */
if (g_notempty != 0 && start_offset < subject_len) {
offsets[0] = start_offset;
- if (UG(unicode) || pce->compile_options &
PCRE_UTF8) {
+ if (utype == IS_UNICODE || pce->compile_options
& PCRE_UTF8) {
offsets[1] = start_offset;
U8_FWD_1(subject, offsets[1],
subject_len);
} else {
@@ -1284,8 +1313,10 @@
char *subject_value,
*result;
int subject_len;
+ zend_uchar utype;
/* Make sure we're dealing with strings. */
+ utype = Z_TYPE_PP(subject);
convert_to_string_with_converter_ex(subject, UG(utf8_conv));
ZVAL_STRINGL(&empty_replace, "", 0, 0);
@@ -1325,7 +1356,8 @@
/* Do the actual replacement and put the result back
into subject_value
for further replacements. */
- if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
+ if ((result = php_pcre_replace(utype,
+
Z_STRVAL_PP(regex_entry),
Z_STRLEN_PP(regex_entry),
subject_value,
subject_len,
@@ -1344,7 +1376,8 @@
return subject_value;
} else {
- result = php_pcre_replace(Z_STRVAL_P(regex),
+ result = php_pcre_replace(utype,
+
Z_STRVAL_P(regex),
Z_STRLEN_P(regex),
Z_STRVAL_PP(subject),
Z_STRLEN_PP(subject),
@@ -1375,7 +1408,8 @@
ulong num_key;
zval callback_name;
int replace_count=0;
- int *replace_count_ptr=NULL;
+ int *replace_count_ptr=NULL;
+ zend_uchar utype;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/z/z/|lz",
&regex,
&replace, &subject,
&limit, &zcount) == FAILURE) {
@@ -1419,21 +1453,34 @@
and add the result to the return_value array. */
while (zend_hash_get_current_data(Z_ARRVAL_P(subject), (void
**)&subject_entry) == SUCCESS) {
SEPARATE_ZVAL(subject_entry);
+ utype = Z_TYPE_PP(subject_entry);
if ((result = php_replace_in_subject(regex, replace,
subject_entry, &result_len, limit, is_callable_replace, replace_count_ptr
TSRMLS_CC)) != NULL) {
/* Add to return array */
switch
(zend_hash_get_current_key_ex(Z_ARRVAL_P(subject), &string_key,
&string_key_len, &num_key, 0, NULL))
{
case HASH_KEY_IS_UNICODE:
-
add_u_assoc_utf8_stringl_ex(return_value, IS_UNICODE, string_key,
string_key_len, result, result_len, ZSTR_AUTOFREE);
+ if (utype == IS_UNICODE ||
(UG(unicode) && utype != IS_STRING)) {
+
add_u_assoc_utf8_stringl_ex(return_value, IS_UNICODE, string_key,
string_key_len, result, result_len, ZSTR_AUTOFREE);
+ } else {
+
add_u_assoc_stringl_ex(return_value, IS_UNICODE, string_key, string_key_len,
result, result_len, 0);
+ }
break;
case HASH_KEY_IS_STRING:
-
add_u_assoc_utf8_stringl_ex(return_value, IS_STRING, string_key,
string_key_len, result, result_len, ZSTR_AUTOFREE);
+ if (utype == IS_UNICODE ||
(UG(unicode) && utype != IS_STRING)) {
+
add_u_assoc_utf8_stringl_ex(return_value, IS_STRING, string_key,
string_key_len, result, result_len, ZSTR_AUTOFREE);
+ } else {
+
add_u_assoc_stringl_ex(return_value, IS_STRING, string_key, string_key_len,
result, result_len, 0);
+ }
break;
case HASH_KEY_IS_LONG:
-
add_index_utf8_stringl(return_value, num_key, result, result_len,
ZSTR_AUTOFREE);
+ if (utype == IS_UNICODE ||
(UG(unicode) && utype != IS_STRING)) {
+
add_index_utf8_stringl(return_value, num_key, result, result_len,
ZSTR_AUTOFREE);
+ } else {
+
add_index_stringl(return_value, num_key, result, result_len, 0);
+ }
break;
}
}
@@ -1441,8 +1488,13 @@
zend_hash_move_forward(Z_ARRVAL_P(subject));
}
} else { /* if subject is not an array */
+ utype = Z_TYPE_P(subject);
if ((result = php_replace_in_subject(regex, replace, &subject,
&result_len, limit, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL)
{
- RETVAL_UTF8_STRINGL(result, result_len, ZSTR_AUTOFREE);
+ if (utype == IS_UNICODE || (UG(unicode) && utype !=
IS_STRING)) {
+ RETVAL_UTF8_STRINGL(result, result_len,
ZSTR_AUTOFREE);
+ } else {
+ RETVAL_STRINGL(result, result_len, 0);
+ }
}
}
if (replace_count_ptr) {
@@ -1501,7 +1553,7 @@
}
/* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex.s, regex_len TSRMLS_CC))
== NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(str_type, regex.s, regex_len
TSRMLS_CC)) == NULL) {
if (str_type == IS_UNICODE) {
efree(regex_utf8);
efree(subject_utf8);
@@ -1509,7 +1561,7 @@
RETURN_FALSE;
}
- php_pcre_split_impl(pce, subject.s, subject_len, return_value,
limit_val, flags TSRMLS_CC);
+ php_pcre_split_impl(pce, str_type, subject.s, subject_len,
return_value, limit_val, flags TSRMLS_CC);
if (str_type == IS_UNICODE) {
efree(regex_utf8);
@@ -1520,7 +1572,7 @@
/* {{{ php_pcre_split_impl
*/
-PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_uchar utype, char
*subject, int subject_len, zval *return_value,
long limit_val, long flags TSRMLS_DC)
{
pcre_extra *extra = NULL; /* Holds results of
studying */
@@ -1574,7 +1626,7 @@
match = NULL;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (UG(unicode)) {
+ if (utype != IS_UNICODE && !(pce->compile_options & PCRE_UTF8)) {
exoptions |= PCRE_NO_UTF8_CHECK;
}
@@ -1598,11 +1650,15 @@
if (offset_capture) {
/* Add (match, offset) pair to the
return value */
- add_offset_pair(return_value,
last_match, &subject[offsets[0]]-last_match, next_offset, NULL, &map TSRMLS_CC);
- } else {
+ add_offset_pair(return_value, utype,
last_match, &subject[offsets[0]]-last_match, next_offset, NULL, &map TSRMLS_CC);
+ } else if (utype == IS_UNICODE) {
/* Add the piece to the return value */
add_next_index_utf8_stringl(return_value, last_match,
&subject[offsets[0]]-last_match, 1);
+ } else {
+ /* Add the piece to the return value */
+ add_next_index_stringl(return_value,
last_match,
+
&subject[offsets[0]]-last_match, 1);
}
/* One less left to do */
@@ -1620,11 +1676,14 @@
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
-
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len,
+
add_offset_pair(return_value, utype, &subject[offsets[i<<1]], match_len,
offsets[i<<1], NULL, &map TSRMLS_CC);
- } else {
+ } else if (utype == IS_UNICODE)
{
add_next_index_utf8_stringl(return_value, &subject[offsets[i<<1]],
match_len, 1);
+ } else {
+
add_next_index_stringl(return_value, &subject[offsets[i<<1]],
+
match_len, 1);
}
}
}
@@ -1636,7 +1695,7 @@
to achieve this, unless we're already at the end of
the string. */
if (g_notempty != 0 && start_offset < subject_len) {
offsets[0] = start_offset;
- if (UG(unicode) || pce->compile_options &
PCRE_UTF8) {
+ if (utype == IS_UNICODE || pce->compile_options
& PCRE_UTF8) {
offsets[1] = start_offset;
U8_FWD_1(subject, offsets[1],
subject_len);
} else {
@@ -1664,11 +1723,14 @@
{
if (offset_capture) {
/* Add the last (match, offset) pair to the return
value */
- add_offset_pair(return_value, &subject[start_offset],
+ add_offset_pair(return_value, utype,
&subject[start_offset],
subject_len -
start_offset, start_offset, NULL, &map TSRMLS_CC);
- } else {
+ } else if (utype == IS_UNICODE) {
/* Add the last piece to the return value */
add_next_index_utf8_stringl(return_value, last_match,
subject + subject_len - last_match, 1);
+ } else {
+ /* Add the last piece to the return value */
+ add_next_index_stringl(return_value, last_match,
subject + subject_len - last_match, 1);
}
}
@@ -1786,24 +1848,41 @@
Searches array and returns entries which match regex */
PHP_FUNCTION(preg_grep)
{
- char *regex; /* Regular
expression */
+ zstr regex; /* Regular
expression */
int regex_len;
+ char* regex_utf8;
+ int regex_utf8_len;
+ zend_uchar regex_type;
zval *input; /* Input array
*/
long flags = 0; /* Match
control flags */
pcre_cache_entry *pce; /* Compiled regular
expression */
+ UErrorCode status = U_ZERO_ERROR;
/* Get arguments and do error checking */
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&a|l", &regex,
- &regex_len,
UG(utf8_conv), &input, &flags) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ta|l", &regex,
+ &regex_len,
&regex_type, &input, &flags) == FAILURE) {
return;
}
+ if (regex_type == IS_UNICODE) {
+ zend_unicode_to_string_ex(UG(utf8_conv), &regex_utf8,
&regex_utf8_len, regex.u, regex_len, &status);
+ regex.s = regex_utf8;
+ regex_len = regex_utf8_len;
+ }
+
/* Compile regex or get it from cache. */
- if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC))
== NULL) {
+ if ((pce = pcre_get_compiled_regex_cache(regex_type, regex.s, regex_len
TSRMLS_CC)) == NULL) {
+ if (regex_type == IS_UNICODE) {
+ efree(regex_utf8);
+ }
RETURN_FALSE;
}
php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
+
+ if (regex_type == IS_UNICODE) {
+ efree(regex_utf8);
+ }
}
/* }}} */
@@ -1849,10 +1928,6 @@
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
- if (UG(unicode)) {
- exoptions |= PCRE_NO_UTF8_CHECK;
- }
-
/* Go through the input array */
zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) ==
SUCCESS) {
@@ -1866,7 +1941,7 @@
/* Perform the match */
count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
Z_STRLEN(subject),
- 0, exoptions, offsets,
size_offsets);
+ 0, exoptions |
((Z_TYPE_PP(entry) != IS_UNICODE && !(pce->compile_options &
PCRE_UTF8))?PCRE_NO_UTF8_CHECK:0), offsets, size_offsets);
/* Check for too many substrings condition. */
if (count == 0) {
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.h?r1=1.52&r2=1.53&diff_format=u
Index: php-src/ext/pcre/php_pcre.h
diff -u php-src/ext/pcre/php_pcre.h:1.52 php-src/ext/pcre/php_pcre.h:1.53
--- php-src/ext/pcre/php_pcre.h:1.52 Mon Jan 1 09:29:27 2007
+++ php-src/ext/pcre/php_pcre.h Fri Jul 6 23:06:51 2007
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_pcre.h,v 1.52 2007/01/01 09:29:27 sebastian Exp $ */
+/* $Id: php_pcre.h,v 1.53 2007/07/06 23:06:51 dmitry Exp $ */
#ifndef PHP_PCRE_H
#define PHP_PCRE_H
@@ -41,7 +41,7 @@
PHP_FUNCTION(preg_quote);
PHP_FUNCTION(preg_grep);
-PHPAPI char *php_pcre_replace(char *regex, int regex_len, char *subject, int
subject_len, zval *replace_val, int is_callable_replace, int *result_len, int
limit, int *replace_count TSRMLS_DC);
+PHPAPI char *php_pcre_replace(zend_uchar utype, char *regex, int regex_len,
char *subject, int subject_len, zval *replace_val, int is_callable_replace, int
*result_len, int limit, int *replace_count TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int
*options TSRMLS_DC);
PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int
*preg_options, int *coptions TSRMLS_DC);
@@ -61,15 +61,15 @@
zend_bool unicode_mode;
} pcre_cache_entry;
-PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int
regex_len TSRMLS_DC);
+PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_uchar utype, char
*regex, int regex_len TSRMLS_DC);
-PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+PHPAPI void php_pcre_match_impl( pcre_cache_entry *pce, zend_uchar utype,
char *subject, int subject_len, zval *return_value,
zval *subpats, int global, int use_flags, long flags, long start_offset
TSRMLS_DC);
-PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, zend_uchar utype,
char *subject, int subject_len, zval *return_value,
int is_callable_replace, int *result_len, int limit, int *replace_count
TSRMLS_DC);
-PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, char *subject, int
subject_len, zval *return_value,
+PHPAPI void php_pcre_split_impl( pcre_cache_entry *pce, zend_uchar utype,
char *subject, int subject_len, zval *return_value,
long limit_val, long flags TSRMLS_DC);
PHPAPI void php_pcre_grep_impl( pcre_cache_entry *pce, zval *input, zval
*return_value,
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/bug27103.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/bug27103.phpt
diff -u php-src/ext/pcre/tests/bug27103.phpt:1.2
php-src/ext/pcre/tests/bug27103.phpt:1.3
--- php-src/ext/pcre/tests/bug27103.phpt:1.2 Mon Feb 12 14:22:02 2007
+++ php-src/ext/pcre/tests/bug27103.phpt Fri Jul 6 23:06:51 2007
@@ -14,9 +14,9 @@
echo htmlentities($c, 0, "UTF-8"), ": ", strlen($c), "\n";
}
}
-$teststr = "\xe2\x82\xac hi there";
-iter(preg_split('//u', $teststr, -1, PREG_SPLIT_NO_EMPTY));
-preg_match_all('/./u', $teststr, $matches);
+$teststr = b"\xe2\x82\xac hi there";
+iter(preg_split(b'//u', $teststr, -1, PREG_SPLIT_NO_EMPTY));
+preg_match_all(b'/./u', $teststr, $matches);
iter($matches[0]);
?>
--EXPECT--
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/bug40909.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/bug40909.phpt
diff -u php-src/ext/pcre/tests/bug40909.phpt:1.2
php-src/ext/pcre/tests/bug40909.phpt:1.3
--- php-src/ext/pcre/tests/bug40909.phpt:1.2 Tue Jun 19 17:33:02 2007
+++ php-src/ext/pcre/tests/bug40909.phpt Fri Jul 6 23:06:51 2007
@@ -36,3 +36,22 @@
string(19) ""simpleValueInside""
}
}
+--UEXPECT--
+int(1)
+array(3) {
+ [0]=>
+ array(1) {
+ [0]=>
+ unicode(33) " an_attribute="simpleValueInside""
+ }
+ [1]=>
+ array(1) {
+ [0]=>
+ unicode(12) "an_attribute"
+ }
+ [2]=>
+ array(1) {
+ [0]=>
+ unicode(19) ""simpleValueInside""
+ }
+}
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/tests/invalid_utf8.phpt?r1=1.2&r2=1.3&diff_format=u
Index: php-src/ext/pcre/tests/invalid_utf8.phpt
diff -u php-src/ext/pcre/tests/invalid_utf8.phpt:1.2
php-src/ext/pcre/tests/invalid_utf8.phpt:1.3
--- php-src/ext/pcre/tests/invalid_utf8.phpt:1.2 Mon Feb 12 14:22:02 2007
+++ php-src/ext/pcre/tests/invalid_utf8.phpt Fri Jul 6 23:06:51 2007
@@ -9,7 +9,7 @@
--FILE--
<?php
-$string = urldecode("search%e4");
+$string = urldecode(b"search%e4");
$result = preg_replace("#(&\#x*)([0-9A-F]+);*#iu","$1$2;",$string);
var_dump($result);
var_dump(preg_last_error());
http://cvs.php.net/viewvc.cgi/php-src/ext/spl/spl_iterators.c?r1=1.162&r2=1.163&diff_format=u
Index: php-src/ext/spl/spl_iterators.c
diff -u php-src/ext/spl/spl_iterators.c:1.162
php-src/ext/spl/spl_iterators.c:1.163
--- php-src/ext/spl/spl_iterators.c:1.162 Thu May 17 17:29:09 2007
+++ php-src/ext/spl/spl_iterators.c Fri Jul 6 23:06:51 2007
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: spl_iterators.c,v 1.162 2007/05/17 17:29:09 tony2001 Exp $ */
+/* $Id: spl_iterators.c,v 1.163 2007/07/06 23:06:51 dmitry Exp $ */
#ifdef HAVE_CONFIG_H
# include "config.h"
@@ -1024,7 +1024,7 @@
}
intern->u.regex.mode = mode;
intern->u.regex.regex = estrndup(regex, regex_len);
- intern->u.regex.pce =
pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC);
+ intern->u.regex.pce =
pcre_get_compiled_regex_cache(ZEND_STR_TYPE, regex, regex_len TSRMLS_CC);
if (intern->u.regex.pce == NULL) {
/* pcre_get_compiled_regex_cache has already
sent error */
php_set_error_handling(EH_NORMAL, NULL
TSRMLS_CC);
@@ -1417,7 +1417,7 @@
}
zval_ptr_dtor(&intern->current.data);
ALLOC_INIT_ZVAL(intern->current.data);
- php_pcre_match_impl(intern->u.regex.pce, subject, subject_len,
&zcount,
+ php_pcre_match_impl(intern->u.regex.pce, ZEND_STR_TYPE,
subject, subject_len, &zcount,
intern->current.data, intern->u.regex.mode ==
REGIT_MODE_ALL_MATCHES, intern->u.regex.use_flags, intern->u.regex.preg_flags,
0 TSRMLS_CC);
count =
zend_hash_num_elements(Z_ARRVAL_P(intern->current.data));
RETVAL_BOOL(count > 0);
@@ -1430,14 +1430,14 @@
}
zval_ptr_dtor(&intern->current.data);
ALLOC_INIT_ZVAL(intern->current.data);
- php_pcre_split_impl(intern->u.regex.pce, subject, subject_len,
intern->current.data, -1, intern->u.regex.preg_flags TSRMLS_CC);
+ php_pcre_split_impl(intern->u.regex.pce, ZEND_STR_TYPE,
subject, subject_len, intern->current.data, -1, intern->u.regex.preg_flags
TSRMLS_CC);
count =
zend_hash_num_elements(Z_ARRVAL_P(intern->current.data));
RETVAL_BOOL(count > 1);
break;
case REGIT_MODE_REPLACE:
replacement = zend_read_property(intern->std.ce, getThis(),
"replacement", sizeof("replacement")-1, 1 TSRMLS_CC);
- result = php_pcre_replace_impl(intern->u.regex.pce, subject,
subject_len, replacement, 0, &result_len, 0, NULL TSRMLS_CC);
+ result = php_pcre_replace_impl(intern->u.regex.pce,
ZEND_STR_TYPE, subject, subject_len, replacement, 0, &result_len, 0, NULL
TSRMLS_CC);
if (intern->u.regex.flags & REGIT_USE_KEY) {
if (intern->current.key_type != HASH_KEY_IS_LONG) {
http://cvs.php.net/viewvc.cgi/php-src/win32/sendmail.c?r1=1.69&r2=1.70&diff_format=u
Index: php-src/win32/sendmail.c
diff -u php-src/win32/sendmail.c:1.69 php-src/win32/sendmail.c:1.70
--- php-src/win32/sendmail.c:1.69 Sat Feb 24 16:25:55 2007
+++ php-src/win32/sendmail.c Fri Jul 6 23:06:51 2007
@@ -17,7 +17,7 @@
*
*/
-/* $Id: sendmail.c,v 1.69 2007/02/24 16:25:55 helly Exp $ */
+/* $Id: sendmail.c,v 1.70 2007/07/06 23:06:51 dmitry Exp $ */
#include "php.h" /*php specific */
#include <stdio.h>
@@ -165,7 +165,7 @@
MAKE_STD_ZVAL(replace);
ZVAL_STRING(replace, PHP_WIN32_MAIL_UNIFY_REPLACE, 0);
- result = php_pcre_replace(PHP_WIN32_MAIL_UNIFY_PATTERN,
sizeof(PHP_WIN32_MAIL_UNIFY_PATTERN)-1,
+ result = php_pcre_replace(IS_STRING, PHP_WIN32_MAIL_UNIFY_PATTERN,
sizeof(PHP_WIN32_MAIL_UNIFY_PATTERN)-1,
header,
strlen(header),
replace,
0,
@@ -179,7 +179,7 @@
ZVAL_STRING(replace, PHP_WIN32_MAIL_RMVDBL_REPLACE, 0);
- result2 = php_pcre_replace(PHP_WIN32_MAIL_RMVDBL_PATTERN,
sizeof(PHP_WIN32_MAIL_RMVDBL_PATTERN)-1,
+ result2 = php_pcre_replace(IS_STRING, PHP_WIN32_MAIL_RMVDBL_PATTERN,
sizeof(PHP_WIN32_MAIL_RMVDBL_PATTERN)-1,
result, result_len,
replace,
0,
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php