Hi Andrei, all:
In a previous patch
(http://news.php.net/article.php?group=php.dev&article=84281), support
was added to preg_split for capturing offsets along with matches. The
attached patch adds similar support to preg_match and preg_match_all via
a new PREG_MATCH_OFFSET_CAPTURE flag.
The code handles capturing offsets for both subpattern matches and whole
pattern matches, using the previously-added add_offset_pair helper function.
The flag is passed as a new, optional fourth parameter to preg_match, and
is OR'd into the existing 'order' parameter of preg_match_all, using a bit
above the PREG_PATTERN_ORDER and PREG_SET_ORDER values.
The patch below is diffed against the CVS head - humbly submitted for
application, rejection, suggestions, or extensive flaming. :)
Thanks in advance,
- Dave
[EMAIL PROTECTED]
--- ext/pcre/php_pcre.c.orig Tue Jun 4 13:02:50 2002
+++ ext/pcre/php_pcre.c Tue Jun 4 13:12:10 2002
@@ -35,7 +35,9 @@
#define PREG_PATTERN_ORDER 0
#define PREG_SET_ORDER 1
-#define PREG_SPLIT_NO_EMPTY (1<<0)
+#define PREG_MATCH_OFFSET_CAPTURE (1<<2)
+
+#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)
@@ -99,6 +101,7 @@
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS |
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS |
CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE,
+CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS |
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE,
CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE,
CONST_CS | CONST_PERSISTENT);
@@ -310,6 +313,24 @@
}
/* }}} */
+/* {{{ add_offset_pair
+ */
+static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+{
+ zval *match_pair;
+
+ ALLOC_ZVAL(match_pair);
+ array_init(match_pair);
+ INIT_PZVAL(match_pair);
+
+ /* Add (match, offset) to the return value */
+ add_next_index_stringl(match_pair, str, len, 1);
+ add_next_index_long(match_pair, offset);
+
+ zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *),
+NULL);
+}
+/* }}} */
+
/* {{{ php_pcre_match
*/
static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
@@ -335,6 +356,7 @@
int matched; /* Has
anything matched */
int i;
int subpats_order_val = 0; /* Integer value of
subpats_order */
+ int offset_capture = 0; /* If offsets should
+be captured */
int g_notempty = 0; /* If the match should
not be empty */
const char **stringlist; /* Used to hold list of
subpatterns */
char *match; /* The current match */
@@ -363,11 +385,17 @@
/* Make sure subpats_order is a number */
convert_to_long_ex(subpats_order);
- subpats_order_val = Z_LVAL_PP(subpats_order);
- if (subpats_order_val < PREG_PATTERN_ORDER ||
- subpats_order_val > PREG_SET_ORDER) {
- zend_error(E_WARNING, "Wrong value for parameter 4 in
call to preg_match_all()");
- }
+ offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE);
+
+ if (global) {
+ subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL);
+
+ if ((subpats_order_val < PREG_PATTERN_ORDER) ||
+ (subpats_order_val > PREG_SET_ORDER)) {
+ zend_error(E_WARNING, "Wrong value for parameter 4
+in call to preg_match_all()");
+ }
+ }
+
break;
default:
@@ -442,8 +470,13 @@
if (subpats_order_val == PREG_PATTERN_ORDER) {
/* For each subpattern, insert it into
the appropriate array. */
for (i = 0; i < count; i++) {
-
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
-
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ if (offset_capture) {
+
+add_offset_pair(match_sets[i], (char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ } else {
+
+add_next_index_stringl(match_sets[i], (char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
}
/*
* If the number of captured
subpatterns on this run is
@@ -463,8 +496,13 @@
/* Add all the subpatterns to it */
for (i = 0; i < count; i++) {
-
add_next_index_stringl(result_set, (char *)stringlist[i],
-
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ if (offset_capture) {
+
+add_offset_pair(result_set, (char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ } else {
+
+add_next_index_stringl(result_set, (char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
}
/* And add it to the output array */
zend_hash_next_index_insert(Z_ARRVAL_PP(subpats), &result_set,
@@ -474,8 +512,13 @@
else { /* single pattern matching */
/* For each subpattern, insert it into the
subpatterns array. */
for (i = 0; i < count; i++) {
- add_next_index_stringl((*subpats),
(char *)stringlist[i],
-
offsets[(i<<1)+1] - offsets[i<<1], 1);
+ if (offset_capture) {
+ add_offset_pair((*subpats),
+(char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1]);
+ } else {
+
+add_next_index_stringl((*subpats), (char *)stringlist[i],
+
+ offsets[(i<<1)+1] - offsets[i<<1], 1);
+ }
}
}
@@ -518,7 +561,7 @@
}
/* }}} */
-/* {{{ proto int preg_match(string pattern, string subject [, array subpatterns])
+/* {{{ proto int preg_match(string pattern, string subject [, array matches [, int
+flags]])
Perform a Perl-style regular expression match */
PHP_FUNCTION(preg_match)
{
@@ -1063,21 +1106,6 @@
preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
-
-static inline void add_offset_pair(zval *result, char *str, int len, int offset)
-{
- zval *match_pair;
-
- ALLOC_ZVAL(match_pair);
- array_init(match_pair);
- INIT_PZVAL(match_pair);
-
- /* Add (match, offset) to the return value */
- add_next_index_stringl(match_pair, str, len, 1);
- add_next_index_long(match_pair, offset);
-
- zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *),
NULL);
-}
/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int
flags]])
Split string into an array using a perl-style regular expression as a delimiter */
--
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php