[PHP-DEV] [PATCH] preg_match(_all) support for capturing offsets
Hi Andrei, all: (This is a re-send of a previous message that received no replies; my apologies if you've already seen/considered it.) In a previous patch (http://news.php.net/article.php?group=php.dev&article=84281), support was added to preg_split for capturing offsets along with matches. The attached patch adds similar support to preg_match and preg_match_all via a new PREG_MATCH_OFFSET_CAPTURE flag. The code handles capturing offsets for both subpattern matches and whole pattern matches, using the previously-added add_offset_pair helper function. The flag is a new fourth (and optional) parameter for preg_match, and is OR'd into the existing 'order' parameter for preg_match_all, above PREG_SET_ORDER and PREG_PATTERN_ORDER. The patch below is diffed against the CVS head - humbly submitted for application, rejection, suggestions, or extensive flaming. :) Thanks in advance, - Dave [EMAIL PROTECTED] --- ext/pcre/php_pcre.c.orig Tue Jun 4 13:02:50 2002 +++ ext/pcre/php_pcre.c Tue Jun 4 13:12:10 2002 @@ -35,7 +35,9 @@ #define PREG_PATTERN_ORDER 0 #define PREG_SET_ORDER 1 -#define PREG_SPLIT_NO_EMPTY (1<<0) +#define PREG_MATCH_OFFSET_CAPTURE (1<<2) + +#define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_SPLIT_DELIM_CAPTURE (1<<1) #define PREG_SPLIT_OFFSET_CAPTURE (1<<2) @@ -99,6 +101,7 @@ REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, +CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -310,6 +313,24 @@ } /* }}} */ +/* {{{ add_offset_pair + */ +static inline void add_offset_pair(zval *result, char 
*str, int len, int offset) +{ + zval *match_pair; + + ALLOC_ZVAL(match_pair); + array_init(match_pair); + INIT_PZVAL(match_pair); + + /* Add (match, offset) to the return value */ + add_next_index_stringl(match_pair, str, len, 1); + add_next_index_long(match_pair, offset); + + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), +NULL); +} +/* }}} */ + /* {{{ php_pcre_match */ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) @@ -335,6 +356,7 @@ int matched; /* Has anything matched */ int i; int subpats_order_val = 0; /* Integer value of subpats_order */ + int offset_capture = 0; /* If offsets should +be captured */ int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Used to hold list of subpatterns */ char *match; /* The current match */ @@ -363,11 +385,17 @@ /* Make sure subpats_order is a number */ convert_to_long_ex(subpats_order); - subpats_order_val = Z_LVAL_PP(subpats_order); - if (subpats_order_val < PREG_PATTERN_ORDER || - subpats_order_val > PREG_SET_ORDER) { - zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()"); - } +offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE); + + if (global) { + subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL); + + if ((subpats_order_val < PREG_PATTERN_ORDER) || + (subpats_order_val > PREG_SET_ORDER)) { + zend_error(E_WARNING, "Wrong value for parameter 4 +in call to preg_match_all()"); + } +} + break; default: @@ -442,8 +470,13 @@ if (subpats_order_val == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ for (i = 0; i < count; i++) { - add_next_index_stringl(match_sets[i], (char *)stringlist[i], -
[PHP-DEV] [PATCH] preg_match(_all) support for capturing offsets
Hi Andrei, all: In a previous patch (http://news.php.net/article.php?group=php.dev&article=84281), support was added to preg_split for capturing offsets along with matches. The attached patch adds similar support to preg_match and preg_match_all via a new PREG_MATCH_OFFSET_CAPTURE flag. The code handles capturing offsets for both subpattern matches and whole pattern matches, using the previously-added add_offset_pair helper function. The flag is a new fourth (and optional) parameter for preg_match, and is OR'd into the existing 'order' parameter for preg_match_all, above PREG_SET_ORDER and PREG_PATTERN_ORDER. The patch below is diffed against the CVS head - humbly submitted for application, rejection, suggestions, or extensive flaming. :) Thanks in advance, - Dave [EMAIL PROTECTED] --- ext/pcre/php_pcre.c.orig Tue Jun 4 13:02:50 2002 +++ ext/pcre/php_pcre.c Tue Jun 4 13:12:10 2002 @@ -35,7 +35,9 @@ #define PREG_PATTERN_ORDER 0 #define PREG_SET_ORDER 1 -#define PREG_SPLIT_NO_EMPTY (1<<0) +#define PREG_MATCH_OFFSET_CAPTURE (1<<2) + +#define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_SPLIT_DELIM_CAPTURE (1<<1) #define PREG_SPLIT_OFFSET_CAPTURE (1<<2) @@ -99,6 +101,7 @@ REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, +CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -310,6 +313,24 @@ } /* }}} */ +/* {{{ add_offset_pair + */ +static inline void add_offset_pair(zval *result, char *str, int len, int offset) +{ + zval *match_pair; + + ALLOC_ZVAL(match_pair); + array_init(match_pair); + 
INIT_PZVAL(match_pair); + + /* Add (match, offset) to the return value */ + add_next_index_stringl(match_pair, str, len, 1); + add_next_index_long(match_pair, offset); + + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), +NULL); +} +/* }}} */ + /* {{{ php_pcre_match */ static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) @@ -335,6 +356,7 @@ int matched; /* Has anything matched */ int i; int subpats_order_val = 0; /* Integer value of subpats_order */ + int offset_capture = 0; /* If offsets should +be captured */ int g_notempty = 0; /* If the match should not be empty */ const char **stringlist; /* Used to hold list of subpatterns */ char *match; /* The current match */ @@ -363,11 +385,17 @@ /* Make sure subpats_order is a number */ convert_to_long_ex(subpats_order); - subpats_order_val = Z_LVAL_PP(subpats_order); - if (subpats_order_val < PREG_PATTERN_ORDER || - subpats_order_val > PREG_SET_ORDER) { - zend_error(E_WARNING, "Wrong value for parameter 4 in call to preg_match_all()"); - } +offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE); + + if (global) { + subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL); + + if ((subpats_order_val < PREG_PATTERN_ORDER) || + (subpats_order_val > PREG_SET_ORDER)) { + zend_error(E_WARNING, "Wrong value for parameter 4 +in call to preg_match_all()"); + } +} + break; default: @@ -442,8 +470,13 @@ if (subpats_order_val == PREG_PATTERN_ORDER) { /* For each subpattern, insert it into the appropriate array. */ for (i = 0; i < count; i++) { - add_next_index_stringl(match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], 1); + if (offset_capture) { +