[PHP-DEV] [PATCH] preg_match(_all) support for capturing offsets

2002-06-06 Thread David Brown

Hi Andrei, all:

(This is a re-send of a previous message that received no replies; my
apologies if you've already seen/considered it)

In a previous patch
(http://news.php.net/article.php?group=php.dev&article=84281), support
was added to preg_split for capturing offsets along with matches. The
attached patch adds similar support to preg_match and preg_match_all via
a new PREG_MATCH_OFFSET_CAPTURE flag.

The code handles capturing offsets for both subpattern matches and whole
pattern matches, using the previously-added add_offset_pair helper function.

The flag is a new fourth (and optional) parameter for preg_match, and
is or'd into the existing 'order' parameter for preg_match_all, above
PREG_SET_ORDER and PREG_PATTERN_ORDER.

The patch below is diffed against the CVS head - humbly submitted for
application, rejection, suggestions, or extensive flaming. :)


Thanks in advance,

- Dave
  [EMAIL PROTECTED]


--- ext/pcre/php_pcre.c.orig Tue Jun  4 13:02:50 2002
+++ ext/pcre/php_pcre.c Tue Jun  4 13:12:10 2002
@@ -35,7 +35,9 @@
 #define PREG_PATTERN_ORDER 0
 #define PREG_SET_ORDER 1
 
-#define PREG_SPLIT_NO_EMPTY (1<<0)
+#define PREG_MATCH_OFFSET_CAPTURE  (1<<2)
+
+#define PREG_SPLIT_NO_EMPTY        (1<<0)
 #define PREG_SPLIT_DELIM_CAPTURE   (1<<1)
 #define PREG_SPLIT_OFFSET_CAPTURE  (1<<2)
 
@@ -99,6 +101,7 @@

REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | 
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | 
CONST_PERSISTENT);
+   REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, 
+CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | 
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
@@ -310,6 +313,24 @@
 }
 /* }}} */
 
+/* {{{ add_offset_pair
+ */
+static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+{
+   zval *match_pair;
+
+   ALLOC_ZVAL(match_pair);
+   array_init(match_pair);
+   INIT_PZVAL(match_pair);
+
+   /* Add (match, offset) to the return value */
+   add_next_index_stringl(match_pair, str, len, 1);
+   add_next_index_long(match_pair, offset);
+   
+   zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), 
+NULL);
+}
+/* }}} */
+
 /* {{{ php_pcre_match
  */
 static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
@@ -335,6 +356,7 @@
int  matched;   /* Has 
anything matched */
int  i;
int  subpats_order_val = 0; /* Integer value of 
subpats_order */
+   int  offset_capture = 0;/* If offsets should 
+be captured */
int  g_notempty = 0;/* If the match should 
not be empty */
const char **stringlist;/* Used to hold list of 
subpatterns */
char*match; /* The current match */
@@ -363,11 +385,17 @@

/* Make sure subpats_order is a number */
convert_to_long_ex(subpats_order);
-   subpats_order_val = Z_LVAL_PP(subpats_order);
-   if (subpats_order_val < PREG_PATTERN_ORDER ||
-   subpats_order_val > PREG_SET_ORDER) {
-   zend_error(E_WARNING, "Wrong value for parameter 4 in 
call to preg_match_all()");
-   }
+offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE);
+
+   if (global) {
+  subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL);
+   
+  if ((subpats_order_val < PREG_PATTERN_ORDER) ||
+  (subpats_order_val > PREG_SET_ORDER)) {
+ zend_error(E_WARNING, "Wrong value for parameter 4 
+in call to preg_match_all()");
+ }
+}
+
+
break;

default:
@@ -442,8 +470,13 @@
if (subpats_order_val == PREG_PATTERN_ORDER) {
/* For each subpattern, insert it into 
the appropriate array. */
for (i = 0; i < count; i++) {
-   
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
-  
   

[PHP-DEV] [PATCH] preg_match(_all) support for capturing offsets

2002-06-04 Thread David Brown

Hi Andrei, all:

In a previous patch
(http://news.php.net/article.php?group=php.dev&article=84281), support
was added to preg_split for capturing offsets along with matches. The
attached patch adds similar support to preg_match and preg_match_all via
a new PREG_MATCH_OFFSET_CAPTURE flag.

The code handles capturing offsets for both subpattern matches and whole
pattern matches, using the previously-added add_offset_pair helper function.

The flag is a new fourth (and optional) parameter for preg_match, and
is or'd into the existing 'order' parameter for preg_match_all, above
PREG_SET_ORDER and PREG_PATTERN_ORDER.

The patch below is diffed against the CVS head - humbly submitted for
application, rejection, suggestions, or extensive flaming. :)


Thanks in advance,

- Dave
  [EMAIL PROTECTED]


--- ext/pcre/php_pcre.c.orig Tue Jun  4 13:02:50 2002
+++ ext/pcre/php_pcre.c Tue Jun  4 13:12:10 2002
@@ -35,7 +35,9 @@
 #define PREG_PATTERN_ORDER 0
 #define PREG_SET_ORDER 1
 
-#define PREG_SPLIT_NO_EMPTY (1<<0)
+#define PREG_MATCH_OFFSET_CAPTURE  (1<<2)
+
+#define PREG_SPLIT_NO_EMPTY        (1<<0)
 #define PREG_SPLIT_DELIM_CAPTURE   (1<<1)
 #define PREG_SPLIT_OFFSET_CAPTURE  (1<<2)
 
@@ -99,6 +101,7 @@

REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | 
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | 
CONST_PERSISTENT);
+   REGISTER_LONG_CONSTANT("PREG_MATCH_OFFSET_CAPTURE", PREG_MATCH_OFFSET_CAPTURE, 
+CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | 
CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, 
CONST_CS | CONST_PERSISTENT);
@@ -310,6 +313,24 @@
 }
 /* }}} */
 
+/* {{{ add_offset_pair
+ */
+static inline void add_offset_pair(zval *result, char *str, int len, int offset)
+{
+   zval *match_pair;
+
+   ALLOC_ZVAL(match_pair);
+   array_init(match_pair);
+   INIT_PZVAL(match_pair);
+
+   /* Add (match, offset) to the return value */
+   add_next_index_stringl(match_pair, str, len, 1);
+   add_next_index_long(match_pair, offset);
+   
+   zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), 
+NULL);
+}
+/* }}} */
+
 /* {{{ php_pcre_match
  */
 static void php_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global)
@@ -335,6 +356,7 @@
int  matched;   /* Has 
anything matched */
int  i;
int  subpats_order_val = 0; /* Integer value of 
subpats_order */
+   int  offset_capture = 0;/* If offsets should 
+be captured */
int  g_notempty = 0;/* If the match should 
not be empty */
const char **stringlist;/* Used to hold list of 
subpatterns */
char*match; /* The current match */
@@ -363,11 +385,17 @@

/* Make sure subpats_order is a number */
convert_to_long_ex(subpats_order);
-   subpats_order_val = Z_LVAL_PP(subpats_order);
-   if (subpats_order_val < PREG_PATTERN_ORDER ||
-   subpats_order_val > PREG_SET_ORDER) {
-   zend_error(E_WARNING, "Wrong value for parameter 4 in 
call to preg_match_all()");
-   }
+offset_capture = (Z_LVAL_PP(subpats_order) & PREG_MATCH_OFFSET_CAPTURE);
+
+   if (global) {
+  subpats_order_val = (Z_LVAL_PP(subpats_order) & 1UL);
+   
+  if ((subpats_order_val < PREG_PATTERN_ORDER) ||
+  (subpats_order_val > PREG_SET_ORDER)) {
+ zend_error(E_WARNING, "Wrong value for parameter 4 
+in call to preg_match_all()");
+ }
+}
+
+
break;

default:
@@ -442,8 +470,13 @@
if (subpats_order_val == PREG_PATTERN_ORDER) {
/* For each subpattern, insert it into 
the appropriate array. */
for (i = 0; i < count; i++) {
-   
add_next_index_stringl(match_sets[i], (char *)stringlist[i],
-  
offsets[(i<<1)+1] - offsets[i<<1], 1);
+   if (offset_capture) {
+