andrei          Tue Sep 19 20:01:11 2006 UTC

  Modified files:              
    /php-src/ext/pcre   php_pcre.c 
  Log:
  Upgrade preg_quote() to support Unicode.
  
  
http://cvs.php.net/viewvc.cgi/php-src/ext/pcre/php_pcre.c?r1=1.201&r2=1.202&diff_format=u
Index: php-src/ext/pcre/php_pcre.c
diff -u php-src/ext/pcre/php_pcre.c:1.201 php-src/ext/pcre/php_pcre.c:1.202
--- php-src/ext/pcre/php_pcre.c:1.201   Tue Sep 19 17:37:48 2006
+++ php-src/ext/pcre/php_pcre.c Tue Sep 19 20:01:10 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_pcre.c,v 1.201 2006/09/19 17:37:48 andrei Exp $ */
+/* $Id: php_pcre.c,v 1.202 2006/09/19 20:01:10 andrei Exp $ */
 
 /* UTODO
  *  - PCRE_NO_UTF8_CHECK option for Unicode strings
@@ -1684,13 +1684,13 @@
        char    *out_str,               /* Output string with quoted characters */
                        *p,                             /* Iterator for input string */
                        *q,                             /* Iterator for output string */
-                        delim_char=0,  /* Delimiter character to be quoted */
                         c;                             /* Current character */
+       UChar32  delim_char=0;  /* Delimiter character to be quoted */
        zend_bool quote_delim = 0; /* Whether to quote additional delim char */
        
        /* Get the arguments and check for errors */
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
-                                                         &delim, &delim_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s&|s&", &in_str, &in_str_len, UG(utf8_conv),
+                                                         &delim, &delim_len, UG(utf8_conv)) == FAILURE) {
                return;
        }
        
@@ -1698,17 +1698,24 @@
 
        /* Nothing to do if we got an empty string */
        if (in_str == in_str_end) {
-               RETURN_EMPTY_STRING();
+               RETURN_EMPTY_TEXT();
        }
 
        if (delim && *delim) {
-               delim_char = delim[0];
+               if (UG(unicode)) {
+                       U8_GET(delim, 0, 0, delim_len, delim_char);
+               } else {
+                       delim_char = (UChar32)delim[0];
+               }
                quote_delim = 1;
        }
        
        /* Allocate enough memory so that even if each character
-          is quoted, we won't run out of room */
-       out_str = safe_emalloc(4, in_str_len, 1);
+          is quoted, we won't run out of room. In Unicode mode, the longest UTF-8
+          sequence is 4 bytes, so the multiplier is (4+1). In non-Unicode mode, we
+          have to assume that any character can be '\0', which needs 4 chars to
+          be escaped. */
+       out_str = safe_emalloc(UG(unicode)?5:4, in_str_len, 1);
        
        /* Go through the string and quote necessary characters */
        for(p = in_str, q = out_str; p != in_str_end; p++) {
@@ -1745,16 +1752,28 @@
                                break;
 
                        default:
-                               if (quote_delim && c == delim_char)
-                                       *q++ = '\\';
-                               *q++ = c;
+                               if ((UChar32)(unsigned char)c > 0x7f) { /* non-ASCII char */
+                                       int tmp = 0;
+                                       UChar32 cp = 0;
+                                       U8_NEXT(p, tmp, in_str_end-p, cp);
+                                       if (quote_delim && cp == delim_char) {
+                                               *q++ = '\\';
+                                       }
+                                       memcpy(q, p, tmp);
+                                       q += tmp;
+                                       p += tmp-1; /* going to be incremented by the loop */
+                               } else {
+                                       if (quote_delim && c == delim_char)
+                                                       *q++ = '\\';
+                                       *q++ = c;
+                               }
                                break;
                }
        }
        *q = '\0';
        
        /* Reallocate string and return it */
-       RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
+       RETVAL_UTF8_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
 }
 /* }}} */
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to