andrei          Mon Oct  2 16:52:22 2006 UTC

  Modified files:              
    /php-src    unicode-progress.txt 
    /php-src/ext/standard       string.c 
  Log:
  Make stripos() work with Unicode strings.
  
  
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.50&r2=1.51&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.50 php-src/unicode-progress.txt:1.51
--- php-src/unicode-progress.txt:1.50   Fri Sep 22 19:35:05 2006
+++ php-src/unicode-progress.txt        Mon Oct  2 16:52:21 2006
@@ -27,7 +27,6 @@
         Params API. Rest - no idea yet.
 
     stristr()
-    stripos()
     strripos()
     str_replace()
     stri_replace()
@@ -158,6 +157,7 @@
     strip_tags()
     stripcslashes()
     stripslashes()
+    stripos()
     strpbrk()
     strpos()
     strrchr()
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.595&r2=1.596&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.595 php-src/ext/standard/string.c:1.596
--- php-src/ext/standard/string.c:1.595 Mon Oct  2 01:11:04 2006
+++ php-src/ext/standard/string.c       Mon Oct  2 16:52:22 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.595 2006/10/02 01:11:04 pollita Exp $ */
+/* $Id: string.c,v 1.596 2006/10/02 16:52:22 andrei Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -2625,7 +2625,7 @@
 }
 /* }}} */
 
-/* {{{ proto int stripos(string haystack, string needle [, int offset])
+/* {{{ proto int stripos(string haystack, string needle [, int offset]) U
    Finds position of first occurrence of a string within another, case insensitive */
 PHP_FUNCTION(stripos)
 {
@@ -2633,12 +2633,12 @@
        long offset = 0;
        int haystack_len, needle_len = 0;
        zend_uchar str_type;
-       void *haystack_dup, *needle_dup = NULL;
+       void *haystack_dup = NULL, *needle_dup = NULL;
        char needle_char[2];
        char c = 0;
-       UChar u_needle_char[3];
-       UChar32 ch = 0;
+       UChar u_needle_char[8];
        void *found = NULL;
+       int cu_offset = 0;
 
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZ|l", &haystack, 
&needle, &offset) == FAILURE) {
                return;
@@ -2662,6 +2662,7 @@
                if (!Z_UNILEN_PP(needle) || Z_UNILEN_PP(needle) > haystack_len) 
{
                        RETURN_FALSE;
                }
+               /* convert both strings to the same type */
                if (Z_TYPE_PP(haystack) != Z_TYPE_PP(needle)) {
                        str_type = zend_get_unified_string_type(2 TSRMLS_CC, 
Z_TYPE_PP(haystack), Z_TYPE_PP(needle));
                        convert_to_explicit_type_ex(haystack, str_type);
@@ -2669,11 +2670,9 @@
                }
                needle_len = Z_UNILEN_PP(needle);
                if (Z_TYPE_PP(haystack) == IS_UNICODE) {
-                       haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack), 
&haystack_len, UG(default_locale));
-                       needle_dup = php_u_strtolower(Z_USTRVAL_PP(needle), 
&needle_len, UG(default_locale));
-                       found = zend_u_memnstr((UChar *)haystack_dup + offset,
-                                                                  (UChar 
*)needle_dup, needle_len,
-                                                                  (UChar 
*)haystack_dup + haystack_len);
+                       /* calculate codeunit offset */
+                       U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, 
haystack_len, offset);
+                       found = php_u_stristr(Z_USTRVAL_PP(haystack) + 
cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC);
                } else {
                        haystack_dup = estrndup(Z_STRVAL_PP(haystack), 
haystack_len);
                        php_strtolower((char *)haystack_dup, haystack_len);
@@ -2688,14 +2687,22 @@
                        case IS_LONG:
                        case IS_BOOL:
                                if (Z_TYPE_PP(haystack) == IS_UNICODE) {
-                                       ch = 
u_tolower((UChar32)Z_LVAL_PP(needle));
+                                       if (Z_LVAL_PP(needle) < 0 || 
Z_LVAL_PP(needle) > 0x10FFFF) {
+                                               php_error(E_WARNING, "Needle 
argument codepoint value out of range (0 - 0x10FFFF)");
+                                               RETURN_FALSE;      
+                                       }
+                                       needle_len = 
zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
                                } else {
                                        c = tolower((char)Z_LVAL_PP(needle));
                                }
                                break;
                        case IS_DOUBLE:
                                if (Z_TYPE_PP(haystack) == IS_UNICODE) {
-                                       ch = 
u_tolower((UChar32)Z_DVAL_PP(needle));
+                                       if ((UChar32)Z_DVAL_PP(needle) < 0 || 
(UChar32)Z_DVAL_PP(needle) > 0x10FFFF) {
+                                               php_error(E_WARNING, "Needle 
argument codepoint value out of range (0 - 0x10FFFF)");
+                                               RETURN_FALSE;      
+                                       }
+                                       needle_len = 
zend_codepoint_to_uchar((UChar32)Z_DVAL_PP(needle), u_needle_char);
                                } else {
                                        c = tolower((char)Z_DVAL_PP(needle));
                                }
@@ -2707,18 +2714,12 @@
 
                }
                if (Z_TYPE_PP(haystack) == IS_UNICODE) {
-                       if (U_IS_BMP(ch)) {
-                               u_needle_char[needle_len++] = ch;
-                               u_needle_char[needle_len]   = 0;
-                       } else {
-                               u_needle_char[needle_len++] = U16_LEAD(ch);
-                               u_needle_char[needle_len++] = U16_TRAIL(ch);
-                               u_needle_char[needle_len]   = 0;
-                       }
-                       haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack), 
&haystack_len, UG(default_locale));
-                       found = zend_u_memnstr((UChar *)haystack_dup + offset,
-                                                                  (UChar 
*)u_needle_char, needle_len,
-                                                                  (UChar 
*)haystack_dup + haystack_len);
+                       /* calculate codeunit offset */
+                       U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, 
haystack_len, offset);
+                       u_needle_char[needle_len] = 0;
+                       found = php_u_stristr(Z_USTRVAL_PP(haystack) + 
cu_offset,
+                                                                 
u_needle_char, haystack_len, needle_len TSRMLS_CC);
+                                                                  
                } else {
                        needle_char[0] = c;
                        needle_char[1] = '\0';
@@ -2731,14 +2732,21 @@
                }
        }
 
-       efree(haystack_dup);
+       if (haystack_dup) {
+               efree(haystack_dup);
+       }
        if (needle_dup) {
                efree(needle_dup);
        }
 
        if (found) {
                if (Z_TYPE_PP(haystack) == IS_UNICODE) {
-                       RETURN_LONG((UChar *)found - (UChar *)haystack_dup);
+                       /* Simple subtraction will not suffice, since there may be
+                          supplementary codepoints. We count how many codepoints there are
+                          between the starting offset and the found location and add them
+                          to the starting codepoint offset. */
+                       RETURN_LONG(offset + 
u_countChar32(Z_USTRVAL_PP(haystack) + cu_offset,
+                                                                               
           (UChar*)found - (Z_USTRVAL_PP(haystack) + cu_offset)));
                } else {
                        RETURN_LONG((char *)found - (char *)haystack_dup);
                }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to