derick          Fri Mar 17 14:29:05 2006 UTC

  Added files:                 
    /php-src/ext/standard/tests/strings strtr2.phpt ucfirst.phpt 

  Modified files:              
    /php-src/ext/standard       string.c type.c 
  Log:
  - Fixed bug in ucfirst() implementation. If tmp_len is 0, the ICU call does not
    write the uppercased character to the buffer but only returns the length of
    the uppercased letter, as per
    http://icu.sourceforge.net/apiref/icu4c/ustring_8h.html#a50.
  - Updated is_string():
    If Unicode semantics is turned on, return "true" for Unicode strings only.
    If Unicode semantics is turned off, return "true" for native strings only.
  - Added is_binary() function that returns "true" for native strings only.
  - Added first implementation of upgraded strtr function. It works except when
    combining sequences or surrogates are used in the non-array method of
    calling this function.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/string.c?r1=1.529&r2=1.530&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.529 php-src/ext/standard/string.c:1.530
--- php-src/ext/standard/string.c:1.529 Wed Mar 15 12:20:49 2006
+++ php-src/ext/standard/string.c       Fri Mar 17 14:29:05 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.529 2006/03/15 12:20:49 derick Exp $ */
+/* $Id: string.c,v 1.530 2006/03/17 14:29:05 derick Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3263,8 +3263,8 @@
    Makes an Unicode string's first character uppercase */
 static void php_u_ucfirst(zval *ustr, zval *return_value TSRMLS_DC)
 {
-       UChar tmp[3] = { 0,}; /* UChar32 will be converted to upto 2 UChar 
units ? */
-       int tmp_len = 0;
+       UChar tmp[3] = { 0, 0, 0 }; /* UChar32 will be converted to upto 2 
UChar units ? */
+       int tmp_len = 2;
        int pos = 0;
        UErrorCode status = U_ZERO_ERROR;
 
@@ -3427,6 +3427,181 @@
 }
 /* }}} */
 
+/* {{{ php_u_strtr
+ * Translates str in place, one UChar code unit at a time, and returns str.
+ *
+ * str, len              - buffer to translate and its length in code units
+ * str_from, str_to      - translation pairs: str_from[k] maps to str_to[k]
+ * str_from_len,
+ * str_to_len            - full lengths of the two tables; units at index
+ *                         trlen and beyond are never consulted
+ * trlen                 - number of pairs to use
+ *
+ * Every unit of str is translated at most once; if a unit occurs several
+ * times in str_from, the last occurrence wins (this is what the lookup table
+ * below produces, and the fallback loop is written to agree with it).
+ *
+ * UTODO: combining sequences and surrogate pairs are not handled, since the
+ * translation works on single code units.
+ */
+PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int str_from_len, UChar *str_to, int str_to_len, int trlen)
+{
+       int i, j;
+       int can_optimize = 1;
+
+       /* Only the first trlen units of either table are used; the full
+        * lengths are kept in the signature for callers. */
+       (void) str_from_len;
+       (void) str_to_len;
+
+       if ((trlen < 1) || (len < 1)) {
+               return str;
+       }
+
+       /* The table-driven version is usable when every pair that will
+        * actually be applied fits in a 256-entry table. */
+       for (i = 0; i < trlen; i++) {
+               if (str_from[i] > 255 || str_to[i] > 255) {
+                       can_optimize = 0;
+                       break;
+               }
+       }
+
+       if (can_optimize) {
+               UChar xlat[256];
+
+               for (i = 0; i < 256; i++) {
+                       xlat[i] = (UChar) i;
+               }
+
+               for (i = 0; i < trlen; i++) {
+                       xlat[str_from[i]] = str_to[i];
+               }
+
+               for (i = 0; i < len; i++) {
+                       /* str itself may contain units above 255 even when the
+                        * tables do not; those have no mapping and must not be
+                        * used as an index into xlat. */
+                       if (str[i] < 256) {
+                               str[i] = xlat[str[i]];
+                       }
+               }
+       } else {
+               /* UTODO: this is extremely slow (len * trlen comparisons); a
+                * better algorithm is wanted here. */
+               for (i = 0; i < len; i++) {
+                       /* Scan from the end so the last matching pair wins, and
+                        * stop at the first hit so an already translated unit is
+                        * not translated again by another pair. */
+                       for (j = trlen - 1; j >= 0; j--) {
+                               if (str[i] == str_from[j]) {
+                                       str[i] = str_to[j];
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       return str;
+}
+/* }}} */
+
+/* {{{ php_u_strtr_array
+ * Array form of strtr() for Unicode strings.
+ *
+ * return_value - receives the translated Unicode string, or FALSE when the
+ *                array contains an empty string key
+ * str, slen    - subject string and its length in UChar code units
+ * hash         - replacement pairs (key => replacement)
+ *
+ * The pairs are first copied into a temporary hash keyed by the Unicode form
+ * of each key. The subject is then scanned left to right; at every position
+ * the candidate substrings are tried from the longest key length down to the
+ * shortest. On a hit the replacement is appended to the result and the scan
+ * continues after the matched key, otherwise one code unit is copied as is.
+ */
+static void php_u_strtr_array(zval *return_value, UChar *str, int slen, HashTable *hash)
+{
+       zval **entry;
+       zstr   string_key;
+       uint   string_key_len;
+       zval **trans;
+       zval   ctmp;
+       ulong num_key;
+       int minlen = 128*1024;
+       int maxlen = 0, pos, len, found;
+       UChar *key;
+       HashPosition hpos;
+       smart_str result = {0};
+       HashTable tmp_hash;
+
+       /* Pass 1: index the pairs by Unicode key and record the shortest and
+        * longest key lengths. */
+       zend_hash_init(&tmp_hash, 0, NULL, NULL, 0);
+       zend_hash_internal_pointer_reset_ex(hash, &hpos);
+       while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
+               switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
+                       case HASH_KEY_IS_UNICODE:
+                               /* string_key_len counts the terminator */
+                               len = string_key_len-1;
+                               if (len < 1) {
+                                       /* an empty key cannot be matched */
+                                       zend_hash_destroy(&tmp_hash);
+                                       RETURN_FALSE;
+                               }
+                               zend_u_hash_add(&tmp_hash, IS_UNICODE, string_key, string_key_len, entry, sizeof(zval*), NULL);
+                               if (len > maxlen) {
+                                       maxlen = len;
+                               }
+                               if (len < minlen) {
+                                       minlen = len;
+                               }
+                               break;
+
+                       case HASH_KEY_IS_LONG:
+                               /* integer keys are matched by their Unicode text form */
+                               Z_TYPE(ctmp) = IS_LONG;
+                               Z_LVAL(ctmp) = num_key;
+
+                               convert_to_unicode(&ctmp);
+                               len = Z_USTRLEN(ctmp);
+                               zend_u_hash_add(&tmp_hash, IS_UNICODE, Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
+                               zval_dtor(&ctmp);
+
+                               if (len > maxlen) {
+                                       maxlen = len;
+                               }
+                               if (len < minlen) {
+                                       minlen = len;
+                               }
+                               break;
+               }
+               zend_hash_move_forward_ex(hash, &hpos);
+       }
+
+       /* Scratch buffer for the candidate key: longest key plus terminator. */
+       key = eumalloc(maxlen+1);
+       pos = 0;
+
+       /* Pass 2: scan the subject. */
+       while (pos < slen) {
+               /* never look past the end of the subject */
+               if ((pos + maxlen) > slen) {
+                       maxlen = slen - pos;
+               }
+
+               found = 0;
+               memcpy(key, str+pos, UBYTES(maxlen));
+
+               /* longest candidate first; shorten by one unit per iteration */
+               for (len = maxlen; len >= minlen; len--) {
+                       key[len] = 0;
+
+                       if (zend_u_hash_find(&tmp_hash, IS_UNICODE, ZSTR(key), len+1, (void**)&trans) == SUCCESS) {
+                               UChar *tval;
+                               int tlen;
+                               zval tmp;
+
+                               if (Z_TYPE_PP(trans) != IS_UNICODE) {
+                                       /* Work on a private copy. The value is read back
+                                        * through Z_USTRVAL/Z_USTRLEN below, so it has to
+                                        * be converted to Unicode, not to a binary string. */
+                                       tmp = **trans;
+                                       zval_copy_ctor(&tmp);
+                                       convert_to_unicode(&tmp);
+                                       tval = Z_USTRVAL(tmp);
+                                       tlen = Z_USTRLEN(tmp);
+                               } else {
+                                       tval = Z_USTRVAL_PP(trans);
+                                       tlen = Z_USTRLEN_PP(trans);
+                               }
+
+                               smart_str_appendl(&result, tval, UBYTES(tlen));
+                               pos += len;
+                               found = 1;
+
+                               if (Z_TYPE_PP(trans) != IS_UNICODE) {
+                                       zval_dtor(&tmp);
+                               }
+                               break;
+                       }
+               }
+
+               if (! found) {
+                       /* no key starts here: copy one code unit unchanged */
+                       smart_str_append2c(&result, str[pos]);
+                       pos++;
+               }
+       }
+
+       efree(key);
+       zend_hash_destroy(&tmp_hash);
+       /* NOTE(review): smart_str_0() presumably writes a single terminating
+        * byte; confirm the buffer ends with a full UChar terminator. */
+       smart_str_0(&result);
+       /* result.len is in bytes; halve it to get the length in code units */
+       RETVAL_UNICODEL((UChar *) result.c, result.len >> 1, 0);
+}
+/* }}} */
+
 /* {{{ php_strtr_array
  */
 static void php_strtr_array(zval *return_value, char *str, int slen, HashTable 
*hash)
@@ -3552,27 +3727,52 @@
                RETURN_FALSE;
        }
 
-       convert_to_string_ex(str);
+       if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_STRING) {
+               convert_to_text_ex(str);
+       }
 
        /* shortcut for empty string */
-       if (Z_STRLEN_PP(str) == 0) {
+       if (Z_TYPE_PP(str) == IS_UNICODE && !Z_USTRLEN_PP(str)) {
+               RETURN_EMPTY_UNICODE();
+       } else if (!Z_STRLEN_PP(str)) {
                RETURN_EMPTY_STRING();
        }
 
-       if (ac == 2) {
-               php_strtr_array(return_value, Z_STRVAL_PP(str), 
Z_STRLEN_PP(str), HASH_OF(*from));
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               if (ac == 2) {
+                       php_u_strtr_array(return_value, Z_USTRVAL_PP(str), 
Z_USTRLEN_PP(str), HASH_OF(*from));
+                       Z_TYPE_P(return_value) = IS_UNICODE;
+               } else {
+                       convert_to_unicode_ex(from);
+                       convert_to_unicode_ex(to);
+
+                       ZVAL_UNICODEL(return_value, Z_USTRVAL_PP(str), 
Z_USTRLEN_PP(str), 1);
+                       
+                       php_u_strtr(Z_USTRVAL_P(return_value),
+                                         Z_USTRLEN_P(return_value),
+                                         Z_USTRVAL_PP(from),
+                                         Z_USTRLEN_PP(from),
+                                         Z_USTRVAL_PP(to),
+                                         Z_USTRLEN_PP(to),
+                                         MIN(Z_USTRLEN_PP(from), 
Z_USTRLEN_PP(to)));
+                       Z_TYPE_P(return_value) = IS_UNICODE;
+               }
        } else {
-               convert_to_string_ex(from);
-               convert_to_string_ex(to);
+               if (ac == 2) {
+                       php_strtr_array(return_value, Z_STRVAL_PP(str), 
Z_STRLEN_PP(str), HASH_OF(*from));
+               } else {
+                       convert_to_string_ex(from);
+                       convert_to_string_ex(to);
 
-               ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 
1);
-               
-               php_strtr(Z_STRVAL_P(return_value),
-                                 Z_STRLEN_P(return_value),
-                                 Z_STRVAL_PP(from),
-                                 Z_STRVAL_PP(to),
-                                 MIN(Z_STRLEN_PP(from), 
-                                 Z_STRLEN_PP(to)));
+                       ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), 
Z_STRLEN_PP(str), 1);
+                       
+                       php_strtr(Z_STRVAL_P(return_value),
+                                         Z_STRLEN_P(return_value),
+                                         Z_STRVAL_PP(from),
+                                         Z_STRVAL_PP(to),
+                                         MIN(Z_STRLEN_PP(from), 
+                                         Z_STRLEN_PP(to)));
+               }
        }
 }
 /* }}} */
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/type.c?r1=1.40&r2=1.41&diff_format=u
Index: php-src/ext/standard/type.c
diff -u php-src/ext/standard/type.c:1.40 php-src/ext/standard/type.c:1.41
--- php-src/ext/standard/type.c:1.40    Tue Feb 21 20:12:42 2006
+++ php-src/ext/standard/type.c Fri Mar 17 14:29:05 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: type.c,v 1.40 2006/02/21 20:12:42 dmitry Exp $ */
+/* $Id: type.c,v 1.41 2006/03/17 14:29:05 derick Exp $ */
 
 #include "php.h"
 #include "php_incomplete_class.h"
@@ -285,11 +285,19 @@
 }
 /* }}} */
 
+/* {{{ proto bool is_binary(mixed var)
+   Returns true if variable is a native (binary) string */
+PHP_FUNCTION(is_binary)
+{
+       /* Always checks against IS_STRING, independent of the UG(unicode)
+        * setting that is_string() consults. */
+       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+}
+/* }}} */
+
 /* {{{ proto bool is_string(mixed var)
-   Returns true if variable is a native string */
+   Returns true if variable is a Unicode string (Unicode semantics on) or a native string (Unicode semantics off) */
 PHP_FUNCTION(is_string)
 {
-       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+       php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, UG(unicode) ? IS_UNICODE 
: IS_STRING);
 }
 /* }}} */
 

http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/strtr2.phpt?view=markup&rev=1.1
Index: php-src/ext/standard/tests/strings/strtr2.phpt
+++ php-src/ext/standard/tests/strings/strtr2.phpt
--TEST--
strtr() function (with unicode chars and combining sequences)
--FILE--
<?php
        declare(encoding="utf8");
        $from = "aåаиу";
        $to   = "12356";
        $string = "Dе åkаt krаpt de krуllen van de trap af";
        var_dump( strtr( $string, $from, $to ) );

        $from = "aeoui";
        $to   = "12356";
        $string = "De akat krapt de krullen van de trap af";
        var_dump( strtr( $string, $from, $to ) );

        $ar = array( "a" => 1, "e" => "2", "o" => 3, "u" => 5, "i" => 6 );
        $string = "De akat krapt de krullen van de trap af";
        var_dump( strtr( $string, $ar ) );

        // Test with combining chars (NOTE: this fourth var_dump has no matching line in --EXPECT--)
        $from = "åb";
        $to   = "1";
        $string = "xyzabc";
        var_dump( strtr( $string, $from, $to ) );
?>
--EXPECT--
unicode(39) "Dе 2k3t kr3pt de kr6llen v1n de tr1p 1f"
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"

http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/ucfirst.phpt?view=markup&rev=1.1
Index: php-src/ext/standard/tests/strings/ucfirst.phpt
+++ php-src/ext/standard/tests/strings/ucfirst.phpt
--TEST--
ucfirst()
--FILE--
<?php
// First letters whose uppercase form is a single character.
echo ucfirst("peren"), "\n";
echo ucfirst("appelen"), "\n";
// "ß" is expected to uppercase to the two-letter sequence "SS" (see --EXPECT--),
// i.e. the uppercased first character is longer than the original one.
echo ucfirst("ßen"), "\n";
?>
--EXPECT--
Peren
Appelen
SSen

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to