derick Fri Mar 17 14:29:05 2006 UTC
Added files:
/php-src/ext/standard/tests/strings strtr2.phpt ucfirst.phpt
Modified files:
/php-src/ext/standard string.c type.c
Log:
- Fixed bug in ucfirst() implementation. If the tmp_len = 0 then it will not
write the uppercased character to the buffer, but only returns the length of
the uppercased letter as per
http://icu.sourceforge.net/apiref/icu4c/ustring_8h.html#a50.
- Updated is_string():
If Unicode semantics is turned on, return "true" for Unicode strings only.
If Unicode semantics is turned off, return "true" for native strings only.
- Added is_binary() function that returns "true" for native strings only.
- Added first implementation of upgraded strtr function. It works except if
combining sequences or surrogates are used in the non-array method of
calling
this function.
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/string.c?r1=1.529&r2=1.530&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.529 php-src/ext/standard/string.c:1.530
--- php-src/ext/standard/string.c:1.529 Wed Mar 15 12:20:49 2006
+++ php-src/ext/standard/string.c Fri Mar 17 14:29:05 2006
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: string.c,v 1.529 2006/03/15 12:20:49 derick Exp $ */
+/* $Id: string.c,v 1.530 2006/03/17 14:29:05 derick Exp $ */
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
@@ -3263,8 +3263,8 @@
Makes an Unicode string's first character uppercase */
static void php_u_ucfirst(zval *ustr, zval *return_value TSRMLS_DC)
{
- UChar tmp[3] = { 0,}; /* UChar32 will be converted to upto 2 UChar
units ? */
- int tmp_len = 0;
+ UChar tmp[3] = { 0, 0, 0 }; /* UChar32 will be converted to upto 2
UChar units ? */
+ int tmp_len = 2;
int pos = 0;
UErrorCode status = U_ZERO_ERROR;
@@ -3427,6 +3427,181 @@
}
/* }}} */
+/* {{{ php_u_strtr
+ */
+PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int
str_from_len, UChar *str_to, int str_to_len, int trlen)
+{
+ int i, j;
+ int can_optimize = 1;
+
+ if ((trlen < 1) || (len < 1)) {
+ return str;
+ }
+
+ /* First loop to see if we can use the optimized version */
+ for (i = 0; i < trlen; i++) {
+ if (str_from[i] > 255 || str_to[i] > 255) {
+ can_optimize = 0;
+ break;
+ }
+ }
+ if (can_optimize) {
+ for (i = trlen; i < str_from_len; i++) {
+ if (str_from[i] > 255) {
+ can_optimize = 0;
+ break;
+ }
+ }
+ }
+ if (can_optimize) {
+ for (i = trlen; i < str_to_len; i++) {
+ if (str_from[i] > 255) {
+ can_optimize = 0;
+ break;
+ }
+ }
+ }
+
+ if (can_optimize) {
+ UChar xlat[256];
+
+ for (i = 0; i < 256; xlat[i] = i, i++);
+
+ for (i = 0; i < trlen; i++) {
+ xlat[str_from[i]] = str_to[i];
+ }
+
+ for (i = 0; i < len; i++) {
+ str[i] = xlat[str[i]];
+ }
+
+ return str;
+ } else {
+ /* UTODO: We're quite fucked... this is *extremely* slow, better
+ * algorithm wanted here! It also doesn't handle combining
sequences, I
+ * asked the icu-support list for good algorithms. */
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < trlen; j++) {
+ if (str[i] == str_from[j]) {
+ str[i] = str_to[j];
+ }
+ }
+ }
+ return str;
+ }
+}
+/* }}} */
+
+/* {{{ php_u_strtr_array
+ */
+static void php_u_strtr_array(zval *return_value, UChar *str, int slen,
HashTable *hash)
+{
+ zval **entry;
+ zstr string_key;
+ uint string_key_len;
+ zval **trans;
+ zval ctmp;
+ ulong num_key;
+ int minlen = 128*1024;
+ int maxlen = 0, pos, len, found;
+ UChar *key;
+ HashPosition hpos;
+ smart_str result = {0};
+ HashTable tmp_hash;
+
+ zend_hash_init(&tmp_hash, 0, NULL, NULL, 0);
+ zend_hash_internal_pointer_reset_ex(hash, &hpos);
+ while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) ==
SUCCESS) {
+ switch (zend_hash_get_current_key_ex(hash, &string_key,
&string_key_len, &num_key, 0, &hpos)) {
+ case HASH_KEY_IS_UNICODE:
+ len = string_key_len-1;
+ if (len < 1) {
+ zend_hash_destroy(&tmp_hash);
+ RETURN_FALSE;
+ }
+ zend_u_hash_add(&tmp_hash, IS_UNICODE,
string_key, string_key_len, entry, sizeof(zval*), NULL);
+ if (len > maxlen) {
+ maxlen = len;
+ }
+ if (len < minlen) {
+ minlen = len;
+ }
+ break;
+
+ case HASH_KEY_IS_LONG:
+ Z_TYPE(ctmp) = IS_LONG;
+ Z_LVAL(ctmp) = num_key;
+
+ convert_to_unicode(&ctmp);
+ len = Z_USTRLEN(ctmp);
+ zend_u_hash_add(&tmp_hash, IS_UNICODE,
Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
+ zval_dtor(&ctmp);
+
+ if (len > maxlen) {
+ maxlen = len;
+ }
+ if (len < minlen) {
+ minlen = len;
+ }
+ break;
+ }
+ zend_hash_move_forward_ex(hash, &hpos);
+ }
+
+ key = eumalloc(maxlen+1);
+ pos = 0;
+
+ while (pos < slen) {
+ if ((pos + maxlen) > slen) {
+ maxlen = slen - pos;
+ }
+
+ found = 0;
+ memcpy(key, str+pos, UBYTES(maxlen));
+
+ for (len = maxlen; len >= minlen; len--) {
+ key[len] = 0;
+
+ if (zend_u_hash_find(&tmp_hash, IS_UNICODE, ZSTR(key),
len+1, (void**)&trans) == SUCCESS) {
+ UChar *tval;
+ int tlen;
+ zval tmp;
+
+ if (Z_TYPE_PP(trans) != IS_UNICODE) {
+ tmp = **trans;
+ zval_copy_ctor(&tmp);
+ convert_to_string(&tmp);
+ tval = Z_USTRVAL(tmp);
+ tlen = Z_USTRLEN(tmp);
+ } else {
+ tval = Z_USTRVAL_PP(trans);
+ tlen = Z_USTRLEN_PP(trans);
+ }
+
+ smart_str_appendl(&result, tval, UBYTES(tlen));
+ pos += len;
+ found = 1;
+
+ if (Z_TYPE_PP(trans) != IS_UNICODE) {
+ zval_dtor(&tmp);
+ }
+ break;
+ }
+ }
+
+ if (! found) {
+ smart_str_append2c(&result, str[pos]);
+ pos++;
+ }
+ }
+
+ efree(key);
+ zend_hash_destroy(&tmp_hash);
+ smart_str_0(&result);
+ RETVAL_UNICODEL((UChar *) result.c, result.len >> 1, 0);
+}
+/* }}} */
+
/* {{{ php_strtr_array
*/
static void php_strtr_array(zval *return_value, char *str, int slen, HashTable
*hash)
@@ -3552,27 +3727,52 @@
RETURN_FALSE;
}
- convert_to_string_ex(str);
+ if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_STRING) {
+ convert_to_text_ex(str);
+ }
/* shortcut for empty string */
- if (Z_STRLEN_PP(str) == 0) {
+ if (Z_TYPE_PP(str) == IS_UNICODE && !Z_USTRLEN_PP(str)) {
+ RETURN_EMPTY_UNICODE();
+ } else if (!Z_STRLEN_PP(str)) {
RETURN_EMPTY_STRING();
}
- if (ac == 2) {
- php_strtr_array(return_value, Z_STRVAL_PP(str),
Z_STRLEN_PP(str), HASH_OF(*from));
+ if (Z_TYPE_PP(str) == IS_UNICODE) {
+ if (ac == 2) {
+ php_u_strtr_array(return_value, Z_USTRVAL_PP(str),
Z_USTRLEN_PP(str), HASH_OF(*from));
+ Z_TYPE_P(return_value) = IS_UNICODE;
+ } else {
+ convert_to_unicode_ex(from);
+ convert_to_unicode_ex(to);
+
+ ZVAL_UNICODEL(return_value, Z_USTRVAL_PP(str),
Z_USTRLEN_PP(str), 1);
+
+ php_u_strtr(Z_USTRVAL_P(return_value),
+ Z_USTRLEN_P(return_value),
+ Z_USTRVAL_PP(from),
+ Z_USTRLEN_PP(from),
+ Z_USTRVAL_PP(to),
+ Z_USTRLEN_PP(to),
+ MIN(Z_USTRLEN_PP(from),
Z_USTRLEN_PP(to)));
+ Z_TYPE_P(return_value) = IS_UNICODE;
+ }
} else {
- convert_to_string_ex(from);
- convert_to_string_ex(to);
+ if (ac == 2) {
+ php_strtr_array(return_value, Z_STRVAL_PP(str),
Z_STRLEN_PP(str), HASH_OF(*from));
+ } else {
+ convert_to_string_ex(from);
+ convert_to_string_ex(to);
- ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str),
1);
-
- php_strtr(Z_STRVAL_P(return_value),
- Z_STRLEN_P(return_value),
- Z_STRVAL_PP(from),
- Z_STRVAL_PP(to),
- MIN(Z_STRLEN_PP(from),
- Z_STRLEN_PP(to)));
+ ZVAL_STRINGL(return_value, Z_STRVAL_PP(str),
Z_STRLEN_PP(str), 1);
+
+ php_strtr(Z_STRVAL_P(return_value),
+ Z_STRLEN_P(return_value),
+ Z_STRVAL_PP(from),
+ Z_STRVAL_PP(to),
+ MIN(Z_STRLEN_PP(from),
+ Z_STRLEN_PP(to)));
+ }
}
}
/* }}} */
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/type.c?r1=1.40&r2=1.41&diff_format=u
Index: php-src/ext/standard/type.c
diff -u php-src/ext/standard/type.c:1.40 php-src/ext/standard/type.c:1.41
--- php-src/ext/standard/type.c:1.40 Tue Feb 21 20:12:42 2006
+++ php-src/ext/standard/type.c Fri Mar 17 14:29:05 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: type.c,v 1.40 2006/02/21 20:12:42 dmitry Exp $ */
+/* $Id: type.c,v 1.41 2006/03/17 14:29:05 derick Exp $ */
#include "php.h"
#include "php_incomplete_class.h"
@@ -285,11 +285,19 @@
}
/* }}} */
+/* {{{ proto bool is_binary(mixed var)
+ Returns true if variable is a native (binary) string */
+PHP_FUNCTION(is_binary)
+{
+ php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+}
+/* }}} */
+
/* {{{ proto bool is_string(mixed var)
- Returns true if variable is a native string */
+ Returns true if variable is a string */
PHP_FUNCTION(is_string)
{
- php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, IS_STRING);
+ php_is_type(INTERNAL_FUNCTION_PARAM_PASSTHRU, UG(unicode) ? IS_UNICODE
: IS_STRING);
}
/* }}} */
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/strtr2.phpt?view=markup&rev=1.1
Index: php-src/ext/standard/tests/strings/strtr2.phpt
+++ php-src/ext/standard/tests/strings/strtr2.phpt
--TEST--
strtr() function (with unicode chars and combining sequences)
--FILE--
<?php
declare(encoding="utf8");
$from = "aåаиÑ";
$to = "12356";
$string = "Dе Ã¥kаt krаpt de krÑllen van de trap af";
var_dump( strtr( $string, $from, $to ) );
$from = "aeoui";
$to = "12356";
$string = "De akat krapt de krullen van de trap af";
var_dump( strtr( $string, $from, $to ) );
$ar = array( "a" => 1, "e" => "2", "o" => 3, "u" => 5, "i" => 6 );
$string = "De akat krapt de krullen van de trap af";
var_dump( strtr( $string, $ar ) );
// Test with combining chars
$from = "aÌb";
$to = "1";
$string = "xyzabc";
var_dump( strtr( $string, $from, $to ) );
?>
--EXPECT--
unicode(39) "Dе 2k3t kr3pt de kr6llen v1n de tr1p 1f"
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f"
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/ucfirst.phpt?view=markup&rev=1.1
Index: php-src/ext/standard/tests/strings/ucfirst.phpt
+++ php-src/ext/standard/tests/strings/ucfirst.phpt
--TEST--
ucfirst()
--FILE--
<?php
echo ucfirst("peren"), "\n";
echo ucfirst("appelen"), "\n";
echo ucfirst("Ãen"), "\n";
?>
--EXPECT--
Peren
Appelen
SSen
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php