derick Sun Mar 26 02:15:47 2006 UTC Added files: /php-src/ext/standard/tests/strings strtotitle.phpt
Modified files: /php-src/ext/standard string.c /php-src/ext/standard/tests/strings strtr2.phpt Log: - Commit intermediate work so that I can hack on it on some plane.
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/string.c?r1=1.533&r2=1.534&diff_format=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.533 php-src/ext/standard/string.c:1.534 --- php-src/ext/standard/string.c:1.533 Wed Mar 22 10:20:20 2006 +++ php-src/ext/standard/string.c Sun Mar 26 02:15:47 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: string.c,v 1.533 2006/03/22 10:20:20 derick Exp $ */ +/* $Id: string.c,v 1.534 2006/03/26 02:15:47 derick Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -106,6 +106,7 @@ int php_tag_find(char *tag, int len, char *set); static void php_ucwords(zval *str); +static UChar* php_u_strtr_array(UChar *str, int slen, HashTable *hash, int minlen, int maxlen, int *outlen TSRMLS_DC); /* this is read-only, so it's ok */ static char hexconvtab[] = "0123456789abcdef"; @@ -3512,7 +3513,7 @@ /* {{{ php_u_strtr */ -PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int str_from_len, UChar *str_to, int str_to_len, int trlen) +PHPAPI UChar *php_u_strtr(UChar *str, int len, UChar *str_from, int str_from_len, UChar *str_to, int str_to_len, int trlen, int *outlen) { int i, j; int can_optimize = 1; @@ -3547,6 +3548,7 @@ if (can_optimize) { UChar xlat[256]; + UChar *tmp_str = eustrndup(str, len); for (i = 0; i < 256; xlat[i] = i, i++); @@ -3555,54 +3557,61 @@ } for (i = 0; i < len; i++) { - str[i] = xlat[str[i]]; + tmp_str[i] = xlat[str[i]]; } - return str; + *outlen = len; + return tmp_str; } else { - /* UTODO: We're quite fucked... this is *extremely* slow, better - * algorithm wanted here! It also doesn't handle combining sequences, I - * asked the icu-support list for good algorithms. */ - for (i = 0; i < len; i++) { - for (j = 0; j < trlen; j++) { - if (str[i] == str_from[j]) { - str[i] = str_to[j]; - } - } - } + /* We use the character break iterator here to assemble an mapping + * array in such a way that we can reuse the code in php_u_strtr_array + * to do the replacements in order to avoid duplicating code. */ + HashTable *tmp_hash; + int minlen = 128*1024, maxlen; + zval *tmp; + + tmp_hash = emalloc(sizeof(HashTable)); + zend_hash_init(tmp_hash, 0, NULL, NULL, 0); + + /* Loop over the two strings and prepare the hash entries */ + MAKE_STD_ZVAL(tmp); + ZVAL_UNICODEL(tmp, "X", 1, 1); + minlen = maxlen = 1; + zend_u_hash_add(tmp_hash, IS_UNICODE, ZSTR("a"), 2, &tmp, sizeof(zval *), NULL); + + /* Run the replacement */ + str = php_u_strtr_array(str, len, tmp_hash, minlen, maxlen, outlen TSRMLS_DC); + zend_hash_destroy(tmp_hash); + efree(tmp_hash); + return str; } } /* }}} */ -/* {{{ php_u_strtr_array - */ -static void php_u_strtr_array(zval *return_value, UChar *str, int slen, HashTable *hash TSRMLS_DC) +static HashTable* php_u_strtr_array_prepare_hashtable(HashTable *hash, int *minlen_out, int *maxlen_out) { + HashTable *tmp_hash = emalloc(sizeof(HashTable)); + HashPosition hpos; zval **entry; zstr string_key; uint string_key_len; - zval **trans; - zval ctmp; - ulong num_key; int minlen = 128*1024; - int maxlen = 0, pos, len, found; - UChar *key; - HashPosition hpos; - smart_str result = {0}; - HashTable tmp_hash; + ulong num_key; + int maxlen = 0, len; + zval ctmp; - zend_hash_init(&tmp_hash, 0, NULL, NULL, 0); + zend_hash_init(tmp_hash, 0, NULL, NULL, 0); zend_hash_internal_pointer_reset_ex(hash, &hpos); while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) { switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) { case HASH_KEY_IS_UNICODE: len = string_key_len-1; if (len < 1) { - zend_hash_destroy(&tmp_hash); - RETURN_FALSE; + zend_hash_destroy(tmp_hash); + return NULL; } - zend_u_hash_add(&tmp_hash, IS_UNICODE, string_key, string_key_len, entry, sizeof(zval*), NULL); + zend_u_hash_add(tmp_hash, IS_UNICODE, string_key, string_key_len, entry, sizeof(zval*), NULL); if (len > maxlen) { maxlen = len; } @@ -3617,7 +3626,7 @@ convert_to_unicode(&ctmp); len = Z_USTRLEN(ctmp); - zend_u_hash_add(&tmp_hash, IS_UNICODE, Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL); + zend_u_hash_add(tmp_hash, IS_UNICODE, Z_UNIVAL(ctmp), len+1, entry, sizeof(zval*), NULL); zval_dtor(&ctmp); if (len > maxlen) { @@ -3630,6 +3639,19 @@ } zend_hash_move_forward_ex(hash, &hpos); } + *minlen_out = minlen; + *maxlen_out = maxlen; + return tmp_hash; +} + +/* {{{ php_u_strtr_array + */ +static UChar* php_u_strtr_array(UChar *str, int slen, HashTable *hash, int minlen, int maxlen, int *outlen TSRMLS_DC) +{ + zval **trans; + UChar *key; + int pos, found, len; + smart_str result = {0}; key = eumalloc(maxlen+1); pos = 0; @@ -3645,7 +3667,7 @@ for (len = maxlen; len >= minlen; len--) { key[len] = 0; - if (zend_u_hash_find(&tmp_hash, IS_UNICODE, ZSTR(key), len+1, (void**)&trans) == SUCCESS) { + if (zend_u_hash_find(hash, IS_UNICODE, ZSTR(key), len+1, (void**)&trans) == SUCCESS) { UChar *tval; int tlen; zval tmp; @@ -3653,7 +3675,7 @@ if (Z_TYPE_PP(trans) != IS_UNICODE) { tmp = **trans; zval_copy_ctor(&tmp); - convert_to_string(&tmp); + convert_to_unicode(&tmp); tval = Z_USTRVAL(tmp); tlen = Z_USTRLEN(tmp); } else { @@ -3679,9 +3701,9 @@ } efree(key); - zend_hash_destroy(&tmp_hash); smart_str_0(&result); - RETVAL_UNICODEL((UChar *) result.c, result.len >> 1, 0); + *outlen = result.len >> 1; + return (UChar*) result.c; } /* }}} */ @@ -3822,22 +3844,33 @@ } if (Z_TYPE_PP(str) == IS_UNICODE) { + int outlen; + UChar *outstr; + if (ac == 2) { - php_u_strtr_array(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), HASH_OF(*from) TSRMLS_CC); + int minlen, maxlen; + HashTable *hash; + + hash = php_u_strtr_array_prepare_hashtable(HASH_OF(*from), &minlen, &maxlen); + outstr = php_u_strtr_array(Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), hash, minlen, maxlen, &outlen TSRMLS_CC); + zend_hash_destroy(hash); + efree(hash); + RETVAL_UNICODEL(outstr, outlen, 0); Z_TYPE_P(return_value) = IS_UNICODE; } else { convert_to_unicode_ex(from); convert_to_unicode_ex(to); - ZVAL_UNICODEL(return_value, Z_USTRVAL_PP(str), Z_USTRLEN_PP(str), 1); - - php_u_strtr(Z_USTRVAL_P(return_value), - Z_USTRLEN_P(return_value), + outstr = php_u_strtr(Z_USTRVAL_PP(str), + Z_USTRLEN_PP(str), Z_USTRVAL_PP(from), Z_USTRLEN_PP(from), Z_USTRVAL_PP(to), Z_USTRLEN_PP(to), - MIN(Z_USTRLEN_PP(from), Z_USTRLEN_PP(to))); + MIN(Z_USTRLEN_PP(from), Z_USTRLEN_PP(to)), + &outlen); + ZVAL_UNICODEL(return_value, outstr, outlen, 0); + Z_TYPE_P(return_value) = IS_UNICODE; } } else { http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/strtr2.phpt?r1=1.1&r2=1.2&diff_format=u Index: php-src/ext/standard/tests/strings/strtr2.phpt diff -u php-src/ext/standard/tests/strings/strtr2.phpt:1.1 php-src/ext/standard/tests/strings/strtr2.phpt:1.2 --- php-src/ext/standard/tests/strings/strtr2.phpt:1.1 Fri Mar 17 14:29:05 2006 +++ php-src/ext/standard/tests/strings/strtr2.phpt Sun Mar 26 02:15:47 2006 @@ -1,5 +1,7 @@ --TEST-- strtr() function (with unicode chars and combining sequences) +--SKIPIF-- +<?php if (!unicode_semantics()) die('skip unicode_semantics=off'); ?> --FILE-- <?php declare(encoding="utf8"); @@ -17,13 +19,28 @@ $string = "De akat krapt de krullen van de trap af"; var_dump( strtr( $string, $ar ) ); - // Test with combining chars + // Test with combining chars 1 $from = "aÌb"; $to = "1"; + $string = "xyzaÌbc"; + var_dump( strtr( $string, $from, $to ) ); + + // Test with combining chars 2 + $from = "aÌb"; + $to = "12"; $string = "xyzabc"; var_dump( strtr( $string, $from, $to ) ); + + // Test with combining chars 3 + $from = "aÌ̧eÌiÌ"; + $to = "123"; + $string = "aÌ̧bcdeÌfghiÌj"; + var_dump( strtr( $string, $from, $to ) ); ?> --EXPECT-- unicode(39) "Dе 2k3t kr3pt de kr6llen v1n de tr1p 1f" unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f" unicode(39) "D2 1k1t kr1pt d2 kr5ll2n v1n d2 tr1p 1f" +unicode(6) "xyz1bc" +unicode(6) "xyz12c" +unicode(10) "1bcd2fgh3j" http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/tests/strings/strtotitle.phpt?view=markup&rev=1.1 Index: php-src/ext/standard/tests/strings/strtotitle.phpt +++ php-src/ext/standard/tests/strings/strtotitle.phpt --TEST-- Test strtotitle --SKIPIF-- <?php if (!setlocale(LC_CTYPE, "de_DE", "de", "german", "ge", "de_DE.ISO8859-1", "ISO8859-1")) { die("skip locale needed for this test is not supported on this platform"); } ?> --INI-- unicode.script_encoding=ISO-8859-1 unicode.output_encoding=ISO-8859-1 --FILE-- <?php declare(encoding="latin1"); setlocale(LC_ALL, "de_DE", "de", "german", "ge", "de_DE.ISO8859-1", "ISO8859-1"); $strings = array( "ßen", "året", "året ßen" ); foreach( $strings as $string ) { echo ucwords( $string ), "\n"; echo strtotitle( $string ), "\n"; } ?> --EXPECT-- ßen ßen Året Året Året ßen Året ßen --UEXPECT-- SSen Ssen Året Året Året SSen Året Ssen
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php