andrei Thu Mar 2 23:12:34 2006 UTC Modified files: /php-src/ext/standard string.c Log: Fix ucwords() to use full case mapping. # Note that this is different from i18n_strtotitle(), which uses a # locale-aware word break iterator. The difference is seen here: # # $a = "pouvez-vous"; # var_dump(ucwords($a)); # var_dump(i18n_strtotitle($a)); # # Outputs: # # unicode(11) "Pouvez-vous" # unicode(11) "Pouvez-Vous" # http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/string.c?r1=1.521&r2=1.522&diff_format=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.521 php-src/ext/standard/string.c:1.522 --- php-src/ext/standard/string.c:1.521 Thu Mar 2 20:37:07 2006 +++ php-src/ext/standard/string.c Thu Mar 2 23:12:33 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: string.c,v 1.521 2006/03/02 20:37:07 andrei Exp $ */ +/* $Id: string.c,v 1.522 2006/03/02 23:12:33 andrei Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -3257,9 +3257,8 @@ Makes an Unicode string's first character uppercase */ static void php_u_ucfirst(zval *ustr, zval *return_value) { - UChar32 lc, uc; UChar tmp[3] = { 0,}; /* UChar32 will be converted to upto 2 UChar units ? 
*/ - int tmp_len; + int tmp_len = 0; int pos = 0; UErrorCode status = U_ZERO_ERROR; @@ -3310,46 +3309,52 @@ Uppercase the first character of every word in an Unicode string */ static void php_u_ucwords(zval *ustr, zval *retval) { - UChar32 *codepts; - int32_t len, retval_len; - int32_t i; - UErrorCode err; + UChar32 cp = 0; + UChar *tmp; + int retval_len; + int pos = 0, last_pos = 0; + int tmp_len = 0; + zend_bool last_was_space = TRUE; + UErrorCode status = U_ZERO_ERROR; - len = Z_USTRLEN_P(ustr); - codepts = (UChar32 *)emalloc((len+1)*sizeof(UChar32)); - err = U_ZERO_ERROR; - u_strToUTF32(codepts, len+1, &len, Z_USTRVAL_P(ustr), len, &err); - if (U_FAILURE(err)) { - efree(codepts); - ZVAL_EMPTY_UNICODE(retval); - return; - } + /* + * We can calculate maximum resulting length precisely considering that not + * more than half of the codepoints in the string can follow a whitespace + * and that maximum expansion is 2 UChar's. + */ + retval_len = ((3 * Z_USTRLEN_P(ustr)) >> 1) + 2; + tmp = eumalloc(retval_len); - codepts[0] = u_toupper(codepts[0]); - for (i = 1; i < len ; i++) { - if (u_isWhitespace(codepts[i-1]) == TRUE) { - codepts[i] = u_totitle(codepts[i]); - } - } + while (pos < Z_USTRLEN_P(ustr)) { - retval_len = len; - Z_USTRVAL_P(retval) = eumalloc(retval_len+1); - err = U_ZERO_ERROR; - u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, &retval_len, codepts, len, &err); - if (U_FAILURE(err) == U_BUFFER_OVERFLOW_ERROR) { - err = U_ZERO_ERROR; - Z_USTRVAL_P(retval) = eurealloc(Z_USTRVAL_P(retval), retval_len+1); - u_strFromUTF32(Z_USTRVAL_P(retval), retval_len+1, NULL, codepts, len, &err); + U16_NEXT(Z_USTRVAL_P(ustr), pos, Z_USTRLEN_P(ustr), cp); + + if (u_isWhitespace(cp) == TRUE) { + tmp_len += zend_codepoint_to_uchar(cp, tmp + tmp_len); + last_was_space = TRUE; + } else { + if (last_was_space) { + tmp_len += u_strToUpper(tmp + tmp_len, retval_len - tmp_len, Z_USTRVAL_P(ustr) + last_pos, 1, UG(default_locale), &status); + last_was_space = FALSE; + } else { + 
tmp_len += zend_codepoint_to_uchar(cp, tmp + tmp_len); + } + } + + last_pos = pos; } + tmp[tmp_len] = 0; - if (U_SUCCESS(err)) { - Z_USTRLEN_P(retval) = retval_len; + /* + * Try to avoid another alloc if the difference between allocated size and + * real length is "small". + */ + if (retval_len - tmp_len > 256) { + ZVAL_UNICODEL(retval, tmp, tmp_len, 1); + efree(tmp); } else { - efree(Z_USTRVAL_P(retval)); - ZVAL_EMPTY_UNICODE(retval); + ZVAL_UNICODEL(retval, tmp, tmp_len, 0); } - - efree(codepts); } /* }}} */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php