Rolland,

The implementation of this is problematic for two reasons:

1) You are assuming well-formed UTF-16 here. It would be better to use the U16_PREV() macro to obtain a codepoint and then put it in the proper place with zend_codepoint_to_uchar().

2) Combining sequences are not respected. We can't swap the base character and the combining chars that follow it, because the string may be concatenated with something else and the combining chars may end up affecting something else. So we need to work at the grapheme level here, using u_getCombiningClass() to check for combining chars and copying the base character plus its combining chars as a unit.

Also, it'd be great to have the function upgrade proposals first, before the implementation is committed.

-Andrei

On Aug 19, 2005, at 3:59 AM, Rolland Santimano wrote:
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.466&r2=1.467&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.466 php-src/ext/standard/string.c:1.467
--- php-src/ext/standard/string.c:1.466 Thu Aug 18 18:37:22 2005
+++ php-src/ext/standard/string.c       Fri Aug 19 06:59:19 2005
@@ -18,7 +18,7 @@
+---------------------------------------------------------------------- +
  */

-/* $Id: string.c,v 1.466 2005/08/18 22:37:22 andrei Exp $ */
+/* $Id: string.c,v 1.467 2005/08/19 10:59:19 rolland Exp $ */

 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */

@@ -3212,25 +3212,51 @@
 {
        zval **str;
        char *s, *e, *n, *p;
+       UChar *u_s, *u_e, *u_n, *u_p;
        
if (ZEND_NUM_ARGS()!=1 || zend_get_parameters_ex(1, &str) == FAILURE) {
                WRONG_PARAM_COUNT;
        }
-       convert_to_string_ex(str);
-       
-       n = emalloc(Z_STRLEN_PP(str)+1);
-       p = n;
-       
-       s = Z_STRVAL_PP(str);
-       e = s + Z_STRLEN_PP(str);
-       
-       while (--e>=s) {
-               *p++ = *e;
+
+ if (Z_TYPE_PP(str) != IS_UNICODE && Z_TYPE_PP(str) != IS_BINARY && Z_TYPE_PP(str) != IS_STRING) {
+               convert_to_text_ex(str);
+       }
+
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               u_n = eumalloc(Z_USTRLEN_PP(str)+1);
+               u_p = u_n;
+               u_s = Z_USTRVAL_PP(str);
+               u_e = u_s + Z_USTRLEN_PP(str) - 1;
+
+               while (u_e >= u_s) {
+                       if (U16_IS_TRAIL(*u_e)) {
+                               *u_p = *(u_e-1);
+                               *(u_p+1) = *u_e;
+                               u_e -= 2; u_p += 2;
+                       } else {
+                               *u_p++ = *u_e--;
+                       }
+               }
+               *u_p = 0;
+       } else {
+               n = emalloc(Z_STRLEN_PP(str)+1);
+               p = n;
+               s = Z_STRVAL_PP(str);
+               e = s + Z_STRLEN_PP(str);
+
+               while (--e >= s) {
+                       *(p++) = *e;
+               }
+               *p = '\0';
        }
        
-       *p = '\0';
-       
-       RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+       if (Z_TYPE_PP(str) == IS_UNICODE) {
+               RETVAL_UNICODEL(u_n, Z_USTRLEN_PP(str), 0);
+       } else if (Z_TYPE_PP(str) == IS_BINARY) {
+               RETVAL_BINARYL(n, Z_BINLEN_PP(str), 0);
+       } else {
+               RETVAL_STRINGL(n, Z_STRLEN_PP(str), 0);
+       }
 }
 /* }}} */


--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to