andrei          Thu Dec 21 21:47:56 2006 UTC

  Modified files:              
    /php-src    unicode-progress.txt 
    /php-src/ext/standard       array.c php_string.h string.c strnatcmp.c 
  Log:
  Bite the bullet and port the natural comparison algorithm to support
  UChar strings. Also, simplify the original code.
  
  # Argggghh, post-incremented iteration sucks. That means you, U16_* stuff.
  
  
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.70&r2=1.71&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.70 php-src/unicode-progress.txt:1.71
--- php-src/unicode-progress.txt:1.70   Wed Dec 20 21:02:11 2006
+++ php-src/unicode-progress.txt        Thu Dec 21 21:47:56 2006
@@ -11,15 +11,6 @@
         Params API, what encoding to use for the message, handling email
         option
 
-  array.c
-  -------
-    natsort(), natcasesort()
-        Params API
-        Either port strnatcmp() to support Unicode or maybe use ICU's
-        numeric collation. Update: can't seem to get the right collation
-        parameters to duplicate strnatcmp() functionality. Conclusion: port
-        to support Unicode.
-
   string.c
   --------
     parse_str()
@@ -28,9 +19,6 @@
     sscanf()
         Params API. Rest - no idea yet.
 
-    strnatcmp(), strnatcasecmp()
-        Params API. The rest depends on porting of strnatcmp.c
-
     wordwrap()
         Upgrade, do wordwrapping on codepoint (or glyph ?) level, maybe use
         additional whitespace chars instead of just space.
@@ -538,6 +526,7 @@
     in_array()
     min()
     max()
+    natsort(), natcasesort()
     range()
     shuffle()
 
@@ -609,6 +598,7 @@
     stripslashes()
     stripos()
     stristr()
+    strnatcmp(), strnatcasecmp()
     strpbrk()
     strpos()
     strrchr()
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/array.c?r1=1.400&r2=1.401&diff_format=u
Index: php-src/ext/standard/array.c
diff -u php-src/ext/standard/array.c:1.400 php-src/ext/standard/array.c:1.401
--- php-src/ext/standard/array.c:1.400  Tue Dec 19 21:38:59 2006
+++ php-src/ext/standard/array.c        Thu Dec 21 21:47:56 2006
@@ -21,7 +21,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: array.c,v 1.400 2006/12/19 21:38:59 andrei Exp $ */
+/* $Id: array.c,v 1.401 2006/12/21 21:47:56 andrei Exp $ */
 
 #include "php.h"
 #include "php_ini.h"
@@ -394,6 +394,7 @@
        zval *fval, *sval;
        zval first, second;
        int result;
+       zend_uchar type;
  
        f = *((Bucket **) a);
        s = *((Bucket **) b);
@@ -402,20 +403,26 @@
        sval = *((zval **) s->pData);
        first = *fval;
        second = *sval;
-       if (Z_TYPE_P(fval) != IS_STRING) {
+
+       type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_P(fval), 
Z_TYPE_P(sval));
+       if (Z_TYPE_P(fval) != type) {
                zval_copy_ctor(&first);
-               convert_to_string(&first);
+               convert_to_explicit_type(&first, type);
        }
-       if (Z_TYPE_P(sval) != IS_STRING) {
+       if (Z_TYPE_P(sval) != type) {
                zval_copy_ctor(&second);
-               convert_to_string(&second);
+               convert_to_explicit_type(&second, type);
        }
 
-       result = strnatcmp_ex(Z_STRVAL(first), Z_STRLEN(first), 
Z_STRVAL(second), Z_STRLEN(second), fold_case);
+       if (type == IS_UNICODE) {
+               result = u_strnatcmp_ex(Z_USTRVAL(first), Z_USTRLEN(first), 
Z_USTRVAL(second), Z_USTRLEN(second), fold_case);
+       } else {
+               result = strnatcmp_ex(Z_STRVAL(first), Z_STRLEN(first), 
Z_STRVAL(second), Z_STRLEN(second), fold_case);
+       }
 
-       if (Z_TYPE_P(fval) != IS_STRING)
+       if (Z_TYPE_P(fval) != type)
                zval_dtor(&first);
-       if (Z_TYPE_P(sval) != IS_STRING)
+       if (Z_TYPE_P(sval) != type)
                zval_dtor(&second);
        
        return result;
@@ -433,14 +440,14 @@
 
 static void php_natsort(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
 {
-       zval **array;
+       zval *array;
        HashTable *target_hash;
 
-       if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &array) == 
FAILURE) {
-               WRONG_PARAM_COUNT;
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &array) == 
FAILURE) {
+               return;
        }
 
-       target_hash = HASH_OF(*array);
+       target_hash = HASH_OF(array);
        if (!target_hash) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "The argument 
should be an array");
                return;
@@ -460,7 +467,7 @@
 }
 
 
-/* {{{ proto void natsort(array &array_arg)
+/* {{{ proto void natsort(array &array_arg) U
    Sort an array using natural sort */
 PHP_FUNCTION(natsort)
 {
@@ -469,7 +476,7 @@
 /* }}} */
 
 
-/* {{{ proto void natcasesort(array &array_arg)
+/* {{{ proto void natcasesort(array &array_arg) U
    Sort an array using case-insensitive natural sort */
 PHP_FUNCTION(natcasesort)
 {
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/php_string.h?r1=1.105&r2=1.106&diff_format=u
Index: php-src/ext/standard/php_string.h
diff -u php-src/ext/standard/php_string.h:1.105 
php-src/ext/standard/php_string.h:1.106
--- php-src/ext/standard/php_string.h:1.105     Mon Dec 18 15:04:36 2006
+++ php-src/ext/standard/php_string.h   Thu Dec 21 21:47:56 2006
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: php_string.h,v 1.105 2006/12/18 15:04:36 iliaa Exp $ */
+/* $Id: php_string.h,v 1.106 2006/12/21 21:47:56 andrei Exp $ */
 
 /* Synced with php 3.0 revision 1.43 1999-06-16 [ssb] */
 
@@ -111,7 +111,12 @@
        strnatcmp_ex(a, strlen(a), b, strlen(b), 0)
 #define strnatcasecmp(a, b) \
        strnatcmp_ex(a, strlen(a), b, strlen(b), 1)
+#define u_strnatcmp(a, b) \
+       u_strnatcmp_ex(a, u_strlen(a), b, strlen(b), 0)
+#define u_strnatcasecmp(a, b) \
+       u_strnatcmp_ex(a, u_strlen(a), b, strlen(b), 1)
 PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t 
b_len, int fold_case);
+PHPAPI int u_strnatcmp_ex(UChar const *a, size_t a_len, UChar const *b, size_t 
b_len, int fold_case);
 
 #ifdef HAVE_LOCALECONV
 PHPAPI struct lconv *localeconv_r(struct lconv *out);
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.626&r2=1.627&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.626 php-src/ext/standard/string.c:1.627
--- php-src/ext/standard/string.c:1.626 Wed Dec 20 23:36:43 2006
+++ php-src/ext/standard/string.c       Thu Dec 21 21:47:56 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.626 2006/12/20 23:36:43 tony2001 Exp $ */
+/* $Id: string.c,v 1.627 2006/12/21 21:47:56 andrei Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -6970,22 +6970,24 @@
  */
 static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
 {
-       zval **s1, **s2;
+       zstr s1, s2;
+       int s1_len, s2_len;
+       zend_uchar type;
 
-       if (ZEND_NUM_ARGS()!=2 || zend_get_parameters_ex(2, &s1, &s2) == 
FAILURE) {
-               WRONG_PARAM_COUNT;
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &s1, &s1_len,
+                                                         &type, &s2, &s2_len, 
&type) == FAILURE) {
+               return;
        }
 
-       convert_to_string_ex(s1);
-       convert_to_string_ex(s2);
-
-       RETURN_LONG(strnatcmp_ex(Z_STRVAL_PP(s1), Z_STRLEN_PP(s1),
-                                                        Z_STRVAL_PP(s2), 
Z_STRLEN_PP(s2),
-                                                        fold_case));
+       if (type == IS_UNICODE) {
+               RETURN_LONG(u_strnatcmp_ex(s1.u, s1_len, s2.u, s2_len, 
fold_case));
+       } else {
+               RETURN_LONG(strnatcmp_ex(s1.s, s1_len, s2.s, s2_len, 
fold_case));
+       }
 }
 /* }}} */
 
-/* {{{ proto int strnatcmp(string s1, string s2)
+/* {{{ proto int strnatcmp(string s1, string s2) U
    Returns the result of string comparison using 'natural' algorithm */
 PHP_FUNCTION(strnatcmp)
 {
@@ -7083,7 +7085,7 @@
 }
 /* }}} */
 
-/* {{{ proto int strnatcasecmp(string s1, string s2)
+/* {{{ proto int strnatcasecmp(string s1, string s2) U
    Returns the result of case-insensitive string comparison using 'natural' 
algorithm */
 PHP_FUNCTION(strnatcasecmp)
 {
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/strnatcmp.c?r1=1.10&r2=1.11&diff_format=u
Index: php-src/ext/standard/strnatcmp.c
diff -u php-src/ext/standard/strnatcmp.c:1.10 
php-src/ext/standard/strnatcmp.c:1.11
--- php-src/ext/standard/strnatcmp.c:1.10       Thu Jul 15 01:26:03 2004
+++ php-src/ext/standard/strnatcmp.c    Thu Dec 21 21:47:56 2006
@@ -1,9 +1,9 @@
 /* -*- mode: c; c-file-style: "k&r" -*-
 
-  Modified for PHP by Andrei Zmievski <[EMAIL PROTECTED]>
+  Modified for PHP by Andrei Zmievski <[EMAIL PROTECTED]>
 
   strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
-  Copyright (C) 2000 by Martin Pool <[EMAIL PROTECTED]>
+  Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -38,12 +38,12 @@
 
 #if 0
 static char const *version UNUSED =
-    "$Id: strnatcmp.c,v 1.10 2004/07/15 01:26:03 iliaa Exp $";
+    "$Id: strnatcmp.c,v 1.11 2006/12/21 21:47:56 andrei Exp $";
 #endif
 /* {{{ compare_right
  */
 static int
-compare_right(char const **a, char const *aend, char const **b, char const 
*bend)
+compare_right(char const *a, char const *b)
 {
        int bias = 0;
 
@@ -51,20 +51,22 @@
           value wins, but we can't know that it will until we've scanned
           both numbers to know that they have the same magnitude, so we
           remember it in BIAS. */
-       for(;; (*a)++, (*b)++) {
-               if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
-                       (*b == bend || !isdigit((int)(unsigned char)**b)))
+       for(;; a++, b++) {
+               if (!isdigit((int)(unsigned char)*a) &&
+                       !isdigit((int)(unsigned char)*b))
                        return bias;
-               else if (*a == aend || !isdigit((int)(unsigned char)**a))
+               else if (!isdigit((int)(unsigned char)*a))
                        return -1;
-               else if (*b == bend || !isdigit((int)(unsigned char)**b))
+               else if (!isdigit((int)(unsigned char)*b))
                        return +1;
-               else if (**a < **b) {
+               else if (*a < *b) {
                        if (!bias)
                                bias = -1;
-               } else if (**a > **b) {
+               } else if (*a > *b) {
                        if (!bias)
                                bias = +1;
+               } else if (!*a && !*b) {
+                       return bias;
                }
      }
 
@@ -75,21 +77,21 @@
 /* {{{ compare_left
  */
 static int
-compare_left(char const **a, char const *aend, char const **b, char const 
*bend)
+compare_left(char const *a, char const *b)
 {
      /* Compare two left-aligned numbers: the first to have a
         different value wins. */
-       for(;; (*a)++, (*b)++) {
-               if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
-                       (*b == bend || !isdigit((int)(unsigned char)**b)))
+       for(;; a++, b++) {
+               if (!isdigit((int)(unsigned char)*a) &&
+                       !isdigit((int)(unsigned char)*b))
                        return 0;
-               else if (*a == aend || !isdigit((int)(unsigned char)**a))
+               else if (!isdigit((int)(unsigned char)*a))
                        return -1;
-               else if (*b == bend || !isdigit((int)(unsigned char)**b))
+               else if (!isdigit((int)(unsigned char)*b))
                        return +1;
-                else if (**a < **b)
+                else if (*a < *b)
                         return -1;
-                else if (**a > **b)
+                else if (*a > *b)
                         return +1;
      }
          
@@ -102,46 +104,40 @@
 PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t 
b_len, int fold_case)
 {
        char ca, cb;
-       char const *ap, *bp;
-       char const *aend = a + a_len,
-                          *bend = b + b_len;
+       int ai, bi;
        int fractional, result;
 
-       if (a_len == 0 || b_len == 0)
-               return a_len - b_len;
-
-       ap = a;
-       bp = b;
+       ai = bi = 0;
        while (1) {
-               ca = *ap; cb = *bp;
+               ca = a[ai]; cb = b[bi];
 
                /* skip over leading spaces or zeros */
                while (isspace((int)(unsigned char)ca))
-                       ca = *++ap;
+                       ca = a[++ai];
 
                while (isspace((int)(unsigned char)cb))
-                       cb = *++bp;
+                       cb = b[++bi];
 
                /* process run of digits */
                if (isdigit((int)(unsigned char)ca)  &&  isdigit((int)(unsigned 
char)cb)) {
                        fractional = (ca == '0' || cb == '0');
 
-                       if (fractional)
-                               result = compare_left(&ap, aend, &bp, bend);
-                       else
-                               result = compare_right(&ap, aend, &bp, bend);
-
-                       if (result != 0)
-                               return result;
-                       else if (ap == aend && bp == bend)
-                               /* End of the strings. Let caller sort them 
out. */
-                               return 0;
-                       else {
-                               /* Keep on comparing from the current point. */
-                               ca = *ap; cb = *bp;
+                       if (fractional) {
+                               if ((result = compare_left(a+ai, b+bi)) != 0) {
+                                       return result;
+                               }
+                       } else {
+                               if ((result = compare_right(a+ai, b+bi)) != 0)
+                                       return result;
                        }
                }
 
+               if (!ca && !cb) {
+                       /* The strings compare the same.  Perhaps the caller
+                          will want to call strcmp to break the tie. */
+                       return 0;
+               }
+
                if (fold_case) {
                        ca = toupper((int)(unsigned char)ca);
                        cb = toupper((int)(unsigned char)cb);
@@ -152,19 +148,159 @@
                else if (ca > cb)
                        return +1;
 
-               ++ap; ++bp;
-               if (ap >= aend && bp >= bend)
+               ++ai; ++bi;
+       }
+}
+/* }}} */
+
+/* {{{ u_compare_right
+ */
+static int
+u_compare_right(UChar const *a, int a_len, int *a_curr, UChar const *b, int 
b_len, int *b_curr)
+{
+       UChar32 ca, cb;
+       int a_off, b_off;
+       int bias = 0;
+
+       /* The longest run of digits wins.  That aside, the greatest
+          value wins, but we can't know that it will until we've scanned
+          both numbers to know that they have the same magnitude, so we
+          remember it in BIAS. */
+
+       for (;;) {
+               a_off = *a_curr;
+               b_off = *b_curr;
+               U16_NEXT(a, a_off, a_len, ca);
+               U16_NEXT(b, b_off, b_len, cb);
+
+               if (!u_isdigit(ca) && !u_isdigit(cb)) {
+                       return bias;
+               } else if (!u_isdigit(ca)) {
+                       return -1;
+               } else if (!u_isdigit(cb)) {
+                       return +1;
+               } else if (ca < cb) {
+                       if (!bias)
+                               bias = -1;
+               } else if (ca > cb) {
+                       if (!bias)
+                               bias = +1;
+               } else if (ca == 0 && cb == 0) {
+                       return bias;
+               }
+               *a_curr = a_off;
+               *b_curr = b_off;
+       }
+
+       return 0;
+}
+/* }}} */
+
+/* {{{ u_compare_left
+ */
+static int
+u_compare_left(UChar const *a, int a_len, int *a_curr, UChar const *b, int 
b_len, int *b_curr)
+{
+       int a_off, b_off;
+       UChar32 ca, cb;
+
+     /* Compare two left-aligned numbers: the first to have a
+        different value wins. */
+       for (;;) {
+               a_off = *a_curr;
+               b_off = *b_curr;
+               U16_NEXT(a, a_off, a_len, ca);
+               U16_NEXT(b, b_off, b_len, cb);
+
+               if (!u_isdigit(ca) && !u_isdigit(cb)) {
+                       return 0;
+               } else if (!u_isdigit(ca)) {
+                       return -1;
+               } else if (!u_isdigit(cb)) {
+                       return +1;
+               } else if (ca < cb) {
+                       return -1;
+               } else if (ca > cb) {
+                       return +1;
+               }
+               *a_curr = a_off;
+               *b_curr = b_off;
+       }
+
+       return 0;
+}
+/* }}} */
+
+/* {{{ u_strnatcmp_ex
+ */
+PHPAPI int u_strnatcmp_ex(UChar const *a, size_t a_len, UChar const *b, size_t 
b_len, int fold_case)
+{
+       UChar ca, cb;
+       UChar const *ap, *bp;
+       int fractional, result;
+       int a_off, b_off;
+       int a_curr, b_curr;
+
+       if (a_len == 0 || b_len == 0)
+               return a_len - b_len;
+
+       ap = a;
+       bp = b;
+       a_curr = b_curr = 0;
+
+       while (1) {
+               a_off = a_curr;
+               b_off = b_curr;
+               U16_NEXT(a, a_curr, a_len, ca);
+               U16_NEXT(b, b_curr, b_len, cb);
+
+               /* skip over leading spaces */
+               for ( ; u_isspace(ca) && a_curr < a_len; ) {
+                       a_off = a_curr;
+                       U16_NEXT(a, a_curr, a_len, ca);
+               }
+
+               for ( ; u_isspace(cb) && b_curr < b_len; ) {
+                       b_off = b_curr;
+                       U16_NEXT(b, b_curr, b_len, cb);
+               }
+
+               /* process run of digits */
+               if (u_isdigit(ca) && u_isdigit(cb)) {
+                       fractional = (ca == 0x30 /*'0'*/ || cb == 0x30 /*'0'*/);
+
+                       if (fractional) {
+                               if ((result = u_compare_left(a, a_len, &a_off, 
b, b_len, &b_off)) != 0) {
+                                       return result;
+                               }
+                       } else {
+                               if ((result = u_compare_right(a, a_len, &a_off, 
b, b_len, &b_off)) != 0) {
+                                       return result;
+                               }
+                       }
+
+                       a_curr = a_off;
+                       b_curr = b_off;
+               }
+
+               if (ca == 0 && cb == 0) {
                        /* The strings compare the same.  Perhaps the caller
                           will want to call strcmp to break the tie. */
                        return 0;
-               else if (ap >= aend)
+               }
+
+               if (fold_case) {
+                       ca = u_toupper(ca);
+                       cb = u_toupper(cb);
+               }
+
+               if (ca < cb)
                        return -1;
-               else if (bp >= bend)
-                       return 1;
+               else if (ca > cb)
+                       return +1;
        }
 }
 /* }}} */
-
 /*
  * Local variables:
  * tab-width: 4

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to