rolland         Mon Oct  3 02:14:12 2005 EDT

  Modified files:              
    /php-src/ext/standard       string.c 
  Log:
  - Unicode impl of similar_text()
  
  
http://cvs.php.net/diff.php/php-src/ext/standard/string.c?r1=1.489&r2=1.490&ty=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.489 php-src/ext/standard/string.c:1.490
--- php-src/ext/standard/string.c:1.489 Fri Sep 30 09:19:15 2005
+++ php-src/ext/standard/string.c       Mon Oct  3 02:14:10 2005
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.489 2005/09/30 13:19:15 rolland Exp $ */
+/* $Id: string.c,v 1.490 2005/10/03 06:14:10 rolland Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -3410,6 +3410,42 @@
 }
 /* }}} */
 
+/* {{{ php_u_similar_str
+ * Find the longest run of identical code points shared by two UTF-16 strings.
+ * len1/len2, the reported start offsets (*pos1, *pos2) and the reported run
+ * extents (*end1, *end2) are counted in UTF-16 code units; *max is counted in
+ * code points.  *max is always written; the other four outputs are written
+ * only when a common run is found (*max > 0).
+ */
+static void php_u_similar_str(const UChar *txt1, int32_t len1, const UChar *txt2, int32_t len2, int32_t *pos1, int32_t *end1, int32_t *pos2, int32_t *end2, int *max)
+{
+       int32_t start1, start2, span1, span2, run;
+       UChar32 cp1, cp2;
+
+       *max = 0;
+       start1 = 0;
+       while (start1 < len1) {
+               start2 = 0;
+               while (start2 < len2) {
+                       /* Measure the common run beginning at (start1, start2). */
+                       run = 0;
+                       span1 = 0;
+                       span2 = 0;
+                       while ((start1 + span1 < len1) && (start2 + span2 < len2)) {
+                               U16_NEXT(txt1 + start1, span1, len1 - start1, cp1);
+                               U16_NEXT(txt2 + start2, span2, len2 - start2, cp2);
+                               if (cp1 != cp2) {
+                                       /* Step back over the mismatching code points so
+                                        * the spans cover only the matched run. */
+                                       U16_BACK_1(txt1 + start1, 0, span1);
+                                       U16_BACK_1(txt2 + start2, 0, span2);
+                                       break;
+                               }
+                               run++;
+                       }
+                       /* Strict comparison: the first longest run found is kept. */
+                       if (run > *max) {
+                               *max = run;
+                               *pos1 = start1;
+                               *end1 = span1;
+                               *pos2 = start2;
+                               *end2 = span2;
+                       }
+                       U16_FWD_1(txt2, start2, len2);
+               }
+               U16_FWD_1(txt1, start1, len1);
+       }
+}
+/* }}} */
+
 /* {{{ php_similar_str
  */
 static void php_similar_str(const char *txt1, int len1, const char *txt2, int 
len2, int *pos1, int *pos2, int *max)
@@ -3433,6 +3469,27 @@
 }
 /* }}} */
 
+/* {{{ php_u_similar_char
+ * Count the code points two UTF-16 strings have in common: locate their
+ * longest common run with php_u_similar_str(), then recurse on the text to
+ * the left and to the right of that run and add up the results.
+ * len1/len2 are in UTF-16 code units; the return value is the sum of the
+ * run lengths reported by php_u_similar_str() (0 when nothing matches).
+ */
+static int php_u_similar_char(const UChar *txt1, int32_t len1, const UChar *txt2, int32_t len2)
+{
+       int sum, max;
+       int32_t pos1, pos2, end1, end2;
+
+       php_u_similar_str(txt1, len1, txt2, len2, &pos1, &end1, &pos2, &end2, &max);
+       /* pos/end are only meaningful when a common run was found (max > 0). */
+       if ((sum = max)) {
+               /* Left remainders: recurse only when both are non-empty. */
+               if (pos1 && pos2) {
+                       sum += php_u_similar_char(txt1, pos1, txt2, pos2);
+               }
+               /* Right remainders: must stay on the UChar code path.  The
+                * byte-oriented php_similar_char() takes char pointers and
+                * byte lengths, so it cannot be used on these buffers. */
+               if ((pos1 + end1 < len1) && (pos2 + end2 < len2)) {
+                       sum += php_u_similar_char(txt1 + pos1 + end1, len1 - pos1 - end1,
+                                                 txt2 + pos2 + end2, len2 - pos2 - end2);
+               }
+       }
+       return sum;
+}
+/* }}} */
+
 /* {{{ php_similar_char
  */
 static int php_similar_char(const char *txt1, int len1, const char *txt2, int 
len2)
@@ -3463,30 +3520,49 @@
        zval **t1, **t2, **percent;
        int ac = ZEND_NUM_ARGS();
        int sim;
-       
+       zend_uchar str_type;
+
        if (ac < 2 || ac > 3 || zend_get_parameters_ex(ac, &t1, &t2, &percent) 
== FAILURE) {
                WRONG_PARAM_COUNT;
        }       
-
-       convert_to_string_ex(t1);
-       convert_to_string_ex(t2);
-
+       if (Z_TYPE_PP(t1) != IS_UNICODE && Z_TYPE_PP(t1) != IS_BINARY && 
Z_TYPE_PP(t1) != IS_STRING) {
+               convert_to_text_ex(t1);
+       }
+       if (Z_TYPE_PP(t2) != IS_UNICODE && Z_TYPE_PP(t2) != IS_BINARY && 
Z_TYPE_PP(t2) != IS_STRING) {
+               convert_to_text_ex(t2);
+       }
+       str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(t1), 
Z_TYPE_PP(t2));
+       if (str_type == (zend_uchar)-1) {
+               convert_to_binary_ex(t1);
+               convert_to_binary_ex(t2);
+       } else {
+               convert_to_explicit_type_ex(t1, str_type);
+               convert_to_explicit_type_ex(t2, str_type);
+       }
        if (ac > 2) {
                convert_to_double_ex(percent);
        }
-       
-       if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) {
+
+       if (Z_UNILEN_PP(t1) + Z_UNILEN_PP(t2) == 0) {
                if (ac > 2) {
                        Z_DVAL_PP(percent) = 0;
                }
 
                RETURN_LONG(0);
        }
-       
-       sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), 
Z_STRVAL_PP(t2), Z_STRLEN_PP(t2));     
+
+       if (str_type == IS_UNICODE) {
+               sim = php_u_similar_char(Z_USTRVAL_PP(t1), Z_USTRLEN_PP(t1), 
Z_USTRVAL_PP(t2), Z_USTRLEN_PP(t2));
+       } else {
+               sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), 
Z_STRVAL_PP(t2), Z_STRLEN_PP(t2));     
+       }
 
        if (ac > 2) {
-               Z_DVAL_PP(percent) = sim * 200.0 / (Z_STRLEN_PP(t1) + 
Z_STRLEN_PP(t2));
+               if (str_type == IS_UNICODE) {
+                       Z_DVAL_PP(percent) = sim * 200.0 / (Z_USTRCPLEN_PP(t1) 
+ Z_USTRCPLEN_PP(t2));
+               } else {
+                       Z_DVAL_PP(percent) = sim * 200.0 / (Z_STRLEN_PP(t1) + 
Z_STRLEN_PP(t2));
+               }
        }
 
        RETURN_LONG(sim);

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to