andrei          Fri Sep 22 17:47:10 2006 UTC

  Modified files:              
    /php-src    unicode-progress.txt 
    /php-src/ext/standard       string.c 
  Log:
  Unicode support in count_chars(). Note that due to the size of the
  Unicode character set only mode=1 is supported.
  
  
http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.47&r2=1.48&diff_format=u
Index: php-src/unicode-progress.txt
diff -u php-src/unicode-progress.txt:1.47 php-src/unicode-progress.txt:1.48
--- php-src/unicode-progress.txt:1.47   Wed Sep 20 20:30:19 2006
+++ php-src/unicode-progress.txt        Fri Sep 22 17:47:09 2006
@@ -16,10 +16,6 @@
 
   string.c
   --------
-    count_chars()
-        Params API. Do we really want to go through the whole Unicode table?
-        May need to use hashtable instead of array.
-
     hebrev(), hebrevc()
         Figure out if this is something we can use ICU for, internally.
         Check with Zeev.
@@ -122,7 +118,6 @@
     max()
     range()
     shuffle()
-    strrchr()
 
     end(), prev(), next(), reset(), current(), key()
 
@@ -150,6 +145,7 @@
     bin2hex()
     chr()
     chunk_split()
+    count_chars()
     dirname()
     explode()
     implode()
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.588&r2=1.589&diff_format=u
Index: php-src/ext/standard/string.c
diff -u php-src/ext/standard/string.c:1.588 php-src/ext/standard/string.c:1.589
--- php-src/ext/standard/string.c:1.588 Wed Sep 20 20:30:19 2006
+++ php-src/ext/standard/string.c       Fri Sep 22 17:47:09 2006
@@ -18,7 +18,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: string.c,v 1.588 2006/09/20 20:30:19 andrei Exp $ */
+/* $Id: string.c,v 1.589 2006/09/22 17:47:09 andrei Exp $ */
 
 /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
 
@@ -6295,79 +6295,115 @@
 }
 /* }}} */
 
-/* {{{ proto mixed count_chars(string input [, int mode])
+/* {{{ proto mixed count_chars(string input [, int mode]) U
    Returns info about what characters are used in input */
 PHP_FUNCTION(count_chars)
 {
-       zval **input, **mode;
+       zstr input;
+       int input_len;
+       zend_uchar type;
+       long mode = 0;
        int chars[256];
-       int ac=ZEND_NUM_ARGS();
-       int mymode=0;
+       HashTable uchars;
+       UChar32 cp;
+       int *uchar_cnt_ptr, uchar_cnt;
        unsigned char *buf;
-       int len, inx;
+       int inx;
        char retstr[256];
        int retlen=0;
 
-       if (ac < 1 || ac > 2 || zend_get_parameters_ex(ac, &input, &mode) == FAILURE) {
-               WRONG_PARAM_COUNT;
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "t|l", &input, &input_len,
+                                                         &type, &mode) == FAILURE) {
+               return;
        }
 
-       convert_to_string_ex(input);
-
-       if (ac == 2) {
-               convert_to_long_ex(mode);
-               mymode = Z_LVAL_PP(mode);
-
-               if (mymode < 0 || mymode > 4) {
+       if (ZEND_NUM_ARGS() > 1) {
+               if (mode < 0 || mode > 4) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown mode.");
                        RETURN_FALSE;
                }
+               if (UG(unicode) && mode != 1) {
+                       php_error_docref(NULL TSRMLS_DC, E_WARNING, "Only mode=1 is supported with Unicode strings");
+               }
        }
 
-       len = Z_STRLEN_PP(input);
-       buf = (unsigned char *) Z_STRVAL_PP(input);
-       memset((void*) chars, 0, sizeof(chars));
+       if (type == IS_UNICODE) {
+               UChar buf[3];
+               int buf_len;
 
-       while (len > 0) {
-               chars[*buf]++;
-               buf++;
-               len--;
-       }
+               zend_hash_init(&uchars, 0, NULL, NULL, 0);
 
-       if (mymode < 3) {
-               array_init(return_value);
-       }
+               inx = 0;
+               while (inx < input_len) {
+                       U16_NEXT_UNSAFE(input.u, inx, cp);
+                       if (zend_hash_index_find(&uchars, cp, (void**)&uchar_cnt_ptr) == SUCCESS) {
+                               (*uchar_cnt_ptr)++;
+                       } else {
+                               uchar_cnt = 1;
+                               zend_hash_index_update(&uchars, cp, &uchar_cnt, sizeof(int), NULL);
+                       }
+               }
 
-       for (inx = 0; inx < 256; inx++) {
-               switch (mymode) {
-                       case 0:
-                               add_index_long(return_value, inx, chars[inx]);
-                               break;
-                       case 1:
-                               if (chars[inx] != 0) {
-                                       add_index_long(return_value, inx, chars[inx]);
-                               }
-                               break;
-                       case 2:
-                               if (chars[inx] == 0) {
-                                       add_index_long(return_value, inx, chars[inx]);
-                               }
-                               break;
-                       case 3:
-                               if (chars[inx] != 0) {
-                                       retstr[retlen++] = inx;
-                               }
-                               break;
-                       case 4:
-                               if (chars[inx] == 0) {
-                                       retstr[retlen++] = inx;
-                               }
-                               break;
+               if (mode < 3) {
+                       array_init(return_value);
                }
-       }
 
-       if (mymode >= 3 && mymode <= 4) {
-               RETURN_STRINGL(retstr, retlen, 1);
+               for (zend_hash_internal_pointer_reset(&uchars);
+                        zend_hash_get_current_data(&uchars, (void**)&uchar_cnt_ptr) == SUCCESS;
+                        zend_hash_move_forward(&uchars)) {
+
+                       zend_hash_get_current_key(&uchars, NULL, (ulong*)&cp, 0);
+
+                       buf_len = zend_codepoint_to_uchar(cp, buf);
+                       buf[buf_len] = 0;
+                       add_u_assoc_long_ex(return_value, IS_UNICODE, ZSTR(buf), buf_len+1, *uchar_cnt_ptr);
+               }
+
+               zend_hash_destroy(&uchars);
+       } else {
+               buf = (unsigned char *) input.s;
+               memset((void*) chars, 0, sizeof(chars));
+
+               while (input_len > 0) {
+                       chars[*buf]++;
+                       buf++;
+                       input_len--;
+               }
+
+               if (mode < 3) {
+                       array_init(return_value);
+               }
+
+               for (inx = 0; inx < 256; inx++) {
+                       switch (mode) {
+                               case 0:
+                                       add_index_long(return_value, inx, chars[inx]);
+                                       break;
+                               case 1:
+                                       if (chars[inx] != 0) {
+                                               add_index_long(return_value, inx, chars[inx]);
+                                       }
+                                       break;
+                               case 2:
+                                       if (chars[inx] == 0) {
+                                               add_index_long(return_value, inx, chars[inx]);
+                                       }
+                                       break;
+                               case 3:
+                                       if (chars[inx] != 0) {
+                                               retstr[retlen++] = inx;
+                                       }
+                                       break;
+                               case 4:
+                                       if (chars[inx] == 0) {
+                                               retstr[retlen++] = inx;
+                                       }
+                                       break;
+                       }
+               }
+               if (mode >= 3 && mode <= 4) {
+                       RETURN_STRINGL(retstr, retlen, 1);
+               }
        }
 }
 /* }}} */



-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to