andrei          Sat Feb 11 00:16:43 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode_iterators.c 
  Log:
  Implement character/word/line/sentence iterators and their reverse
  counterparts.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.18&r2=1.19&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.18 
php-src/ext/unicode/unicode_iterators.c:1.19
--- php-src/ext/unicode/unicode_iterators.c:1.18        Fri Feb 10 00:23:29 2006
+++ php-src/ext/unicode/unicode_iterators.c     Sat Feb 11 00:16:43 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.19 2006/02/11 00:16:43 andrei Exp $ */
 
 /*
  * TODO
@@ -28,11 +28,16 @@
 #include "php.h"
 #include "zend_interfaces.h"
 #include "zend_exceptions.h"
+#include <unicode/ubrk.h>
 
 typedef enum { /* iterator kinds; the numeric value is used as an index into iter_ops[] */
        ITER_CODE_UNIT,
        ITER_CODE_POINT, /* fallback type when the constructor is given an invalid type */
        ITER_COMB_SEQUENCE,
+       ITER_CHARACTER, /* first UBreakIterator-backed type; brk_type_map[] is indexed relative to it */
+       ITER_WORD,
+       ITER_LINE,
+       ITER_SENTENCE, /* last UBreakIterator-backed type */
        ITER_TYPE_LAST, /* one past the last valid type; used as the exclusive upper bound in range checks */
 } text_iter_type;
 
@@ -60,6 +65,12 @@
                        int32_t start;
                        int32_t end;
                } cs;
+               struct {
+                       UBreakIterator *iter;
+                       int32_t index;
+                       int32_t start;
+                       int32_t end;
+               } brk;
        } u;
 } text_iter_obj;
 
@@ -76,6 +87,13 @@
        void (*rewind) (text_iter_obj* object TSRMLS_DC);
 } text_iter_ops;
 
+enum UBreakIteratorType brk_type_map[] = { /* ICU break type passed to ubrk_open(), indexed by (iterator type - ITER_CHARACTER) */
+       UBRK_CHARACTER, /* ITER_CHARACTER */
+       UBRK_WORD, /* ITER_WORD */
+       UBRK_LINE, /* ITER_LINE */
+       UBRK_SENTENCE, /* ITER_SENTENCE */
+};
+
 PHPAPI zend_class_entry* text_iterator_aggregate_ce;
 PHPAPI zend_class_entry* text_iterator_ce;
 PHPAPI zend_class_entry* rev_text_iterator_ce;
@@ -276,12 +294,95 @@
 };
 
 
+/* UBreakIterator Character Ops */
+
+/*
+ * Report whether the break-based iterator still has a current segment.
+ *
+ * The edge that moves on each step is brk.start when ITER_REVERSE is set
+ * and brk.end otherwise; the iterator is valid until that edge becomes
+ * UBRK_DONE.  Returns non-zero when valid, zero when exhausted.
+ */
+static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC)
+{
+       int32_t leading_edge = (object->flags & ITER_REVERSE)
+               ? object->u.brk.start
+               : object->u.brk.end;
+
+       return (leading_edge != UBRK_DONE);
+}
+
+/*
+ * Copy the text between the current pair of boundaries into object->current.
+ *
+ * The segment is text[start, end), taken from brk.start and brk.end.  The
+ * buffer behind object->current is grown when it is too small for the
+ * segment plus a terminator; the result is zero-terminated and its length is
+ * stored in the zval.
+ *
+ * Once iteration has run off the end, brk.end (forward) or brk.start
+ * (reverse) holds UBRK_DONE.  Subtracting with that sentinel does not give a
+ * real segment length and can point the copy outside the text, so an
+ * exhausted iterator yields the empty string instead.
+ */
+static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC)
+{
+       uint32_t length = 0;
+       int32_t start = object->u.brk.start;
+       int32_t end = object->u.brk.end;
+
+       if (object->flags & ITER_REVERSE) {
+               if (end == UBRK_DONE) {
+                       end = object->text_len;
+               }
+       } else {
+               if (start == UBRK_DONE) {
+                       start = 0;
+               }
+       }
+
+       /* Only a well-formed, non-negative range may be turned into a length. */
+       if (start != UBRK_DONE && end != UBRK_DONE && end >= start) {
+               length = end - start;
+       } else {
+               start = 0;
+       }
+
+       if (length > object->current_alloc-1) {
+               object->current_alloc = length+1;
+               Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
+       }
+       u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length);
+       Z_USTRVAL_P(object->current)[length] = 0;
+       Z_USTRLEN_P(object->current) = length;
+}
+
+/*
+ * Return the key of the current segment: the running counter kept in
+ * brk.index.
+ */
+static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC)
+{
+       int position = object->u.brk.index;
+
+       return position;
+}
+
+/*
+ * Advance to the next segment.
+ *
+ * The edge that moves ("leading") is brk.start in reverse mode and brk.end
+ * in forward mode.  If it is already UBRK_DONE nothing changes.  Otherwise
+ * the other edge takes over the old leading value, the leading edge is
+ * refreshed from ubrk_previous()/ubrk_next(), and brk.index is incremented.
+ */
+static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC)
+{
+       int backwards = (object->flags & ITER_REVERSE) != 0;
+       int32_t *leading  = backwards ? &object->u.brk.start : &object->u.brk.end;
+       int32_t *trailing = backwards ? &object->u.brk.end   : &object->u.brk.start;
+
+       if (*leading == UBRK_DONE) {
+               return;
+       }
+
+       *trailing = *leading;
+       *leading  = backwards ? ubrk_previous(object->u.brk.iter)
+                             : ubrk_next(object->u.brk.iter);
+       object->u.brk.index++;
+}
+
+/*
+ * Reset the iterator to its first segment and zero the key counter.
+ *
+ * Forward: start comes from ubrk_first(), end from the following
+ * ubrk_next().  Reverse: end comes from ubrk_last(), start from the
+ * following ubrk_previous().
+ */
+static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC)
+{
+       UBreakIterator *brk = object->u.brk.iter;
+
+       object->u.brk.index = 0;
+
+       if (!(object->flags & ITER_REVERSE)) {
+               object->u.brk.start = ubrk_first(brk);
+               object->u.brk.end   = ubrk_next(brk);
+               return;
+       }
+
+       object->u.brk.end   = ubrk_last(brk);
+       object->u.brk.start = ubrk_previous(brk);
+}
+
+static text_iter_ops text_iter_brk_ops = { /* operation table used by every UBreakIterator-backed iterator type; entries are positional */
+       text_iter_brk_char_valid,
+       text_iter_brk_char_current,
+       text_iter_brk_char_key,
+       text_iter_brk_char_next,
+       text_iter_brk_char_rewind, /* fills the rewind slot, the last member of text_iter_ops */
+};
+
+
 /* Ops array */
 
 static text_iter_ops* iter_ops[] = { /* indexed by the iterator's type value, so the order must follow text_iter_type */
        &text_iter_cu_ops, /* ITER_CODE_UNIT */
        &text_iter_cp_ops, /* ITER_CODE_POINT */
        &text_iter_cs_ops, /* ITER_COMB_SEQUENCE */
+       &text_iter_brk_ops, /* ITER_CHARACTER */
+       &text_iter_brk_ops, /* ITER_WORD */
+       &text_iter_brk_ops, /* ITER_LINE */
+       &text_iter_brk_ops, /* ITER_SENTENCE */
 };
 
 /* Iterator Funcs */
@@ -376,6 +477,9 @@
        if (intern->text) {
                efree(intern->text);
        }
+       /* A UBreakIterator is opened for every type from ITER_CHARACTER upward,
+        * so it has to be closed for ITER_CHARACTER too, not only for the types
+        * that come after it. */
+       if (intern->type >= ITER_CHARACTER && intern->u.brk.iter) {
+               ubrk_close(intern->u.brk.iter);
+       }
        zval_ptr_dtor(&intern->current);
        efree(object);
 }
@@ -399,6 +503,7 @@
        intern->current_alloc = 3;
        Z_USTRVAL_P(intern->current) = eumalloc(3);
        Z_USTRVAL_P(intern->current)[0] = 0;
+       Z_USTRLEN_P(intern->current) = 0;
        Z_TYPE_P(intern->current) = IS_UNICODE;
 
        retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
@@ -426,11 +531,11 @@
        intern->text_len = text_len;
        if (ZEND_NUM_ARGS() > 1) {
                ti_type = flags & ITER_TYPE_MASK;
-               if (ti_type < ITER_TYPE_LAST) { 
-                       intern->type = ti_type;
-               } else {
+               if (ti_type < 0 || ti_type >= ITER_TYPE_LAST) { 
                        php_error(E_WARNING, "Invalid iterator type in 
TextIterator constructor");
+                       ti_type = ITER_CODE_POINT;
                }
+               intern->type = ti_type;
                intern->flags = flags;
        }
 
@@ -438,6 +543,15 @@
                intern->flags |= ITER_REVERSE;
        }
 
+       if (ti_type >= ITER_CHARACTER && ti_type < ITER_TYPE_LAST) {
+               UErrorCode status = U_ZERO_ERROR;
+               intern->u.brk.iter = ubrk_open(brk_type_map[ti_type - 
ITER_CHARACTER], UG(default_locale), text, text_len, &status);
+               if (!U_SUCCESS(status)) {
+                       php_error(E_RECOVERABLE_ERROR, "Could not create 
UBreakIterator: %s", u_errorName(status));
+                       return;
+               }
+       }
+
        iter_ops[intern->type]->rewind(intern TSRMLS_CC);
 }
 
@@ -513,6 +627,10 @@
        zend_declare_class_constant_long(text_iterator_ce, "CODE_UNIT", 
sizeof("CODE_UNIT")-1, ITER_CODE_UNIT TSRMLS_CC);
        zend_declare_class_constant_long(text_iterator_ce, "CODE_POINT", 
sizeof("CODE_POINT")-1, ITER_CODE_POINT TSRMLS_CC);
        zend_declare_class_constant_long(text_iterator_ce, "COMB_SEQUENCE", 
sizeof("COMB_SEQUENCE")-1, ITER_COMB_SEQUENCE TSRMLS_CC);
+       zend_declare_class_constant_long(text_iterator_ce, "CHARACTER", 
sizeof("CHARACTER")-1, ITER_CHARACTER TSRMLS_CC);
+       zend_declare_class_constant_long(text_iterator_ce, "WORD", 
sizeof("WORD")-1, ITER_WORD TSRMLS_CC);
+       zend_declare_class_constant_long(text_iterator_ce, "LINE", 
sizeof("LINE")-1, ITER_LINE TSRMLS_CC);
+       zend_declare_class_constant_long(text_iterator_ce, "SENTENCE", 
sizeof("SENTENCE")-1, ITER_SENTENCE TSRMLS_CC);
 }
 
 /*

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to