andrei          Fri Feb  3 00:09:19 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode_iterators.c 
  Log:
  Rewrite to use C-level iterators for performance. Also, cache the string
  in the iterator object for immutability.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.4&r2=1.5&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.4 php-src/ext/unicode/unicode_iterators.c:1.5
--- php-src/ext/unicode/unicode_iterators.c:1.4 Thu Feb  2 06:01:27 2006
+++ php-src/ext/unicode/unicode_iterators.c     Fri Feb  3 00:09:19 2006
@@ -14,7 +14,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode_iterators.c,v 1.4 2006/02/02 06:01:27 sebastian Exp $ */
+/* $Id: unicode_iterators.c,v 1.5 2006/02/03 00:09:19 andrei Exp $ */
 
 
 #include "php.h"
@@ -30,34 +30,157 @@
 
 typedef struct {
        zend_object             std;
-       zval*                   text;
+       UChar*                  text;
+       uint32_t                text_len;
+       text_iter_type  type;
+       zval*                   current;
        int32_t                 offset;
        int32_t                 index;
-       text_iter_type  type;
-} text_iter_t;
+} text_iter_obj;
+
+typedef struct {
+       zend_object_iterator intern;
+       text_iter_obj*           object;
+} text_iter_it;
 
 PHPAPI zend_class_entry* text_iterator_aggregate_ce;
 PHPAPI zend_class_entry* text_iterator_ce;
 
+/* Code point ops */
+
+static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
+{
+       return (object->offset < object->text_len);
+}
+
+static void text_iter_cp_get_current_data(text_iter_obj* object TSRMLS_DC)
+{
+       UChar32 cp;
+       int32_t tmp, buf_len;
+
+       tmp = object->offset;
+       U16_NEXT(object->text, tmp, object->text_len, cp);
+       buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object->current));
+       Z_USTRVAL_P(object->current)[buf_len] = 0;
+       Z_USTRLEN_P(object->current) = buf_len;
+}
+
+static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC)
+{
+       return object->index;
+}
+
+static void text_iter_cp_move_forward(text_iter_obj* object TSRMLS_DC)
+{
+       U16_FWD_1(object->text, object->offset, object->text_len);
+       object->index++;
+}
+
+static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
+{
+       object->offset = 0;
+       object->index  = 0;
+}
+
+
+/* Iterator Funcs */
+
+static void text_iter_dtor(zend_object_iterator* iter TSRMLS_DC)
+{
+       text_iter_it* iterator = (text_iter_it *) iter;
+       zval_ptr_dtor((zval **)&iterator->intern.data);
+       efree(iterator);
+}
+
+static int text_iter_valid(zend_object_iterator* iter TSRMLS_DC)
+{
+       text_iter_it*  iterator = (text_iter_it *) iter;
+       text_iter_obj* object   = iterator->object;
+
+       if (text_iter_cp_valid(object TSRMLS_CC))
+               return SUCCESS;
+       else
+               return FAILURE;
+}
+
+static void text_iter_get_current_data(zend_object_iterator* iter, zval*** 
data TSRMLS_DC)
+{
+       text_iter_it*  iterator = (text_iter_it *) iter;
+       text_iter_obj* object   = iterator->object;
+
+       text_iter_cp_get_current_data(object TSRMLS_CC);
+       *data = &object->current;
+}
+
+static int text_iter_get_current_key(zend_object_iterator* iter, char 
**str_key, uint *str_key_len, ulong *int_key TSRMLS_DC)
+{
+       text_iter_it*  iterator = (text_iter_it *) iter;
+       text_iter_obj* object   = iterator->object;
+
+       *int_key = text_iter_cp_get_current_key(object TSRMLS_CC);
+       return HASH_KEY_IS_LONG;
+}
+
+static void text_iter_move_forward(zend_object_iterator* iter TSRMLS_DC)
+{
+       text_iter_it*  iterator = (text_iter_it *) iter;
+       text_iter_obj* object   = iterator->object;
+
+       text_iter_cp_move_forward(object TSRMLS_CC);
+}
+
+static void text_iter_rewind(zend_object_iterator* iter TSRMLS_DC)
+{
+       text_iter_it*  iterator = (text_iter_it *) iter;
+       text_iter_obj* object   = iterator->object;
+
+       text_iter_cp_rewind(object TSRMLS_CC);
+}
+
+zend_object_iterator_funcs text_iter_cp_funcs = {
+       text_iter_dtor,
+       text_iter_valid,
+       text_iter_get_current_data,
+       text_iter_get_current_key,
+       text_iter_move_forward,
+       text_iter_rewind,
+};
+
+static zend_object_iterator* text_iter_get_iterator(zend_class_entry *ce, zval 
*object TSRMLS_DC)
+{
+       text_iter_it*   iterator        = emalloc(sizeof(text_iter_it));
+       text_iter_obj*  iter_object = (text_iter_obj *) 
zend_object_store_get_object(object TSRMLS_CC);
+
+       ZVAL_ADDREF(object);
+       iterator->intern.data  = (void *) object;
+       iterator->intern.funcs = &text_iter_cp_funcs;
+       iterator->object           = iter_object;
+
+       return (zend_object_iterator *) iterator;
+}
+
 static void text_iterator_free_storage(void *object TSRMLS_DC)
 {
-       text_iter_t *intern = (text_iter_t *) object;
+       text_iter_obj *intern = (text_iter_obj *) object;
 
        zend_hash_destroy(intern->std.properties);
        FREE_HASHTABLE(intern->std.properties);
 
-       if (intern->text) zval_ptr_dtor(&intern->text);
+       if (intern->text) {
+               efree(intern->text);
+       }
+       zval_ptr_dtor(&intern->current);
        efree(object);
 }
 
 static zend_object_value text_iterator_new(zend_class_entry *class_type 
TSRMLS_DC)
 {
        zend_object_value retval;
-       text_iter_t *intern;
+       text_iter_obj *intern;
        zval *tmp;
 
-       intern = emalloc(sizeof(text_iter_t));
-       memset(intern, 0, sizeof(text_iter_t));
+       intern = emalloc(sizeof(text_iter_obj));
+       memset(intern, 0, sizeof(text_iter_obj));
        intern->std.ce = class_type;
 
        ALLOC_HASHTABLE(intern->std.properties);
@@ -65,6 +188,9 @@
        zend_hash_copy(intern->std.properties, &class_type->default_properties, 
(copy_ctor_func_t) zval_add_ref, (void *) &tmp, sizeof(zval *));
 
        intern->type = ITER_CODE_POINT;
+       MAKE_STD_ZVAL(intern->current); /* pre-allocate buffer for codepoint */
+       Z_USTRVAL_P(intern->current) = eumalloc(3);
+       Z_TYPE_P(intern->current) = IS_UNICODE;
 
        retval.handle = zend_objects_store_put(intern, 
(zend_objects_store_dtor_t)zend_objects_destroy_object, 
(zend_objects_free_object_storage_t) text_iterator_free_storage, NULL 
TSRMLS_CC);
        retval.handlers = zend_get_std_object_handlers();
@@ -72,81 +198,64 @@
        return retval;
 }
 
-static void text_iter_rewind(text_iter_t *intern TSRMLS_DC)
-{
-       intern->offset = 0;
-       intern->index  = 0;
-}
-
-
 PHP_METHOD(TextIterator, __construct)
 {
-       zval *text;
+       UChar *text;
+       int32_t text_len;
        zval *object = getThis();
-       text_iter_t *intern;
+       text_iter_obj *intern;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &text) == 
FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u", &text, 
&text_len) == FAILURE) {
                return;
        }
 
-       if (Z_TYPE_P(text) != IS_UNICODE) {
-               
zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), "Text 
iterator expects argument to be a Unicode string", 0 TSRMLS_CC);
-               return;
-       }
-
-       intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC);
+       intern = (text_iter_obj*) zend_object_store_get_object(object 
TSRMLS_CC);
 
-       ZVAL_ADDREF(text);
-       intern->text = text;
+       intern->text = eustrndup(text, text_len);
+       intern->text_len = text_len;
 
-       text_iter_rewind(intern TSRMLS_CC);
+       text_iter_cp_rewind(intern TSRMLS_CC);
 }
 
 PHP_METHOD(TextIterator, current)
 {
-       UChar32 cp;
-       UChar buf[3];
-       uint32_t tmp, buf_len;
        zval *object = getThis();
-       text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC);
+       text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-       tmp = intern->offset;
-       U16_NEXT(Z_USTRVAL_P(intern->text), tmp, Z_USTRLEN_P(intern->text), cp);
-       buf_len = zend_codepoint_to_uchar(cp, buf);
-       RETURN_UNICODEL(buf, buf_len, 1);
+       text_iter_cp_get_current_data(intern TSRMLS_CC);
+       RETURN_UNICODEL(Z_USTRVAL_P(intern->current), 
Z_USTRLEN_P(intern->current), 1);
 }
 
 PHP_METHOD(TextIterator, next)
 {
        zval *object = getThis();
-       text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC);
+       text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-       U16_FWD_1(Z_USTRVAL_P(intern->text), intern->offset, 
Z_USTRLEN_P(intern->text));
-       intern->index++;
+       text_iter_cp_move_forward(intern TSRMLS_CC);
 }
 
 PHP_METHOD(TextIterator, key)
 {
        zval *object = getThis();
-       text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC);
+       text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-       RETURN_LONG(intern->index);
+       RETURN_LONG(text_iter_cp_get_current_key(intern TSRMLS_CC));
 }
 
 PHP_METHOD(TextIterator, valid)
 {
        zval *object = getThis();
-       text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC);
+       text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-       RETURN_BOOL(intern->offset < Z_USTRLEN_P(intern->text));
+       RETURN_BOOL(text_iter_cp_valid(intern TSRMLS_CC));
 }
 
 PHP_METHOD(TextIterator, rewind)
 {
        zval *object = getThis();
-       text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC);
+       text_iter_obj *intern = (text_iter_obj*) 
zend_object_store_get_object(object TSRMLS_CC);
 
-       text_iter_rewind(intern TSRMLS_CC);
+       text_iter_cp_rewind(intern TSRMLS_CC);
 }
 
 static zend_function_entry text_iterator_funcs[] = {
@@ -167,6 +276,7 @@
        text_iterator_ce = zend_register_internal_class(&ce TSRMLS_CC);
        zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator);
        text_iterator_ce->create_object = text_iterator_new;
+       text_iterator_ce->get_iterator  = text_iter_get_iterator;
 }
 
 /*

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to