andrei		Fri Feb  3 00:09:19 2006 UTC

  Modified files:
    /php-src/ext/unicode	unicode_iterators.c
  Log:
  Rewrite to use C-level iterators for performance. Also, cache the string
  in the iterator object for immutability.

http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.4&r2=1.5&diff_format=u Index: php-src/ext/unicode/unicode_iterators.c diff -u php-src/ext/unicode/unicode_iterators.c:1.4 php-src/ext/unicode/unicode_iterators.c:1.5 --- php-src/ext/unicode/unicode_iterators.c:1.4 Thu Feb 2 06:01:27 2006 +++ php-src/ext/unicode/unicode_iterators.c Fri Feb 3 00:09:19 2006 @@ -14,7 +14,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode_iterators.c,v 1.4 2006/02/02 06:01:27 sebastian Exp $ */ +/* $Id: unicode_iterators.c,v 1.5 2006/02/03 00:09:19 andrei Exp $ */ #include "php.h" @@ -30,34 +30,157 @@ typedef struct { zend_object std; - zval* text; + UChar* text; + uint32_t text_len; + text_iter_type type; + zval* current; int32_t offset; int32_t index; - text_iter_type type; -} text_iter_t; +} text_iter_obj; + +typedef struct { + zend_object_iterator intern; + text_iter_obj* object; +} text_iter_it; PHPAPI zend_class_entry* text_iterator_aggregate_ce; PHPAPI zend_class_entry* text_iterator_ce; +/* Code point ops */ + +static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC) +{ + return (object->offset < object->text_len); +} + +static void text_iter_cp_get_current_data(text_iter_obj* object TSRMLS_DC) +{ + UChar32 cp; + int32_t tmp, buf_len; + + tmp = object->offset; + U16_NEXT(object->text, tmp, object->text_len, cp); + buf_len = zend_codepoint_to_uchar(cp, Z_USTRVAL_P(object->current)); + Z_USTRVAL_P(object->current)[buf_len] = 0; + Z_USTRLEN_P(object->current) = buf_len; +} + +static int text_iter_cp_get_current_key(text_iter_obj* object TSRMLS_DC) +{ + return object->index; +} + +static void text_iter_cp_move_forward(text_iter_obj* object TSRMLS_DC) +{ + U16_FWD_1(object->text, object->offset, object->text_len); + object->index++; +} + +static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC) +{ + object->offset = 0; + object->index = 0; +} + + +/* Iterator Funcs */ + +static void 
text_iter_dtor(zend_object_iterator* iter TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + zval_ptr_dtor((zval **)&iterator->intern.data); + efree(iterator); +} + +static int text_iter_valid(zend_object_iterator* iter TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + text_iter_obj* object = iterator->object; + + if (text_iter_cp_valid(object TSRMLS_CC)) + return SUCCESS; + else + return FAILURE; +} + +static void text_iter_get_current_data(zend_object_iterator* iter, zval*** data TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + text_iter_obj* object = iterator->object; + + text_iter_cp_get_current_data(object TSRMLS_CC); + *data = &object->current; +} + +static int text_iter_get_current_key(zend_object_iterator* iter, char **str_key, uint *str_key_len, ulong *int_key TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + text_iter_obj* object = iterator->object; + + *int_key = text_iter_cp_get_current_key(object TSRMLS_CC); + return HASH_KEY_IS_LONG; +} + +static void text_iter_move_forward(zend_object_iterator* iter TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + text_iter_obj* object = iterator->object; + + text_iter_cp_move_forward(object TSRMLS_CC); +} + +static void text_iter_rewind(zend_object_iterator* iter TSRMLS_DC) +{ + text_iter_it* iterator = (text_iter_it *) iter; + text_iter_obj* object = iterator->object; + + text_iter_cp_rewind(object TSRMLS_CC); +} + +zend_object_iterator_funcs text_iter_cp_funcs = { + text_iter_dtor, + text_iter_valid, + text_iter_get_current_data, + text_iter_get_current_key, + text_iter_move_forward, + text_iter_rewind, +}; + +static zend_object_iterator* text_iter_get_iterator(zend_class_entry *ce, zval *object TSRMLS_DC) +{ + text_iter_it* iterator = emalloc(sizeof(text_iter_it)); + text_iter_obj* iter_object = (text_iter_obj *) zend_object_store_get_object(object TSRMLS_CC); + + ZVAL_ADDREF(object); + iterator->intern.data = (void *) object; + 
iterator->intern.funcs = &text_iter_cp_funcs; + iterator->object = iter_object; + + return (zend_object_iterator *) iterator; +} + static void text_iterator_free_storage(void *object TSRMLS_DC) { - text_iter_t *intern = (text_iter_t *) object; + text_iter_obj *intern = (text_iter_obj *) object; zend_hash_destroy(intern->std.properties); FREE_HASHTABLE(intern->std.properties); - if (intern->text) zval_ptr_dtor(&intern->text); + if (intern->text) { + efree(intern->text); + } + zval_ptr_dtor(&intern->current); efree(object); } static zend_object_value text_iterator_new(zend_class_entry *class_type TSRMLS_DC) { zend_object_value retval; - text_iter_t *intern; + text_iter_obj *intern; zval *tmp; - intern = emalloc(sizeof(text_iter_t)); - memset(intern, 0, sizeof(text_iter_t)); + intern = emalloc(sizeof(text_iter_obj)); + memset(intern, 0, sizeof(text_iter_obj)); intern->std.ce = class_type; ALLOC_HASHTABLE(intern->std.properties); @@ -65,6 +188,9 @@ zend_hash_copy(intern->std.properties, &class_type->default_properties, (copy_ctor_func_t) zval_add_ref, (void *) &tmp, sizeof(zval *)); intern->type = ITER_CODE_POINT; + MAKE_STD_ZVAL(intern->current); /* pre-allocate buffer for codepoint */ + Z_USTRVAL_P(intern->current) = eumalloc(3); + Z_TYPE_P(intern->current) = IS_UNICODE; retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC); retval.handlers = zend_get_std_object_handlers(); @@ -72,81 +198,64 @@ return retval; } -static void text_iter_rewind(text_iter_t *intern TSRMLS_DC) -{ - intern->offset = 0; - intern->index = 0; -} - - PHP_METHOD(TextIterator, __construct) { - zval *text; + UChar *text; + int32_t text_len; zval *object = getThis(); - text_iter_t *intern; + text_iter_obj *intern; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &text) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u", &text, &text_len) 
== FAILURE) { return; } - if (Z_TYPE_P(text) != IS_UNICODE) { - zend_throw_exception(U_CLASS_ENTRY(spl_ce_InvalidArgumentException), "Text iterator expects argument to be a Unicode string", 0 TSRMLS_CC); - return; - } - - intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC); + intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - ZVAL_ADDREF(text); - intern->text = text; + intern->text = eustrndup(text, text_len); + intern->text_len = text_len; - text_iter_rewind(intern TSRMLS_CC); + text_iter_cp_rewind(intern TSRMLS_CC); } PHP_METHOD(TextIterator, current) { - UChar32 cp; - UChar buf[3]; - uint32_t tmp, buf_len; zval *object = getThis(); - text_iter_t *intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - tmp = intern->offset; - U16_NEXT(Z_USTRVAL_P(intern->text), tmp, Z_USTRLEN_P(intern->text), cp); - buf_len = zend_codepoint_to_uchar(cp, buf); - RETURN_UNICODEL(buf, buf_len, 1); + text_iter_cp_get_current_data(intern TSRMLS_CC); + RETURN_UNICODEL(Z_USTRVAL_P(intern->current), Z_USTRLEN_P(intern->current), 1); } PHP_METHOD(TextIterator, next) { zval *object = getThis(); - text_iter_t *intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - U16_FWD_1(Z_USTRVAL_P(intern->text), intern->offset, Z_USTRLEN_P(intern->text)); - intern->index++; + text_iter_cp_move_forward(intern TSRMLS_CC); } PHP_METHOD(TextIterator, key) { zval *object = getThis(); - text_iter_t *intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - RETURN_LONG(intern->index); + RETURN_LONG(text_iter_cp_get_current_key(intern TSRMLS_CC)); } PHP_METHOD(TextIterator, valid) { zval *object = getThis(); - text_iter_t *intern = (text_iter_t*) 
zend_object_store_get_object(object TSRMLS_CC); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - RETURN_BOOL(intern->offset < Z_USTRLEN_P(intern->text)); + RETURN_BOOL(text_iter_cp_valid(intern TSRMLS_CC)); } PHP_METHOD(TextIterator, rewind) { zval *object = getThis(); - text_iter_t *intern = (text_iter_t*) zend_object_store_get_object(object TSRMLS_CC); + text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC); - text_iter_rewind(intern TSRMLS_CC); + text_iter_cp_rewind(intern TSRMLS_CC); } static zend_function_entry text_iterator_funcs[] = { @@ -167,6 +276,7 @@ text_iterator_ce = zend_register_internal_class(&ce TSRMLS_CC); zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator); text_iterator_ce->create_object = text_iterator_new; + text_iterator_ce->get_iterator = text_iter_get_iterator; } /*
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php