andrei Sat Jun 24 18:18:38 2006 UTC
Modified files:
/php-src/ext/unicode unicode_iterators.c
Log:
- Remove support for code units in TextIterator (people shouldn't be
examining individual code units anyway)
- Add offset() method.
- Add optional locale parameter to the constructor.
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.24&r2=1.25&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.24 php-src/ext/unicode/unicode_iterators.c:1.25
--- php-src/ext/unicode/unicode_iterators.c:1.24 Fri Mar 24 21:06:36 2006
+++ php-src/ext/unicode/unicode_iterators.c Sat Jun 24 18:18:38 2006
@@ -14,14 +14,13 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode_iterators.c,v 1.24 2006/03/24 21:06:36 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.25 2006/06/24 18:18:38 andrei Exp $ */
/*
* TODO
*
* - optimize current() to pass return_value to the handler so that it fills it
* in directly instead of creating a new zval
- * - return code units as binary strings? integers? or leave as unicode strings?
* - implement Countable (or count_elements handler) and Seekable interfaces
*/
@@ -31,7 +30,6 @@
#include <unicode/ubrk.h>
typedef enum {
- ITER_CODE_UNIT,
ITER_CODE_POINT,
ITER_COMB_SEQUENCE,
ITER_CHARACTER,
@@ -53,23 +51,21 @@
size_t current_alloc;
long flags;
union {
+ int32_t start;
struct {
+ int32_t start;
int32_t index;
- int32_t offset;
} cp;
struct {
- int32_t index;
- } cu;
- struct {
- int32_t index;
int32_t start;
int32_t end;
+ int32_t index;
} cs;
struct {
- UBreakIterator *iter;
- int32_t index;
int32_t start;
int32_t end;
+ int32_t index;
+ UBreakIterator *iter;
} brk;
} u;
zend_object_iterator iter;
@@ -99,71 +95,14 @@
PHPAPI zend_class_entry* text_iterator_ce;
PHPAPI zend_class_entry* rev_text_iterator_ce;
-/* Code unit ops */
-
-static int text_iter_cu_valid(text_iter_obj* object TSRMLS_DC)
-{
- if (object->flags & ITER_REVERSE) {
- return (object->u.cu.index >= 0);
- } else {
- return (object->u.cu.index < object->text_len);
- }
-}
-
-static void text_iter_cu_current(text_iter_obj* object TSRMLS_DC)
-{
- u_memcpy(Z_USTRVAL_P(object->current), object->text + object->u.cu.index, 1);
- Z_USTRVAL_P(object->current)[1] = 0;
- Z_USTRLEN_P(object->current) = 1;
-}
-
-static int text_iter_cu_key(text_iter_obj* object TSRMLS_DC)
-{
- if (object->flags & ITER_REVERSE) {
- return object->text_len - object->u.cu.index - 1;
- } else {
- return object->u.cu.index;
- }
-}
-
-static void text_iter_cu_next(text_iter_obj* object TSRMLS_DC)
-{
- if (object->flags & ITER_REVERSE) {
- if (object->u.cu.index >= 0) {
- object->u.cu.index--;
- }
- } else {
- if (object->u.cu.index < object->text_len) {
- object->u.cu.index++;
- }
- }
-}
-
-static void text_iter_cu_rewind(text_iter_obj *object TSRMLS_DC)
-{
- if (object->flags & ITER_REVERSE) {
- object->u.cu.index = object->text_len-1;
- } else {
- object->u.cu.index = 0;
- }
-}
-
-static text_iter_ops text_iter_cu_ops = {
- text_iter_cu_valid,
- text_iter_cu_current,
- text_iter_cu_key,
- text_iter_cu_next,
- text_iter_cu_rewind,
-};
-
/* Code point ops */
static int text_iter_cp_valid(text_iter_obj* object TSRMLS_DC)
{
if (object->flags & ITER_REVERSE) {
- return (object->u.cp.offset > 0);
+ return (object->u.cp.start > 0);
} else {
- return (object->u.cp.offset < object->text_len);
+ return (object->u.cp.start < object->text_len);
}
}
@@ -172,7 +111,7 @@
UChar32 cp;
int32_t tmp, buf_len;
- tmp = object->u.cp.offset;
+ tmp = object->u.cp.start;
if (object->flags & ITER_REVERSE) {
U16_PREV(object->text, 0, tmp, cp);
} else {
@@ -191,9 +130,9 @@
static void text_iter_cp_next(text_iter_obj* object TSRMLS_DC)
{
if (object->flags & ITER_REVERSE) {
- U16_BACK_1(object->text, 0, object->u.cp.offset);
+ U16_BACK_1(object->text, 0, object->u.cp.start);
} else {
- U16_FWD_1(object->text, object->u.cp.offset, object->text_len);
+ U16_FWD_1(object->text, object->u.cp.start, object->text_len);
}
object->u.cp.index++;
}
@@ -201,9 +140,9 @@
static void text_iter_cp_rewind(text_iter_obj *object TSRMLS_DC)
{
if (object->flags & ITER_REVERSE) {
- object->u.cp.offset = object->text_len;
+ object->u.cp.start = object->text_len;
} else {
- object->u.cp.offset = 0;
+ object->u.cp.start = 0;
}
object->u.cp.index = 0;
}
@@ -377,7 +316,6 @@
/* Ops array */
static text_iter_ops* iter_ops[] = {
- &text_iter_cu_ops,
&text_iter_cp_ops,
&text_iter_cs_ops,
&text_iter_brk_ops,
@@ -514,9 +452,11 @@
zval *object = getThis();
text_iter_obj *intern;
text_iter_type ti_type;
+ char *locale = NULL;
+ int locale_len;
long flags = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u|l", &text, &text_len, &flags) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "u|ls", &text, &text_len, &flags, &locale, &locale_len) == FAILURE) {
return;
}
@@ -540,9 +480,10 @@
if (intern->type >= ITER_CHARACTER && intern->type < ITER_TYPE_LAST) {
UErrorCode status = U_ZERO_ERROR;
- intern->u.brk.iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], UG(default_locale), text, text_len, &status);
+ locale = locale ? locale : UG(default_locale);
+ intern->u.brk.iter = ubrk_open(brk_type_map[intern->type - ITER_CHARACTER], locale, text, text_len, &status);
if (!U_SUCCESS(status)) {
- php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator: %s", u_errorName(status));
+ php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator for '%s' locale: %s", locale, u_errorName(status));
return;
}
}
@@ -591,13 +532,25 @@
iter_ops[intern->type]->rewind(intern TSRMLS_CC);
}
+PHP_METHOD(TextIterator, offset)
+{
+ zval *object = getThis();
+ text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
+
+ RETURN_LONG(intern->u.start);
+}
+
static zend_function_entry text_iterator_funcs[] = {
PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC)
+
+ /* Iterator interface methods */
PHP_ME(TextIterator, current, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, next, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, key, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, valid, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, rewind, NULL, ZEND_ACC_PUBLIC)
+
+ PHP_ME(TextIterator, offset, NULL, ZEND_ACC_PUBLIC)
{NULL, NULL, NULL}
};
@@ -610,16 +563,15 @@
text_iterator_ce->create_object = text_iterator_new;
text_iterator_ce->get_iterator = text_iter_get_iterator;
text_iterator_ce->ce_flags |= ZEND_ACC_FINAL_CLASS;
- zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_traversable);
+ zend_class_implements(text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator);
INIT_CLASS_ENTRY(ce, "ReverseTextIterator", text_iterator_funcs);
rev_text_iterator_ce = zend_register_internal_class(&ce TSRMLS_CC);
rev_text_iterator_ce->create_object = text_iterator_new;
rev_text_iterator_ce->get_iterator = text_iter_get_iterator;
rev_text_iterator_ce->ce_flags |= ZEND_ACC_FINAL_CLASS;
- zend_class_implements(rev_text_iterator_ce TSRMLS_CC, 1, zend_ce_traversable);
+ zend_class_implements(rev_text_iterator_ce TSRMLS_CC, 1, zend_ce_iterator);
- zend_declare_class_constant_long(text_iterator_ce, "CODE_UNIT", sizeof("CODE_UNIT")-1, ITER_CODE_UNIT TSRMLS_CC);
zend_declare_class_constant_long(text_iterator_ce, "CODE_POINT", sizeof("CODE_POINT")-1, ITER_CODE_POINT TSRMLS_CC);
zend_declare_class_constant_long(text_iterator_ce, "COMB_SEQUENCE", sizeof("COMB_SEQUENCE")-1, ITER_COMB_SEQUENCE TSRMLS_CC);
zend_declare_class_constant_long(text_iterator_ce, "CHARACTER", sizeof("CHARACTER")-1, ITER_CHARACTER TSRMLS_CC);
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php