andrei Mon Jul 10 20:14:12 2006 UTC
Modified files:
/php-src/ext/unicode unicode_iterators.c
Log:
Fix combining sequence iterators for forward and backward movement.
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.34&r2=1.35&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.34
php-src/ext/unicode/unicode_iterators.c:1.35
--- php-src/ext/unicode/unicode_iterators.c:1.34 Sat Jul 8 18:46:24 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 20:14:12 2006
@@ -14,7 +14,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode_iterators.c,v 1.34 2006/07/08 18:46:24 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.35 2006/07/10 20:14:12 andrei Exp $ */
/*
* TODO
@@ -306,6 +306,59 @@
/* Combining sequence ops */
+static void text_iter_helper_move(zend_bool forward, UChar *text, int32_t
text_len, int32_t *offset, int32_t *cp_offset)
+{
+ UChar32 cp;
+ int32_t tmp, tmp2;
+
+ if (*offset == UBRK_DONE) {
+ return;
+ }
+
+ if (forward) {
+ if (*offset == text_len) {
+ *offset = UBRK_DONE;
+ *cp_offset = UBRK_DONE;
+ } else {
+ U16_NEXT(text, (*offset), text_len, cp);
+ (*cp_offset)++;
+
+ if (u_getCombiningClass(cp) == 0) {
+ tmp = *offset;
+ tmp2 = *cp_offset;
+ /*
+ * At the end of the string cp will be 0
because of the
+ * terminating NUL, so combining class will be
0 as well.
+ */
+ while (tmp < text_len) {
+ U16_NEXT(text, tmp, text_len, cp);
+ tmp2++;
+ if (u_getCombiningClass(cp) == 0) {
+ break;
+ } else {
+ *offset = tmp;
+ *cp_offset = tmp2;
+ }
+ }
+ }
+ }
+ } else {
+ if (*offset == 0) {
+ *offset = UBRK_DONE;
+ *cp_offset = UBRK_DONE;
+ } else {
+ U16_PREV(text, 0, (*offset), cp);
+ (*cp_offset)--;
+ if (u_getCombiningClass(cp) != 0) {
+ do {
+ U16_PREV(text, 0, (*offset), cp);
+ (*cp_offset)--;
+ } while (*offset > 0 && u_getCombiningClass(cp)
!= 0);
+ }
+ }
+ }
+}
+
static int text_iter_cs_valid(text_iter_obj* object, long flags TSRMLS_DC)
{
return (object->u.cs.start != UBRK_DONE);
@@ -313,25 +366,41 @@
static void text_iter_cs_current(text_iter_obj* object, long flags TSRMLS_DC)
{
- uint32_t length;
UChar *start;
+ int32_t length = -1;
- if (object->u.cs.start == UBRK_DONE || object->u.cs.end == UBRK_DONE) {
- length = 0;
- } else {
+ if (object->u.cs.start != UBRK_DONE) {
if (flags & ITER_REVERSE) {
+ if (object->u.cs.end == object->u.cs.start) {
+ text_iter_helper_move(0, object->text,
object->text_len,
+
&object->u.cs.start, &object->u.cs.start_cp_offset);
+ }
start = object->text + object->u.cs.end;
} else {
+ if (object->u.cs.end == object->u.cs.start) {
+ text_iter_helper_move(1, object->text,
object->text_len,
+
&object->u.cs.end, &object->u.cs.end_cp_offset);
+ }
start = object->text + object->u.cs.start;
}
- length = abs(object->u.cs.end - object->u.cs.start);
+
+ if (object->u.cs.end == UBRK_DONE) {
+ length = 0;
+ } else {
+ length = abs(object->u.cs.end - object->u.cs.start);
+ }
+ } else {
+ length = 0;
+ }
+
+ if (length != 0) {
if (length+1 > object->current_alloc) {
object->current_alloc = length+1;
Z_USTRVAL_P(object->current) =
eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
}
u_memcpy(Z_USTRVAL_P(object->current), start, length);
}
-
+
Z_USTRVAL_P(object->current)[length] = 0;
Z_USTRLEN_P(object->current) = length;
}
@@ -343,60 +412,23 @@
static int text_iter_cs_offset(text_iter_obj* object, long flags TSRMLS_DC)
{
- if (flags & ITER_REVERSE) {
- return object->u.cs.end_cp_offset;
- } else {
- return object->u.cs.start_cp_offset;
- }
+ return object->u.cs.start_cp_offset;
}
static void text_iter_cs_next(text_iter_obj* object, long flags TSRMLS_DC)
{
- UChar32 cp;
- int32_t tmp, tmp2;
-
if (object->u.cs.start == UBRK_DONE) {
return;
}
- object->u.cs.start = object->u.cs.end;
- object->u.cs.start_cp_offset = object->u.cs.end_cp_offset;
if (flags & ITER_REVERSE) {
- if (object->u.cs.end == 0) {
- object->u.cs.end = UBRK_DONE;
- object->u.cs.end_cp_offset = UBRK_DONE;
- } else {
- U16_PREV(object->text, 0, object->u.cs.end, cp);
- object->u.cs.end_cp_offset--;
- if (u_getCombiningClass(cp) != 0) {
- do {
- U16_PREV(object->text, 0,
object->u.cs.end, cp);
- object->u.cs.end_cp_offset--;
- } while (object->u.cs.end > 0 &&
u_getCombiningClass(cp) != 0);
- }
- }
+ text_iter_helper_move(0, object->text, object->text_len,
+ &object->u.cs.start,
&object->u.cs.start_cp_offset);
+ object->u.cs.end = object->u.cs.start;
} else {
- if (object->u.cs.end == object->text_len) {
- object->u.cs.end = UBRK_DONE;
- object->u.cs.end_cp_offset = UBRK_DONE;
- } else {
- U16_NEXT(object->text, object->u.cs.end,
object->text_len, cp);
- object->u.cs.end_cp_offset++;
- if (u_getCombiningClass(cp) == 0) {
- tmp = object->u.cs.end;
- tmp2 = object->u.cs.end_cp_offset;
- while (tmp < object->text_len) {
- U16_NEXT(object->text, tmp,
object->text_len, cp);
- tmp2++;
- if (u_getCombiningClass(cp) == 0) {
- break;
- } else {
- object->u.cs.end = tmp;
- object->u.cs.end_cp_offset =
tmp2;
- }
- }
- }
- }
+ text_iter_helper_move(1, object->text, object->text_len,
+ &object->u.cs.start,
&object->u.cs.start_cp_offset);
+ object->u.cs.end = object->u.cs.start;
}
object->u.cs.index++;
}
@@ -411,7 +443,6 @@
object->u.cs.start = object->u.cs.end = 0;
object->u.cs.start_cp_offset = object->u.cs.end_cp_offset = 0;
}
- text_iter_cs_next(object, flags TSRMLS_CC); /* find first sequence */
object->u.cs.index = 0; /* because _next increments index */
}
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php