andrei Mon Jul 10 21:42:25 2006 UTC
Modified files:
/php-src/ext/unicode unicode_iterators.c
Log:
Implement isBoundary() for combining sequences.
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.36&r2=1.37&diff_format=u
Index: php-src/ext/unicode/unicode_iterators.c
diff -u php-src/ext/unicode/unicode_iterators.c:1.36 php-src/ext/unicode/unicode_iterators.c:1.37
--- php-src/ext/unicode/unicode_iterators.c:1.36 Mon Jul 10 21:18:01 2006
+++ php-src/ext/unicode/unicode_iterators.c Mon Jul 10 21:42:25 2006
@@ -14,7 +14,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode_iterators.c,v 1.36 2006/07/10 21:18:01 andrei Exp $ */
+/* $Id: unicode_iterators.c,v 1.37 2006/07/10 21:42:25 andrei Exp $ */
/*
* TODO
@@ -448,7 +448,7 @@
static void text_iter_cs_following(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
{
- int32_t k, tmp;
+ int32_t k;
if (offset < 0) {
offset = 0;
@@ -501,7 +501,54 @@
static zend_bool text_iter_cs_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
{
- return 1;
+ UChar32 cp;
+ int32_t k, tmp;
+ zend_bool result;
+
+ if (offset < 0) {
+ offset = 0;
+ }
+
+ /*
+ * On invalid iterator we always want to start looking for the code unit
+ * offset from the beginning of the string.
+ */
+ if (object->u.cs.start_cp_offset == UBRK_DONE) {
+ object->u.cs.start_cp_offset = 0;
+ object->u.cs.start = 0;
+ }
+
+ /*
+ * Try to locate the code unit position relative to the last known codepoint
+ * offset.
+ */
+ k = object->u.cs.start;
+ if (offset > object->u.cs.start_cp_offset) {
+ U16_FWD_N(object->text, k, object->text_len, offset - object->u.cs.start_cp_offset);
+ } else {
+ U16_BACK_N(object->text, 0, k, object->u.cs.start_cp_offset - offset);
+ }
+
+ /* end of the text is always a boundary */
+ if (k == object->text_len) {
+ offset = u_countChar32(object->text, object->text_len);
+ result = 1;
+ } else {
+ /* if the next codepoint is a base character, it's a boundary */
+ tmp = k;
+ U16_NEXT(object->text, tmp, object->text_len, cp);
+ result = (u_getCombiningClass(cp) == 0);
+ }
+
+ if (k == object->u.cs.start) {
+ return result;
+ }
+
+ object->u.cs.start = k;
+ object->u.cs.start_cp_offset = offset;
+ object->u.cs.end = object->u.cs.start;
+
+ return result;
}
static text_iter_ops text_iter_cs_ops = {
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php