# New Ticket Created by  NotFound 
# Please include the string:  [perl #55712]
# in the subject line of all future correspondence about this issue. 
# <URL: http://rt.perl.org/rt3/Ticket/Display.html?id=55712 >


This patch adds the functions Parrot_string_length and
Parrot_string_byte_length described in pdd28 and uses them instead of
string_length in a lot of places. It also modifies string_length and
string_compute_strlen to give them their intended meaning, to help the
transition. It also does some cleanup in encoding-related parts.

The intention is to start moving toward pdd28 and to make later
steps in that direction easier.

-- 
Salu2
Index: src/oo.c
===================================================================
--- src/oo.c	(revisión: 28275)
+++ src/oo.c	(copia de trabajo)
@@ -155,7 +155,7 @@
             if (string_str_index(interp, vtable_name,
                 CONST_STRING(interp, "__"), 0) == 0) {
                 vtable_name = string_substr(interp, vtable_name, 2,
-                    string_length(interp, vtable_name) - 2, NULL, 0);
+                    Parrot_string_length(interp, vtable_name) - 2, NULL, 0);
             }
 
             VTABLE_add_vtable_override(interp, self, vtable_name, vtable_sub);
@@ -834,7 +834,7 @@
                         object, meth_str, "v");
         }
         else if (meth_str != NULL &&
-                string_length(interp, meth_str) != 0 && !default_meth) {
+                Parrot_string_byte_length(interp, meth_str) != 0 && !default_meth) {
             real_exception(interp, NULL, METH_NOT_FOUND,
                     "Class BUILD method ('%Ss') not found", meth_str);
         }
Index: src/ops/string.ops
===================================================================
--- src/ops/string.ops	(revisión: 28275)
+++ src/ops/string.ops	(copia de trabajo)
@@ -153,19 +153,11 @@
 =cut
 
 inline op length(out INT, in STR) :base_mem {
-    $1 = $2 ? string_length(interp, $2) : 0;
+    $1 = Parrot_string_length(interp, $2);
 }
 
 inline op bytelength(out INT, in STR) :base_mem {
-    UINTVAL n;
-    STRING * const s = $2;
-    if (!s)
-        n = 0;
-    else {
-        n = s->bufused;
-        PARROT_ASSERT(n == ENCODING_BYTES(interp, $2));
-    }
-    $1 = n;
+    $1 = Parrot_string_byte_length(interp, $2);
 }
 
 
@@ -228,7 +220,7 @@
 =cut
 
 inline op substr(out STR, in STR, in INT) :base_core {
-    const INTVAL len = string_length(interp, $2);
+    const INTVAL len = Parrot_string_length(interp, $2);
     $1 = string_substr(interp, $2, $3, len, &$1, 0);
 }
 
Index: src/ops/io.ops
===================================================================
--- src/ops/io.ops	(revisión: 28275)
+++ src/ops/io.ops	(copia de trabajo)
@@ -187,7 +187,7 @@
 
 op print(in STR) :base_io {
   STRING * const s = $1;
-  if (s && string_length(interp, s)) {
+  if (s && Parrot_string_byte_length(interp, s)) {
     PIO_putps(interp, _PIO_STDOUT(interp), s);
   }
 }
@@ -224,7 +224,7 @@
 
 op printerr(in STR) :base_io {
   STRING * const s = $1;
-  if (s && string_length(interp, s))
+  if (s && Parrot_string_byte_length(interp, s))
     PIO_putps(interp, _PIO_STDERR(interp), s);
 }
 
Index: src/string.c
===================================================================
--- src/string.c	(revisión: 28275)
+++ src/string.c	(copia de trabajo)
@@ -716,10 +716,7 @@
         PObj_bufstart(s) = s->strstart = PARROT_const_cast(char *, buffer);
         PObj_buflen(s)   = s->bufused  = len;
 
-        if (encoding == Parrot_fixed_8_encoding_ptr)
-            s->strlen = len;
-        else
-            string_compute_strlen(interp, s);
+        s->strlen = len;
 
         return s;
     }
@@ -729,10 +726,7 @@
     if (buffer) {
         mem_sys_memcopy(s->strstart, buffer, len);
         s->bufused = len;
-        if (encoding == Parrot_fixed_8_encoding_ptr)
-            s->strlen = len;
-        else
-            string_compute_strlen(interp, s);
+        s->strlen = len;
     }
     else {
         s->strlen = s->bufused = 0;
@@ -774,6 +768,62 @@
 
 =over 4
 
+=cut
+
+*/
+
+/*
+
+=item C<UINTVAL Parrot_string_byte_length>
+
+Returns the number of bytes in the specified Parrot string.
+The character width of variable-width encodings is ignored.
+Combining characters are not treated differently than other
+characters.
+
+=cut
+
+*/
+
+PARROT_API
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+Parrot_string_byte_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
+{
+    /*
+    return s ? s->strlen : 0;
+    */
+    return s ? s->bufused : 0;
+}
+
+/*
+
+=item C<UINTVAL Parrot_string_length>
+
+Returns the number of characters in the specified Parrot string.
+Combining characters are each counted separately.
+Variable-width encodings may look ahead.
+
+=cut
+
+*/
+
+PARROT_API
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+Parrot_string_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
+{
+    DECL_CONST_CAST;
+
+    UINTVAL result = s ?
+        CHARSET_CODEPOINTS(interp, PARROT_const_cast(STRING *, s)) :
+        0;
+
+    return result;
+}
+
+/*
+
 =item C<UINTVAL string_length>
 
 Returns the number of characters in the specified Parrot string.
@@ -962,7 +1012,7 @@
 
 =item C<INTVAL string_compute_strlen>
 
-Calculates and returns the number of characters in the specified Parrot string.
+Returns the number of characters in the specified Parrot string.
 
 =cut
 
@@ -975,8 +1025,7 @@
 {
     PARROT_ASSERT(s);
 
-    s->strlen = CHARSET_CODEPOINTS(interp, s);
-    return s->strlen;
+    return CHARSET_CODEPOINTS(interp, s);
 }
 
 
@@ -1237,7 +1286,7 @@
                 rep->strstart, rep->bufused);
 
         if (diff)
-            (void)string_compute_strlen(interp, src);
+            src->strlen = ENCODING_BYTES(interp, src);
     }
 
     /* Replacement is larger than avail buffer, grow the string */
@@ -1255,7 +1304,7 @@
         mem_sys_memcopy((char *)src->strstart + start_byte, rep->strstart,
                 rep->bufused);
         src->bufused += diff;
-        (void)string_compute_strlen(interp, src);
+        src->strlen = ENCODING_BYTES(interp, src);
     }
 
     /* src is modified, now return the original substring */
@@ -2429,11 +2478,12 @@
 
     /* this also validates the string */
     if (encoding != result->encoding)
-        string_compute_strlen(interp, result);
+        result->strlen = ENCODING_BYTES(interp, result);
 
-    else if (!CHARSET_VALIDATE(interp, result, 0))
+    else if (!CHARSET_VALIDATE(interp, result, 0)) {
         real_exception(interp, NULL, INVALID_STRING_REPRESENTATION,
                 "Malformed string");
+    }
 
     return result;
 }
Index: src/charset/iso-8859-1.c
===================================================================
--- src/charset/iso-8859-1.c	(revisión: 28275)
+++ src/charset/iso-8859-1.c	(copia de trabajo)
@@ -476,7 +476,7 @@
 {
     UINTVAL offset;
 
-    for (offset = 0; offset < string_length(interp, src); ++offset) {
+    for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) {
         const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
         if (codepoint >= 0x100)
             return 0;
Index: src/charset/unicode.c
===================================================================
--- src/charset/unicode.c	(revisión: 28275)
+++ src/charset/unicode.c	(copia de trabajo)
@@ -636,7 +636,7 @@
     String_iter iter;
 
     ENCODING_ITER_INIT(interp, src, &iter);
-    for (offset = 0; offset < string_length(interp, src); ++offset) {
+    for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) {
         const UINTVAL codepoint = iter.get_and_advance(interp, &iter);
         /* Check for Unicode non-characters */
         if (codepoint >= 0xfdd0 &&
Index: src/charset/ascii.c
===================================================================
--- src/charset/ascii.c	(revisión: 28275)
+++ src/charset/ascii.c	(copia de trabajo)
@@ -609,7 +609,7 @@
     String_iter iter;
 
     ENCODING_ITER_INIT(interp, src, &iter);
-    for (offset = 0; offset < string_length(interp, src); ++offset) {
+    for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) {
         const UINTVAL codepoint = iter.get_and_advance(interp, &iter);
         if (codepoint >= 0x80)
             return 0;
Index: src/string_primitives.c
===================================================================
--- src/string_primitives.c	(revisión: 28275)
+++ src/string_primitives.c	(copia de trabajo)
@@ -86,7 +86,7 @@
 {
     UINTVAL workchar = 0;
     UINTVAL charcount = 0;
-    const UINTVAL len = string_length(interp, string);
+    const UINTVAL len = Parrot_string_byte_length(interp, string);
     /* Well, not right now */
     UINTVAL codepoint = CHARSET_GET_BYTE(interp, string, *offset);
     ++*offset;
Index: src/encodings/utf8.c
===================================================================
--- src/encodings/utf8.c	(revisión: 28275)
+++ src/encodings/utf8.c	(copia de trabajo)
@@ -293,7 +293,8 @@
 
     if (c > 0x10FFFF || UNICODE_IS_SURROGATE(c)) {
         real_exception(interp, NULL, INVALID_CHARACTER,
-                           "Invalid character for UTF-8 encoding\n");
+            "Invalid character for UTF-8 encoding: '%x'\n",
+            (unsigned int) c);
     }
 
     while (u8end > u8ptr) {
Index: src/pmc/string.pmc
===================================================================
--- src/pmc/string.pmc	(revisión: 28275)
+++ src/pmc/string.pmc	(copia de trabajo)
@@ -604,7 +604,7 @@
 */
 
     VTABLE INTVAL exists_keyed(PMC *key) {
-        INTVAL n = string_length(INTERP, VTABLE_get_string(INTERP, SELF));
+        INTVAL n = Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF));
         INTVAL k = VTABLE_get_integer(INTERP, key);
         return (INTVAL)((k>=0 && k<=n) || (k<0 && -k<=n));
     }
@@ -645,7 +645,7 @@
 
     void set_string_keyed(PMC *key, STRING * const value) {
         STRING * const s   = PMC_str_val(SELF);
-        const INTVAL   len = string_length(INTERP, value);
+        const INTVAL   len = Parrot_string_length(INTERP, value);
         string_replace(INTERP, s, key_integer(INTERP, key), len, value, NULL);
     }
 
@@ -665,8 +665,8 @@
 */
 
     METHOD replace(STRING *orig, STRING *_new) {
-        const INTVAL   old_len = string_length(INTERP, orig);
-        const INTVAL   new_len = string_length(INTERP, _new);
+        const INTVAL   old_len = Parrot_string_length(INTERP, orig);
+        const INTVAL   new_len = Parrot_string_length(INTERP, _new);
         STRING * const s       = VTABLE_get_string(INTERP, SELF);
         INTVAL         i       = 0;
 
@@ -763,7 +763,7 @@
 */
 
     VTABLE INTVAL elements() {
-        return string_length(INTERP, VTABLE_get_string(INTERP, SELF));
+        return Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF));
     }
 
     VTABLE PMC *slice(PMC *key, INTVAL f) {
@@ -784,7 +784,7 @@
         PObj_get_FLAGS(key) |= KEY_integer_FLAG;
         PMC_int_val(key)     = 0;
 
-        if (!string_length(INTERP, VTABLE_get_string(INTERP, SELF)))
+        if (!Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF)))
             PMC_int_val(key) = -1;
 
         return iter;
@@ -864,7 +864,7 @@
         INTVAL        *tr_data;
         INTVAL         i;
 
-        const INTVAL len = string_length(interp, src);
+        const INTVAL len = Parrot_string_byte_length(interp, src);
 
         if (!len)
             RETURN(void);
@@ -898,7 +898,7 @@
     METHOD reverse(STRING *src) {
         INTVAL         i;
         unsigned char *p;
-        INTVAL         len = string_length(interp, src);
+        INTVAL         len = Parrot_string_byte_length(interp, src);
 
         if (!len)
             RETURN(void);
@@ -930,7 +930,7 @@
     METHOD is_integer(STRING *src) {
         INTVAL         i;
         unsigned char *p;
-        const INTVAL   len = string_length(interp, src);
+        const INTVAL   len = Parrot_string_byte_length(interp, src);
 
         if (!len)
             RETURN(INTVAL 0);
Index: src/pmc/codestring.pmc
===================================================================
--- src/pmc/codestring.pmc	(revisión: 28275)
+++ src/pmc/codestring.pmc	(copia de trabajo)
@@ -110,11 +110,11 @@
         }
 
         (void) string_replace(INTERP, fmt, pos, 2, repl, NULL);
-        replen = string_length(INTERP, repl);
+        replen = Parrot_string_length(INTERP, repl);
     }
 
     /* Add a newline if necessary */
-    if ('\n' != string_index(INTERP, fmt, string_length(interp, fmt) - 1))
+    if ('\n' != string_index(INTERP, fmt, Parrot_string_length(interp, fmt) - 1))
         fmt = string_concat(INTERP, fmt, newline, 0);
 
     S1 = string_concat(INTERP, SELF.get_string(), fmt, 0);
Index: src/pmc/fixedintegerarray.pmc
===================================================================
--- src/pmc/fixedintegerarray.pmc	(revisión: 28275)
+++ src/pmc/fixedintegerarray.pmc	(copia de trabajo)
@@ -67,7 +67,7 @@
         else
             SELF = pmc_new(INTERP, type);
 
-        l = string_length(INTERP, rep);
+        l = Parrot_string_byte_length(INTERP, rep);
 
         if (!l)
             return SELF;
Index: src/pmc/namespace.pmc
===================================================================
--- src/pmc/namespace.pmc	(revisión: 28275)
+++ src/pmc/namespace.pmc	(copia de trabajo)
@@ -180,7 +180,7 @@
                 if (string_str_index(interp, key,
                     CONST_STRING(interp, "__"), 0) == 0) {
                     STRING * const meth_name = string_substr(interp, key, 2,
-                        string_length(interp, key) - 2, NULL, 0);
+                        Parrot_string_length(interp, key) - 2, NULL, 0);
                     sub->vtable_index        =
                         Parrot_get_vtable_index(interp, meth_name);
                 }
Index: src/pmc/nci.pmc
===================================================================
--- src/pmc/nci.pmc	(revisión: 28275)
+++ src/pmc/nci.pmc	(copia de trabajo)
@@ -97,7 +97,7 @@
         nci_info->signature  = key;
 
         /* Arity is length of that string minus one (the return type). */
-        nci_info->arity      = string_length(INTERP, key) - 1;
+        nci_info->arity      = Parrot_string_length(INTERP, key) - 1;
 
         /* Build call function. */
         nci_info->func       = (PMC *)(build_call_func(INTERP, SELF, key));
Index: src/pmc/default.pmc
===================================================================
--- src/pmc/default.pmc	(revisión: 28275)
+++ src/pmc/default.pmc	(copia de trabajo)
@@ -240,17 +240,17 @@
         if (pos < 0)
             return 0;
 
-        if (pos >= (INTVAL)string_length(interp, what))
+        if (pos >= (INTVAL)Parrot_string_length(interp, what))
             return 0;
 
-        len = string_length(interp, method);
+        len = Parrot_string_length(interp, method);
 
         if (pos && string_index(interp, what, pos - 1) != 32) {
             pos += len;
             continue;
         }
 
-        if (pos+len < (INTVAL)string_length(interp, what) &&
+        if (pos+len < (INTVAL)Parrot_string_length(interp, what) &&
             string_index(interp, what, pos + len) != 32) {
             pos += len;
             continue;
Index: src/pmc/parrotio.pmc
===================================================================
--- src/pmc/parrotio.pmc	(revisión: 28275)
+++ src/pmc/parrotio.pmc	(copia de trabajo)
@@ -243,7 +243,7 @@
                 RETURN(PMC *PMCNULL);
 
             /* readline should better return the string w/o NL */
-            len = string_length(INTERP, res);
+            len = Parrot_string_byte_length(INTERP, res);
             while (len &&
                     (((char*)res->strstart)[len - 1] == '\n' ||
                      ((char*)res->strstart)[len - 1] == '\r')) {
@@ -312,7 +312,7 @@
             do {
                 STRING * const part = PIO_reads(INTERP, SELF, 4096);
 
-                if (string_length(INTERP, part) == 0)
+                if (Parrot_string_byte_length(INTERP, part) == 0)
                     break;
                 result = string_append(INTERP, result, part);
             } while (1);
Index: src/library.c
===================================================================
--- src/library.c	(revisión: 28275)
+++ src/library.c	(copia de trabajo)
@@ -538,7 +538,7 @@
     for (i = 0; i < n; ++i) {
         STRING * const path = VTABLE_get_string_keyed_int(interp, paths, i);
 
-        if (string_length(interp, prefix) && !is_abs_path(path))
+        if (Parrot_string_length(interp, prefix) && !is_abs_path(path))
             full_name = path_concat(interp, prefix, path);
         else
             full_name = string_copy(interp, path);
@@ -650,7 +650,7 @@
     STRING * const slash1 = CONST_STRING(interp, "/");
     STRING * const slash2 = CONST_STRING(interp, "\\");
     STRING * const dot    = CONST_STRING(interp, ".");
-    const INTVAL len = string_length(interp, in);
+    const INTVAL len = Parrot_string_length(interp, in);
     STRING *stem;
     INTVAL pos_sl, pos_dot;
 
Index: src/io/io_string.c
===================================================================
--- src/io/io_string.c	(revisión: 28275)
+++ src/io/io_string.c	(copia de trabajo)
@@ -152,7 +152,7 @@
     }
 
     l->self = string_append(interp, old_string, s);
-    return string_length(interp, (STRING *)l->self);
+    return Parrot_string_byte_length(interp, (STRING *)l->self);
 }
 
 /*
Index: include/parrot/string_funcs.h
===================================================================
--- include/parrot/string_funcs.h	(revisión: 28275)
+++ include/parrot/string_funcs.h	(copia de trabajo)
@@ -312,6 +312,18 @@
         __attribute__nonnull__(3);
 
 PARROT_API
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+Parrot_string_byte_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
+        __attribute__nonnull__(1);
+
+PARROT_API
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+Parrot_string_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
+        __attribute__nonnull__(1);
+
+PARROT_API
 PARROT_PURE_FUNCTION
 UINTVAL string_length(SHIM_INTERP, ARGIN(const STRING *s))
         __attribute__nonnull__(2);

Reply via email to