# New Ticket Created by NotFound # Please include the string: [perl #55712] # in the subject line of all future correspondence about this issue. # <URL: http://rt.perl.org/rt3/Ticket/Display.html?id=55712 >
This patch adds the functions Parrot_string_length and Parrot_string_byte_length described in pdd28, and uses them instead of string_length in many places. It also modifies string_length and string_compute_strlen to give them their intended meaning, to help with the transition, and does some cleanup in the encoding-related parts. The intention is to start moving toward pdd28 and to make later steps in that direction easier. -- Salu2
Index: src/oo.c =================================================================== --- src/oo.c (revisión: 28275) +++ src/oo.c (copia de trabajo) @@ -155,7 +155,7 @@ if (string_str_index(interp, vtable_name, CONST_STRING(interp, "__"), 0) == 0) { vtable_name = string_substr(interp, vtable_name, 2, - string_length(interp, vtable_name) - 2, NULL, 0); + Parrot_string_length(interp, vtable_name) - 2, NULL, 0); } VTABLE_add_vtable_override(interp, self, vtable_name, vtable_sub); @@ -834,7 +834,7 @@ object, meth_str, "v"); } else if (meth_str != NULL && - string_length(interp, meth_str) != 0 && !default_meth) { + Parrot_string_byte_length(interp, meth_str) != 0 && !default_meth) { real_exception(interp, NULL, METH_NOT_FOUND, "Class BUILD method ('%Ss') not found", meth_str); } Index: src/ops/string.ops =================================================================== --- src/ops/string.ops (revisión: 28275) +++ src/ops/string.ops (copia de trabajo) @@ -153,19 +153,11 @@ =cut inline op length(out INT, in STR) :base_mem { - $1 = $2 ? 
string_length(interp, $2) : 0; + $1 = Parrot_string_length(interp, $2); } inline op bytelength(out INT, in STR) :base_mem { - UINTVAL n; - STRING * const s = $2; - if (!s) - n = 0; - else { - n = s->bufused; - PARROT_ASSERT(n == ENCODING_BYTES(interp, $2)); - } - $1 = n; + $1 = Parrot_string_byte_length(interp, $2); } @@ -228,7 +220,7 @@ =cut inline op substr(out STR, in STR, in INT) :base_core { - const INTVAL len = string_length(interp, $2); + const INTVAL len = Parrot_string_length(interp, $2); $1 = string_substr(interp, $2, $3, len, &$1, 0); } Index: src/ops/io.ops =================================================================== --- src/ops/io.ops (revisión: 28275) +++ src/ops/io.ops (copia de trabajo) @@ -187,7 +187,7 @@ op print(in STR) :base_io { STRING * const s = $1; - if (s && string_length(interp, s)) { + if (s && Parrot_string_byte_length(interp, s)) { PIO_putps(interp, _PIO_STDOUT(interp), s); } } @@ -224,7 +224,7 @@ op printerr(in STR) :base_io { STRING * const s = $1; - if (s && string_length(interp, s)) + if (s && Parrot_string_byte_length(interp, s)) PIO_putps(interp, _PIO_STDERR(interp), s); } Index: src/string.c =================================================================== --- src/string.c (revisión: 28275) +++ src/string.c (copia de trabajo) @@ -716,10 +716,7 @@ PObj_bufstart(s) = s->strstart = PARROT_const_cast(char *, buffer); PObj_buflen(s) = s->bufused = len; - if (encoding == Parrot_fixed_8_encoding_ptr) - s->strlen = len; - else - string_compute_strlen(interp, s); + s->strlen = len; return s; } @@ -729,10 +726,7 @@ if (buffer) { mem_sys_memcopy(s->strstart, buffer, len); s->bufused = len; - if (encoding == Parrot_fixed_8_encoding_ptr) - s->strlen = len; - else - string_compute_strlen(interp, s); + s->strlen = len; } else { s->strlen = s->bufused = 0; @@ -774,6 +768,62 @@ =over 4 +=cut + +*/ + +/* + +=item C<UINTVAL Parrot_string_byte_length> + +Returns the number of bytes in the specified Parrot string. 
+The character width of variable-width encodings is ignored. +Combining characteres are no treated differently than other +characters. + +=cut + +*/ + +PARROT_API +PARROT_WARN_UNUSED_RESULT +UINTVAL +Parrot_string_byte_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s)) +{ + /* + return s ? s->strlen : 0; + */ + return s ? s->bufused : 0; +} + +/* + +=item C<UINTVAL Parrot_string_length> + +Returns the number of characters in the specified Parrot string. +Combining characters are each counted separately. +Variable-width encodings may lookahead + +=cut + +*/ + +PARROT_API +PARROT_WARN_UNUSED_RESULT +UINTVAL +Parrot_string_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s)) +{ + DECL_CONST_CAST; + + UINTVAL result = s ? + CHARSET_CODEPOINTS(interp, PARROT_const_cast(STRING *, s)) : + 0; + + return result; +} + +/* + =item C<UINTVAL string_length> Returns the number of characters in the specified Parrot string. @@ -962,7 +1012,7 @@ =item C<INTVAL string_compute_strlen> -Calculates and returns the number of characters in the specified Parrot string. +Returns the number of characters in the specified Parrot string. 
=cut @@ -975,8 +1025,7 @@ { PARROT_ASSERT(s); - s->strlen = CHARSET_CODEPOINTS(interp, s); - return s->strlen; + return CHARSET_CODEPOINTS(interp, s); } @@ -1237,7 +1286,7 @@ rep->strstart, rep->bufused); if (diff) - (void)string_compute_strlen(interp, src); + src->strlen = ENCODING_BYTES(interp, src); } /* Replacement is larger than avail buffer, grow the string */ @@ -1255,7 +1304,7 @@ mem_sys_memcopy((char *)src->strstart + start_byte, rep->strstart, rep->bufused); src->bufused += diff; - (void)string_compute_strlen(interp, src); + src->strlen = ENCODING_BYTES(interp, src); } /* src is modified, now return the original substring */ @@ -2429,11 +2478,12 @@ /* this also validates the string */ if (encoding != result->encoding) - string_compute_strlen(interp, result); + result->strlen = ENCODING_BYTES(interp, result); - else if (!CHARSET_VALIDATE(interp, result, 0)) + else if (!CHARSET_VALIDATE(interp, result, 0)) { real_exception(interp, NULL, INVALID_STRING_REPRESENTATION, "Malformed string"); + } return result; } Index: src/charset/iso-8859-1.c =================================================================== --- src/charset/iso-8859-1.c (revisión: 28275) +++ src/charset/iso-8859-1.c (copia de trabajo) @@ -476,7 +476,7 @@ { UINTVAL offset; - for (offset = 0; offset < string_length(interp, src); ++offset) { + for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) { const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, offset); if (codepoint >= 0x100) return 0; Index: src/charset/unicode.c =================================================================== --- src/charset/unicode.c (revisión: 28275) +++ src/charset/unicode.c (copia de trabajo) @@ -636,7 +636,7 @@ String_iter iter; ENCODING_ITER_INIT(interp, src, &iter); - for (offset = 0; offset < string_length(interp, src); ++offset) { + for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) { const UINTVAL codepoint = iter.get_and_advance(interp, &iter); /* Check for 
Unicode non-characters */ if (codepoint >= 0xfdd0 && Index: src/charset/ascii.c =================================================================== --- src/charset/ascii.c (revisión: 28275) +++ src/charset/ascii.c (copia de trabajo) @@ -609,7 +609,7 @@ String_iter iter; ENCODING_ITER_INIT(interp, src, &iter); - for (offset = 0; offset < string_length(interp, src); ++offset) { + for (offset = 0; offset < Parrot_string_length(interp, src); ++offset) { const UINTVAL codepoint = iter.get_and_advance(interp, &iter); if (codepoint >= 0x80) return 0; Index: src/string_primitives.c =================================================================== --- src/string_primitives.c (revisión: 28275) +++ src/string_primitives.c (copia de trabajo) @@ -86,7 +86,7 @@ { UINTVAL workchar = 0; UINTVAL charcount = 0; - const UINTVAL len = string_length(interp, string); + const UINTVAL len = Parrot_string_byte_length(interp, string); /* Well, not right now */ UINTVAL codepoint = CHARSET_GET_BYTE(interp, string, *offset); ++*offset; Index: src/encodings/utf8.c =================================================================== --- src/encodings/utf8.c (revisión: 28275) +++ src/encodings/utf8.c (copia de trabajo) @@ -293,7 +293,8 @@ if (c > 0x10FFFF || UNICODE_IS_SURROGATE(c)) { real_exception(interp, NULL, INVALID_CHARACTER, - "Invalid character for UTF-8 encoding\n"); + "Invalid character for UTF-8 encoding: '%x'\n", + (unsigned int) c); } while (u8end > u8ptr) { Index: src/pmc/string.pmc =================================================================== --- src/pmc/string.pmc (revisión: 28275) +++ src/pmc/string.pmc (copia de trabajo) @@ -604,7 +604,7 @@ */ VTABLE INTVAL exists_keyed(PMC *key) { - INTVAL n = string_length(INTERP, VTABLE_get_string(INTERP, SELF)); + INTVAL n = Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF)); INTVAL k = VTABLE_get_integer(INTERP, key); return (INTVAL)((k>=0 && k<=n) || (k<0 && -k<=n)); } @@ -645,7 +645,7 @@ void set_string_keyed(PMC *key, 
STRING * const value) { STRING * const s = PMC_str_val(SELF); - const INTVAL len = string_length(INTERP, value); + const INTVAL len = Parrot_string_length(INTERP, value); string_replace(INTERP, s, key_integer(INTERP, key), len, value, NULL); } @@ -665,8 +665,8 @@ */ METHOD replace(STRING *orig, STRING *_new) { - const INTVAL old_len = string_length(INTERP, orig); - const INTVAL new_len = string_length(INTERP, _new); + const INTVAL old_len = Parrot_string_length(INTERP, orig); + const INTVAL new_len = Parrot_string_length(INTERP, _new); STRING * const s = VTABLE_get_string(INTERP, SELF); INTVAL i = 0; @@ -763,7 +763,7 @@ */ VTABLE INTVAL elements() { - return string_length(INTERP, VTABLE_get_string(INTERP, SELF)); + return Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF)); } VTABLE PMC *slice(PMC *key, INTVAL f) { @@ -784,7 +784,7 @@ PObj_get_FLAGS(key) |= KEY_integer_FLAG; PMC_int_val(key) = 0; - if (!string_length(INTERP, VTABLE_get_string(INTERP, SELF))) + if (!Parrot_string_length(INTERP, VTABLE_get_string(INTERP, SELF))) PMC_int_val(key) = -1; return iter; @@ -864,7 +864,7 @@ INTVAL *tr_data; INTVAL i; - const INTVAL len = string_length(interp, src); + const INTVAL len = Parrot_string_byte_length(interp, src); if (!len) RETURN(void); @@ -898,7 +898,7 @@ METHOD reverse(STRING *src) { INTVAL i; unsigned char *p; - INTVAL len = string_length(interp, src); + INTVAL len = Parrot_string_byte_length(interp, src); if (!len) RETURN(void); @@ -930,7 +930,7 @@ METHOD is_integer(STRING *src) { INTVAL i; unsigned char *p; - const INTVAL len = string_length(interp, src); + const INTVAL len = Parrot_string_byte_length(interp, src); if (!len) RETURN(INTVAL 0); Index: src/pmc/codestring.pmc =================================================================== --- src/pmc/codestring.pmc (revisión: 28275) +++ src/pmc/codestring.pmc (copia de trabajo) @@ -110,11 +110,11 @@ } (void) string_replace(INTERP, fmt, pos, 2, repl, NULL); - replen = string_length(INTERP, repl); + 
replen = Parrot_string_length(INTERP, repl); } /* Add a newline if necessary */ - if ('\n' != string_index(INTERP, fmt, string_length(interp, fmt) - 1)) + if ('\n' != string_index(INTERP, fmt, Parrot_string_length(interp, fmt) - 1)) fmt = string_concat(INTERP, fmt, newline, 0); S1 = string_concat(INTERP, SELF.get_string(), fmt, 0); Index: src/pmc/fixedintegerarray.pmc =================================================================== --- src/pmc/fixedintegerarray.pmc (revisión: 28275) +++ src/pmc/fixedintegerarray.pmc (copia de trabajo) @@ -67,7 +67,7 @@ else SELF = pmc_new(INTERP, type); - l = string_length(INTERP, rep); + l = Parrot_string_byte_length(INTERP, rep); if (!l) return SELF; Index: src/pmc/namespace.pmc =================================================================== --- src/pmc/namespace.pmc (revisión: 28275) +++ src/pmc/namespace.pmc (copia de trabajo) @@ -180,7 +180,7 @@ if (string_str_index(interp, key, CONST_STRING(interp, "__"), 0) == 0) { STRING * const meth_name = string_substr(interp, key, 2, - string_length(interp, key) - 2, NULL, 0); + Parrot_string_length(interp, key) - 2, NULL, 0); sub->vtable_index = Parrot_get_vtable_index(interp, meth_name); } Index: src/pmc/nci.pmc =================================================================== --- src/pmc/nci.pmc (revisión: 28275) +++ src/pmc/nci.pmc (copia de trabajo) @@ -97,7 +97,7 @@ nci_info->signature = key; /* Arity is length of that string minus one (the return type). */ - nci_info->arity = string_length(INTERP, key) - 1; + nci_info->arity = Parrot_string_length(INTERP, key) - 1; /* Build call function. 
*/ nci_info->func = (PMC *)(build_call_func(INTERP, SELF, key)); Index: src/pmc/default.pmc =================================================================== --- src/pmc/default.pmc (revisión: 28275) +++ src/pmc/default.pmc (copia de trabajo) @@ -240,17 +240,17 @@ if (pos < 0) return 0; - if (pos >= (INTVAL)string_length(interp, what)) + if (pos >= (INTVAL)Parrot_string_length(interp, what)) return 0; - len = string_length(interp, method); + len = Parrot_string_length(interp, method); if (pos && string_index(interp, what, pos - 1) != 32) { pos += len; continue; } - if (pos+len < (INTVAL)string_length(interp, what) && + if (pos+len < (INTVAL)Parrot_string_length(interp, what) && string_index(interp, what, pos + len) != 32) { pos += len; continue; Index: src/pmc/parrotio.pmc =================================================================== --- src/pmc/parrotio.pmc (revisión: 28275) +++ src/pmc/parrotio.pmc (copia de trabajo) @@ -243,7 +243,7 @@ RETURN(PMC *PMCNULL); /* readline should better return the string w/o NL */ - len = string_length(INTERP, res); + len = Parrot_string_byte_length(INTERP, res); while (len && (((char*)res->strstart)[len - 1] == '\n' || ((char*)res->strstart)[len - 1] == '\r')) { @@ -312,7 +312,7 @@ do { STRING * const part = PIO_reads(INTERP, SELF, 4096); - if (string_length(INTERP, part) == 0) + if (Parrot_string_byte_length(INTERP, part) == 0) break; result = string_append(INTERP, result, part); } while (1); Index: src/library.c =================================================================== --- src/library.c (revisión: 28275) +++ src/library.c (copia de trabajo) @@ -538,7 +538,7 @@ for (i = 0; i < n; ++i) { STRING * const path = VTABLE_get_string_keyed_int(interp, paths, i); - if (string_length(interp, prefix) && !is_abs_path(path)) + if (Parrot_string_length(interp, prefix) && !is_abs_path(path)) full_name = path_concat(interp, prefix, path); else full_name = string_copy(interp, path); @@ -650,7 +650,7 @@ STRING * const slash1 = 
CONST_STRING(interp, "/"); STRING * const slash2 = CONST_STRING(interp, "\\"); STRING * const dot = CONST_STRING(interp, "."); - const INTVAL len = string_length(interp, in); + const INTVAL len = Parrot_string_length(interp, in); STRING *stem; INTVAL pos_sl, pos_dot; Index: src/io/io_string.c =================================================================== --- src/io/io_string.c (revisión: 28275) +++ src/io/io_string.c (copia de trabajo) @@ -152,7 +152,7 @@ } l->self = string_append(interp, old_string, s); - return string_length(interp, (STRING *)l->self); + return Parrot_string_byte_length(interp, (STRING *)l->self); } /* Index: include/parrot/string_funcs.h =================================================================== --- include/parrot/string_funcs.h (revisión: 28275) +++ include/parrot/string_funcs.h (copia de trabajo) @@ -312,6 +312,18 @@ __attribute__nonnull__(3); PARROT_API +PARROT_WARN_UNUSED_RESULT +UINTVAL +Parrot_string_byte_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s)) + __attribute__nonnull__(1); + +PARROT_API +PARROT_WARN_UNUSED_RESULT +UINTVAL +Parrot_string_length(PARROT_INTERP, ARGIN_NULLOK(const STRING *s)) + __attribute__nonnull__(1); + +PARROT_API PARROT_PURE_FUNCTION UINTVAL string_length(SHIM_INTERP, ARGIN(const STRING *s)) __attribute__nonnull__(2);