details: https://hg.nginx.org/njs/rev/8170f061bbde branches: changeset: 2066:8170f061bbde user: Dmitry Volyntsev <xei...@nginx.com> date: Tue Mar 07 20:35:00 2023 -0800 description: Introduced njs_string_offset() which supports any string.
diffstat: src/njs_iterator.c | 2 +- src/njs_json.c | 7 +----- src/njs_parser.c | 5 ++- src/njs_regexp.c | 16 ++++++++------ src/njs_string.c | 57 +++++++++++++++-------------------------------------- src/njs_string.h | 18 +++++++++++++++- 6 files changed, 46 insertions(+), 59 deletions(-) diffs (297 lines): diff -r e3a609ff9001 -r 8170f061bbde src/njs_iterator.c --- a/src/njs_iterator.c Fri Mar 03 18:50:23 2023 -0800 +++ b/src/njs_iterator.c Tue Mar 07 20:35:00 2023 -0800 @@ -558,7 +558,7 @@ njs_object_iterate_reverse(njs_vm_t *vm, i = from + 1; if (i > to) { - p = njs_string_offset(string_prop.start, end, from); + p = njs_string_utf8_offset(string_prop.start, end, from); p = njs_utf8_next(p, end); } diff -r e3a609ff9001 -r 8170f061bbde src/njs_json.c --- a/src/njs_json.c Fri Mar 03 18:50:23 2023 -0800 +++ b/src/njs_json.c Tue Mar 07 20:35:00 2023 -0800 @@ -233,12 +233,7 @@ njs_json_stringify(njs_vm_t *vm, njs_val return NJS_ERROR; } - if (length > 10) { - p = njs_string_offset(prop.start, prop.start + prop.size, 10); - - } else { - p = prop.start + prop.size; - } + p = njs_string_offset(&prop, njs_min(length, 10)); stringify->space.start = prop.start; stringify->space.length = p - prop.start; diff -r e3a609ff9001 -r 8170f061bbde src/njs_parser.c --- a/src/njs_parser.c Fri Mar 03 18:50:23 2023 -0800 +++ b/src/njs_parser.c Tue Mar 07 20:35:00 2023 -0800 @@ -8589,7 +8589,8 @@ njs_parser_string_create(njs_vm_t *vm, n njs_decode_utf8(&dst, &token->text); if (length > NJS_STRING_MAP_STRIDE && dst.length != length) { - njs_string_offset_map_init(value->long_string.data->start, dst.length); + njs_string_utf8_offset_map_init(value->long_string.data->start, + dst.length); } return NJS_OK; @@ -8833,7 +8834,7 @@ next_char: } if (length > NJS_STRING_MAP_STRIDE && length != size) { - njs_string_offset_map_init(start, size); + njs_string_utf8_offset_map_init(start, size); } return NJS_TOKEN_STRING; diff -r e3a609ff9001 -r 8170f061bbde src/njs_regexp.c --- a/src/njs_regexp.c Fri 
Mar 03 18:50:23 2023 -0800 +++ b/src/njs_regexp.c Tue Mar 07 20:35:00 2023 -0800 @@ -891,9 +891,9 @@ njs_regexp_builtin_exec(njs_vm_t *vm, nj offset = last_index; } else { - /* UTF-8 string. */ - offset = njs_string_offset(string.start, string.start + string.size, - last_index) - string.start; + offset = njs_string_utf8_offset(string.start, + string.start + string.size, last_index) + - string.start; } ret = njs_regexp_match(vm, &pattern->regex[type], string.start, offset, @@ -1360,7 +1360,8 @@ njs_regexp_prototype_symbol_replace(njs_ if ((size_t) length != s.size) { /* UTF-8 string. */ - pos = njs_string_offset(s.start, s.start + s.size, pos) - s.start; + pos = njs_string_utf8_offset(s.start, s.start + s.size, pos) + - s.start; } pos = njs_max(njs_min(pos, (int64_t) s.size), 0); @@ -1643,8 +1644,8 @@ njs_regexp_prototype_symbol_split(njs_vm } if (utf8 == NJS_STRING_UTF8) { - start = njs_string_offset(s.start, s.start + s.size, p); - end = njs_string_offset(s.start, s.start + s.size, q); + start = njs_string_utf8_offset(s.start, s.start + s.size, p); + end = njs_string_utf8_offset(s.start, s.start + s.size, q); } else { start = &s.start[p]; @@ -1691,7 +1692,8 @@ njs_regexp_prototype_symbol_split(njs_vm end = &s.start[s.size]; if (utf8 == NJS_STRING_UTF8) { - start = (p < length) ? njs_string_offset(s.start, s.start + s.size, p) + start = (p < length) ? njs_string_utf8_offset(s.start, s.start + s.size, + p) : end; } else { diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.c --- a/src/njs_string.c Fri Mar 03 18:50:23 2023 -0800 +++ b/src/njs_string.c Tue Mar 07 20:35:00 2023 -0800 @@ -1146,7 +1146,7 @@ njs_string_prototype_to_bytes(njs_vm_t * /* UTF-8 string. 
*/ end = string.start + string.size; - s = njs_string_offset(string.start, end, slice.start); + s = njs_string_utf8_offset(string.start, end, slice.start); length = slice.length; @@ -1503,7 +1503,7 @@ njs_string_slice_string_prop(njs_string_ end = start + string->size; if (slice->start < slice->string_length) { - start = njs_string_offset(start, end, slice->start); + start = njs_string_utf8_offset(start, end, slice->start); /* Evaluate size of the slice in bytes and adjust length. */ p = start; @@ -1584,9 +1584,8 @@ njs_string_prototype_char_code_at(njs_vm } else { njs_utf8_decode_init(&ctx); - /* UTF-8 string. */ end = string.start + string.size; - start = njs_string_offset(string.start, end, index); + start = njs_string_utf8_offset(string.start, end, index); code = njs_utf8_decode(&ctx, &start, end); } @@ -2151,7 +2150,7 @@ njs_string_index_of(njs_string_prop_t *s } else { /* UTF-8 string. */ - p = njs_string_offset(string->start, end, index); + p = njs_string_utf8_offset(string->start, end, index); end -= search->size - 1; while (p < end) { @@ -2296,7 +2295,7 @@ njs_string_prototype_last_index_of(njs_v goto done; } - p = njs_string_offset(string.start, end, index); + p = njs_string_utf8_offset(string.start, end, index); for (; p >= string.start; p = njs_utf8_prev(p)) { if ((p + s.size) <= end && memcmp(p, s.start, s.size) == 0) { @@ -2376,15 +2375,7 @@ njs_string_prototype_includes(njs_vm_t * if (length - index >= search_length) { end = string.start + string.size; - - if (string.size == (size_t) length) { - /* Byte or ASCII string. */ - p = string.start + index; - - } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, end, index); - } + p = njs_string_offset(&string, index); end -= search.size - 1; @@ -2482,15 +2473,7 @@ njs_string_prototype_starts_or_ends_with } end = string.start + string.size; - - if (string.size == (size_t) length) { - /* Byte or ASCII string. */ - p = string.start + index; - - } else { - /* UTF-8 string. 
*/ - p = njs_string_offset(string.start, end, index); - } + p = njs_string_offset(&string, index); if ((size_t) (end - p) >= search.size && memcmp(p, search.start, search.size) == 0) @@ -2512,11 +2495,11 @@ done: /* - * njs_string_offset() assumes that index is correct. + * njs_string_utf8_offset() assumes that index is correct. */ const u_char * -njs_string_offset(const u_char *start, const u_char *end, size_t index) +njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index) { uint32_t *map; njs_uint_t skip; @@ -2525,7 +2508,7 @@ njs_string_offset(const u_char *start, c map = njs_string_map_start(end); if (map[0] == 0) { - njs_string_offset_map_init(start, end - start); + njs_string_utf8_offset_map_init(start, end - start); } start += map[index / NJS_STRING_MAP_STRIDE - 1]; @@ -2562,7 +2545,7 @@ njs_string_index(njs_string_prop_t *stri map = njs_string_map_start(end); if (map[0] == 0) { - njs_string_offset_map_init(string->start, string->size); + njs_string_utf8_offset_map_init(string->start, string->size); } while (index + NJS_STRING_MAP_STRIDE < string->length @@ -2587,7 +2570,7 @@ njs_string_index(njs_string_prop_t *stri void -njs_string_offset_map_init(const u_char *start, size_t size) +njs_string_utf8_offset_map_init(const u_char *start, size_t size) { size_t offset; uint32_t *map; @@ -3055,7 +3038,7 @@ njs_string_prototype_pad(njs_vm_t *vm, n if (pad_string.size != (size_t) pad_length) { /* UTF-8 string. */ end = pad_string.start + pad_string.size; - end = njs_string_offset(pad_string.start, end, trunc); + end = njs_string_utf8_offset(pad_string.start, end, trunc); trunc = end - pad_string.start; padding = pad_string.size * n + trunc; @@ -3799,14 +3782,7 @@ njs_string_prototype_replace(njs_vm_t *v } } - if (njs_is_byte_or_ascii_string(&string)) { - p = string.start + pos; - - } else { - /* UTF-8 string. 
*/ - p = njs_string_offset(string.start, string.start + string.size, - pos); - } + p = njs_string_offset(&string, pos); (void) njs_string_prop(&ret_string, &retval); @@ -3867,9 +3843,8 @@ njs_string_prototype_replace(njs_vm_t *v p = string.start + pos; } else { - /* UTF-8 string. */ - p = njs_string_offset(string.start, string.start + string.size, - pos); + p = njs_string_utf8_offset(string.start, string.start + string.size, + pos); } (void) njs_string_prop(&ret_string, &retval); diff -r e3a609ff9001 -r 8170f061bbde src/njs_string.h --- a/src/njs_string.h Fri Mar 03 18:50:23 2023 -0800 +++ b/src/njs_string.h Tue Mar 07 20:35:00 2023 -0800 @@ -243,10 +243,10 @@ void njs_string_slice_string_prop(njs_st const njs_string_prop_t *string, const njs_slice_prop_t *slice); njs_int_t njs_string_slice(njs_vm_t *vm, njs_value_t *dst, const njs_string_prop_t *string, const njs_slice_prop_t *slice); -const u_char *njs_string_offset(const u_char *start, const u_char *end, +const u_char *njs_string_utf8_offset(const u_char *start, const u_char *end, size_t index); uint32_t njs_string_index(njs_string_prop_t *string, uint32_t offset); -void njs_string_offset_map_init(const u_char *start, size_t size); +void njs_string_utf8_offset_map_init(const u_char *start, size_t size); double njs_string_to_index(const njs_value_t *value); njs_int_t njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs, njs_index_t component); @@ -266,6 +266,20 @@ njs_int_t njs_string_get_substitution(nj njs_value_t *groups, njs_value_t *replacement, njs_value_t *retval); +njs_inline const u_char * +njs_string_offset(njs_string_prop_t *string, int64_t index) +{ + if (njs_is_byte_or_ascii_string(string)) { + return string->start + index; + } + + /* UTF-8 string. 
*/ + + return njs_string_utf8_offset(string->start, string->start + string->size, + index); +} + + extern const njs_object_init_t njs_string_instance_init; extern const njs_object_type_init_t njs_string_type_init; _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel