details:   http://hg.nginx.org/njs/rev/fec0d8dfa38c
branches:
changeset: 183:fec0d8dfa38c
user:      Igor Sysoev <i...@sysoev.ru>
date:      Mon Sep 26 14:01:45 2016 +0300
description:
String processing unification using njs_string_length(), njs_utf8_t, and njs_regexp_utf8_t.
diffstat: njs/njs_regexp.c | 62 ++++++++++++++----------------------- njs/njs_string.c | 92 ++++++++++++++++--------------------------------------- njs/njs_string.h | 2 +- njs/njs_vm.c | 3 +- 4 files changed, 54 insertions(+), 105 deletions(-) diffs (383 lines): diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_regexp.c --- a/njs/njs_regexp.c Mon Sep 26 14:01:39 2016 +0300 +++ b/njs/njs_regexp.c Mon Sep 26 14:01:45 2016 +0300 @@ -44,7 +44,7 @@ static u_char *njs_regexp_compile_trace_ static u_char *njs_regexp_match_trace_handler(nxt_trace_t *trace, nxt_trace_data_t *td, u_char *start); static njs_ret_t njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, - u_char *string, nxt_regex_match_data_t *match_data, nxt_uint_t utf8); + njs_utf8_t utf8, u_char *string, nxt_regex_match_data_t *match_data); static njs_ret_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, uint32_t size, int32_t length); @@ -539,7 +539,8 @@ static njs_ret_t njs_regexp_prototype_source(njs_vm_t *vm, njs_value_t *value) { u_char *source; - size_t length, size; + int32_t length; + uint32_t size; njs_regexp_pattern_t *pattern; pattern = value->data.u.regexp->pattern; @@ -558,7 +559,8 @@ njs_regexp_prototype_to_string(njs_vm_t nxt_uint_t nargs, njs_index_t unused) { u_char *source; - size_t length, size; + int32_t length; + uint32_t size; njs_regexp_pattern_t *pattern; pattern = args[0].data.u.regexp->pattern; @@ -624,10 +626,11 @@ njs_regexp_prototype_exec(njs_vm_t *vm, njs_index_t unused) { njs_ret_t ret; - nxt_uint_t n, utf8; + njs_utf8_t utf8; njs_value_t *value; njs_regexp_t *regexp; njs_string_prop_t string; + njs_regexp_utf8_t type; njs_regexp_pattern_t *pattern; nxt_regex_match_data_t *match_data; @@ -648,38 +651,35 @@ njs_regexp_prototype_exec(njs_vm_t *vm, (void) njs_string_prop(&string, value); - /* Byte string. */ - utf8 = 0; - n = 0; + utf8 = NJS_STRING_BYTE; + type = NJS_REGEXP_BYTE; if (string.length != 0) { - /* ASCII string. 
*/ - utf8 = 1; - n = 1; + utf8 = NJS_STRING_ASCII; + type = NJS_REGEXP_UTF8; if (string.length != string.size) { - /* UTF-8 string. */ - utf8 = 2; + utf8 = NJS_STRING_UTF8; } } pattern = regexp->pattern; - if (nxt_regex_is_valid(&pattern->regex[n])) { + if (nxt_regex_is_valid(&pattern->regex[type])) { string.start += regexp->last_index; string.size -= regexp->last_index; - match_data = nxt_regex_match_data(&pattern->regex[n], + match_data = nxt_regex_match_data(&pattern->regex[type], vm->regex_context); if (nxt_slow_path(match_data == NULL)) { return NXT_ERROR; } - ret = njs_regexp_match(vm, &pattern->regex[n], string.start, + ret = njs_regexp_match(vm, &pattern->regex[type], string.start, string.size, match_data); if (ret >= 0) { - return njs_regexp_exec_result(vm, regexp, string.start, match_data, - utf8); + return njs_regexp_exec_result(vm, regexp, utf8, string.start, + match_data); } if (nxt_slow_path(ret != NXT_REGEX_NOMATCH)) { @@ -697,8 +697,8 @@ njs_regexp_prototype_exec(njs_vm_t *vm, static njs_ret_t -njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, u_char *string, - nxt_regex_match_data_t *match_data, nxt_uint_t utf8) +njs_regexp_exec_result(njs_vm_t *vm, njs_regexp_t *regexp, njs_utf8_t utf8, + u_char *string, nxt_regex_match_data_t *match_data) { int *captures; u_char *start; @@ -726,20 +726,10 @@ njs_regexp_exec_result(njs_vm_t *vm, njs start = &string[captures[n]]; size = captures[n + 1] - captures[n]; - switch (utf8) { - case 0: - length = 0; - break; - case 1: - length = size; - break; - default: - length = nxt_utf8_length(start, size); - break; - } + length = njs_string_length(utf8, start, size); - ret = njs_regexp_string_create(vm, &array->start[i], - start, size, length); + ret = njs_regexp_string_create(vm, &array->start[i], start, size, + length); if (nxt_slow_path(ret != NXT_OK)) { goto fail; } @@ -812,13 +802,9 @@ static njs_ret_t njs_regexp_string_create(njs_vm_t *vm, njs_value_t *value, u_char *start, uint32_t size, int32_t 
length) { - if (nxt_fast_path(length >= 0)) { - return njs_string_create(vm, value, start, size, length); - } + length = (length >= 0) ? length : 0; - vm->exception = &njs_exception_internal_error; - - return NXT_ERROR; + return njs_string_create(vm, value, start, size, length); } diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.c --- a/njs/njs_string.c Mon Sep 26 14:01:39 2016 +0300 +++ b/njs/njs_string.c Mon Sep 26 14:01:45 2016 +0300 @@ -88,7 +88,7 @@ static nxt_noinline ssize_t njs_string_i static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, njs_regexp_pattern_t *pattern); static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, - u_char *start, size_t size, nxt_uint_t utf8); + njs_utf8_t utf8, u_char *start, size_t size); static njs_ret_t njs_string_replace_regexp(njs_vm_t *vm, njs_value_t *args, njs_string_replace_t *r); static njs_ret_t njs_string_replace_regexp_function(njs_vm_t *vm, @@ -1609,8 +1609,9 @@ njs_string_match_multiple(njs_vm_t *vm, u_char *start; int32_t size, length; njs_ret_t ret; - nxt_uint_t n, utf8; + njs_utf8_t utf8; njs_array_t *array; + njs_regexp_utf8_t type; njs_string_prop_t string; args[1].data.u.regexp->last_index = 0; @@ -1618,26 +1619,23 @@ njs_string_match_multiple(njs_vm_t *vm, (void) njs_string_prop(&string, &args[0]); - /* Byte string. */ - utf8 = 0; - n = 0; + utf8 = NJS_STRING_BYTE; + type = NJS_REGEXP_BYTE; if (string.length != 0) { - /* ASCII string. */ - utf8 = 1; - n = 1; + utf8 = NJS_STRING_ASCII; + type = NJS_REGEXP_UTF8; if (string.length != string.size) { - /* UTF-8 string. 
*/ - utf8 = 2; + utf8 = NJS_STRING_UTF8; } } - if (nxt_regex_is_valid(&pattern->regex[n])) { + if (nxt_regex_is_valid(&pattern->regex[type])) { array = NULL; do { - ret = njs_regexp_match(vm, &pattern->regex[n], string.start, + ret = njs_regexp_match(vm, &pattern->regex[type], string.start, string.size, vm->single_match_data); if (ret >= 0) { if (array != NULL) { @@ -1667,25 +1665,7 @@ njs_string_match_multiple(njs_vm_t *vm, size = captures[1] - captures[0]; - switch (utf8) { - case 0: - length = 0; - break; - - case 1: - length = size; - break; - - default: - length = nxt_utf8_length(start, size); - - if (nxt_slow_path(length < 0)) { - vm->exception = &njs_exception_internal_error; - return NXT_ERROR; - } - - break; - } + length = njs_string_length(utf8, start, size); ret = njs_string_create(vm, &array->start[array->length], start, size, length); @@ -1721,9 +1701,10 @@ njs_string_prototype_split(njs_vm_t *vm, u_char *p, *start, *next; size_t size; uint32_t limit; - nxt_uint_t n, utf8; + njs_utf8_t utf8; njs_array_t *array; const u_char *end; + njs_regexp_utf8_t type; njs_string_prop_t string, split; njs_regexp_pattern_t *pattern; @@ -1751,18 +1732,15 @@ njs_string_prototype_split(njs_vm_t *vm, goto single; } - /* Byte string. */ - utf8 = 0; - n = 0; + utf8 = NJS_STRING_BYTE; + type = NJS_REGEXP_BYTE; if (string.length != 0) { - /* ASCII string. */ - utf8 = 1; + utf8 = NJS_STRING_ASCII; + type = NJS_REGEXP_UTF8; if (string.length != string.size) { - /* UTF-8 string. 
*/ - utf8 = 2; - n = 1; + utf8 = NJS_STRING_UTF8; } } @@ -1795,7 +1773,7 @@ njs_string_prototype_split(njs_vm_t *vm, size = p - start; - ret = njs_string_split_part_add(vm, array, start, size, utf8); + ret = njs_string_split_part_add(vm, array, utf8, start, size); if (nxt_slow_path(ret != NXT_OK)) { return ret; } @@ -1810,7 +1788,7 @@ njs_string_prototype_split(njs_vm_t *vm, case NJS_REGEXP: pattern = args[1].data.u.regexp->pattern; - if (!nxt_regex_is_valid(&pattern->regex[n])) { + if (!nxt_regex_is_valid(&pattern->regex[type])) { goto single; } @@ -1818,7 +1796,7 @@ njs_string_prototype_split(njs_vm_t *vm, end = string.start + string.size; do { - ret = njs_regexp_match(vm, &pattern->regex[n], start, + ret = njs_regexp_match(vm, &pattern->regex[type], start, end - start, vm->single_match_data); if (ret >= 0) { captures = nxt_regex_captures(vm->single_match_data); @@ -1842,7 +1820,7 @@ njs_string_prototype_split(njs_vm_t *vm, size = p - start; - ret = njs_string_split_part_add(vm, array, start, size, utf8); + ret = njs_string_split_part_add(vm, array, utf8, start, size); if (nxt_slow_path(ret != NXT_OK)) { return ret; } @@ -1876,28 +1854,12 @@ done: static njs_ret_t -njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, u_char *start, - size_t size, nxt_uint_t utf8) +njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, njs_utf8_t utf8, + u_char *start, size_t size) { ssize_t length; - switch (utf8) { - case 0: - length = 0; - break; - - case 1: - length = size; - break; - - default: - length = nxt_utf8_length(start, size); - - if (nxt_slow_path(length < 0)) { - vm->exception = &njs_exception_internal_error; - return NXT_ERROR; - } - } + length = njs_string_length(utf8, start, size); return njs_array_string_add(vm, array, start, size, length); } @@ -2141,7 +2103,7 @@ njs_string_replace_regexp_function(njs_v size = captures[k + 1] - captures[k]; k += 2; - length = njs_string_length(start, size, r->utf8); + length = njs_string_length(r->utf8, start, 
size); ret = njs_string_create(vm, &arguments[i], start, size, length); if (nxt_slow_path(ret != NXT_OK)) { @@ -2153,7 +2115,7 @@ njs_string_replace_regexp_function(njs_v njs_number_set(&arguments[n + 1], captures[0]); /* The whole string being examined. */ - length = njs_string_length(r->part[0].start, r->part[0].size, r->utf8); + length = njs_string_length(r->utf8, r->part[0].start, r->part[0].size); ret = njs_string_create(vm, &arguments[n + 2], r->part[0].start, r->part[0].size, length); diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_string.h --- a/njs/njs_string.h Mon Sep 26 14:01:39 2016 +0300 +++ b/njs/njs_string.h Mon Sep 26 14:01:45 2016 +0300 @@ -89,7 +89,7 @@ typedef enum { nxt_inline uint32_t -njs_string_length(u_char *start, size_t size, njs_utf8_t utf8) +njs_string_length(njs_utf8_t utf8, u_char *start, size_t size) { ssize_t length; diff -r eed097d72d5e -r fec0d8dfa38c njs/njs_vm.c --- a/njs/njs_vm.c Mon Sep 26 14:01:39 2016 +0300 +++ b/njs/njs_vm.c Mon Sep 26 14:01:45 2016 +0300 @@ -3334,7 +3334,7 @@ njs_value_string_copy(njs_vm_t *vm, nxt_ void njs_vm_throw_exception(njs_vm_t *vm, u_char *buf, uint32_t size) { - uint32_t length; + int32_t length; njs_value_t *value; value = nxt_mem_cache_alloc(vm->mem_cache_pool, sizeof(njs_value_t)); @@ -3343,6 +3343,7 @@ njs_vm_throw_exception(njs_vm_t *vm, u_c vm->exception = value; length = nxt_utf8_length(buf, size); + length = (length >= 0) ? length : 0; (void) njs_string_new(vm, value, buf, size, length); } _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel