json_lex_input is called with each character of a JSON string being parsed. Before this change, every character that needed to be copied to the buffer was copied individually. This is expensive: there are often many characters in a row that need to be copied, and copying memory in blocks is more efficient than copying it byte by byte. To improve this, the string is now copied in blocks. As long as the lexer state stays the same, no copy is performed; the parser only remembers where the pending run of characters starts. As soon as the state is about to change (for example, on reaching the end of a string), or when the end of the input passed to json_parser_feed is reached, the pending characters are copied to the buffer in a single call.
There is also a conditional that checks whether the current character is a newline, which is quite unlikely. When this was examined with perf, the comparison accounted for a very high share of CPU cycles. To improve this, the check is now wrapped in the OVS_UNLIKELY macro, which hints to the compiler that the branch is rarely taken so that it can arrange the generated code to favor the common path. The json_lex_input function is called for each character of a string, and calling the function so many times uses a lot of CPU cycles. Making it an inline function greatly reduced the percentage of cycles used by the call.

Here is the improvement seen in the json-string-benchmark test:

     SIZE   Q  S       BEFORE        AFTER    CHANGE
--------------------------------------------------------
   100000   0  0 :    0.842 ms     0.511 ms   -39.3 %
   100000   2  1 :    0.917 ms     0.561 ms   -38.8 %
   100000  10  1 :    1.063 ms     0.678 ms   -36.2 %
 10000000   0  0 :   85.328 ms    51.998 ms   -39.1 %
 10000000   2  1 :   92.555 ms    57.353 ms   -38.0 %
 10000000  10  1 :  106.728 ms    69.288 ms   -35.1 %
100000000   0  0 :  955.375 ms   632.300 ms   -33.8 %
100000000   2  1 : 1031.700 ms   689.375 ms   -33.2 %
100000000  10  1 : 1189.300 ms   821.850 ms   -30.0 %

Here Q is the probability (%) for a character to be a '\"' and S is the probability (%) for it to be a special character (< 32).

Signed-off-by: Rosemarie O'Riorden <rorior...@redhat.com> --- lib/json.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/lib/json.c b/lib/json.c index 720c73d94..f48cb3241 100644 --- a/lib/json.c +++ b/lib/json.c @@ -101,6 +101,7 @@ struct json_parser { int line_number; int column_number; int byte_number; + const unsigned char *start; /* Parsing.
*/ enum json_parse_state parse_state; @@ -976,16 +977,18 @@ json_lex_string(struct json_parser *p) } } -static bool -json_lex_input(struct json_parser *p, unsigned char c) +static inline ALWAYS_INLINE bool +json_lex_input(struct json_parser *p, const unsigned char *ch) { struct json_token token; + unsigned char c = *ch; switch (p->lex_state) { case JSON_LEX_START: switch (c) { case ' ': case '\t': case '\n': case '\r': /* Nothing to do. */ + p->start = ch + 1; return true; case 'a': case 'b': case 'c': case 'd': case 'e': @@ -995,21 +998,25 @@ json_lex_input(struct json_parser *p, unsigned char c) case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': p->lex_state = JSON_LEX_KEYWORD; + p->start = ch; break; case '[': case '{': case ']': case '}': case ':': case ',': token.type = c; json_parser_input(p, &token); + p->start = ch + 1; return true; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': p->lex_state = JSON_LEX_NUMBER; + p->start = ch; break; case '"': p->lex_state = JSON_LEX_STRING; + p->start = ch + 1; return true; default: @@ -1024,6 +1031,7 @@ json_lex_input(struct json_parser *p, unsigned char c) case JSON_LEX_KEYWORD: if (!isalpha((unsigned char) c)) { + ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start); json_lex_keyword(p); return false; } @@ -1031,6 +1039,7 @@ json_lex_input(struct json_parser *p, unsigned char c) case JSON_LEX_NUMBER: if (!strchr(".0123456789eE-+", c)) { + ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start); json_lex_number(p); return false; } @@ -1040,6 +1049,7 @@ json_lex_input(struct json_parser *p, unsigned char c) if (c == '\\') { p->lex_state = JSON_LEX_ESCAPE; } else if (c == '"') { + ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start); json_lex_string(p); return true; } else if (c < 0x20) { @@ -1055,7 +1065,6 @@ json_lex_input(struct json_parser *p, unsigned char c) default: abort(); } - ds_put_char(&p->buffer, c); 
return true; } @@ -1164,10 +1173,11 @@ size_t json_parser_feed(struct json_parser *p, const char *input, size_t n) { size_t i; + p->start = (const unsigned char *) input; for (i = 0; !p->done && i < n; ) { - if (json_lex_input(p, input[i])) { + if (json_lex_input(p, (unsigned const char *) &input[i])) { p->byte_number++; - if (input[i] == '\n') { + if (OVS_UNLIKELY(input[i] == '\n')) { p->column_number = 0; p->line_number++; } else { @@ -1176,6 +1186,10 @@ json_parser_feed(struct json_parser *p, const char *input, size_t n) i++; } } + if (!p->done) { + ds_put_buffer(&p->buffer, (const char *) p->start, + (const unsigned char *) &input[i] - p->start); + } return i; } @@ -1189,6 +1203,7 @@ struct json * json_parser_finish(struct json_parser *p) { struct json *json; + const unsigned char *space = (const unsigned char *) " "; switch (p->lex_state) { case JSON_LEX_START: @@ -1201,7 +1216,8 @@ json_parser_finish(struct json_parser *p) case JSON_LEX_NUMBER: case JSON_LEX_KEYWORD: - json_lex_input(p, ' '); + p->start = space; + json_lex_input(p, space); break; } -- 2.35.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev