On 3/29/22 18:51, Rosemarie O'Riorden wrote: > To parse a json string prior to this change, json_lex_input is called > with each character of the string. If the character needs to be copied > to the buffer, it is copied individually. This is an expensive > operation, as often there are multiple characters in a row > that need to be copied, and copying memory in blocks is more efficient > than byte by byte. To improve this, the string is now copied > in blocks with an offset counter. A copy is performed when the parser > state equals done. > > Functions that are called for each character use a lot of CPU cycles. > Making these functions inline greatly reduces the cycles used and > improves overall performance. Since json_lex_input was only needed in > one place, it doesn't have to be its own function. > > There is also a conditional that checks if the current character is a > new line, which is quite unlikely. When this was examined with perf, the > comparison had a very high CPU cycle usage. To improve this, the > OVS_UNLIKELY macro was used, which forces the compiler to switch the > order of the instructions. > > Here is the improvement seen in the json-string-benchmark test: > > SIZE Q S BEFORE AFTER CHANGE > -------------------------------------------------------- > 100000 0 0 : 0.842 ms 0.489 ms -41.9 % > 100000 2 1 : 0.917 ms 0.535 ms -41.7 % > 100000 10 1 : 1.063 ms 0.656 ms -38.3 % > 10000000 0 0 : 85.328 ms 49.878 ms -41.5 % > 10000000 2 1 : 92.555 ms 54.778 ms -40.8 % > 10000000 10 1 : 106.728 ms 66.735 ms -37.5 % > 100000000 0 0 : 955.375 ms 621.950 ms -34.9 % > 100000000 2 1 : 1031.700 ms 665.200 ms -35.5 % > 100000000 10 1 : 1189.300 ms 796.050 ms -33.0 % > > Here Q is probability (%) for a character to be a '\"' and > S is probability (%) to be a special character ( < 32). > > Signed-off-by: Rosemarie O'Riorden <rorior...@redhat.com> > ---
Hi Rosemarie, I just have two small style related comments but I guess, if Ilya agrees, that those can be fixed up at commit time. I should've mentioned those on the previous review but I missed them, sorry. Aside from that the code looks OK to me, thanks! Acked-by: Dumitru Ceara <dce...@redhat.com> Thanks, Dumitru > Adds a local counter so p->start is no longer needed > Moves contents of json_lex_input into json_parser_feed > Adds function json_parser_account_byte > Performs copy when parser state equals done > > lib/json.c | 200 +++++++++++++++++++++++++++++------------------------ > 1 file changed, 110 insertions(+), 90 deletions(-) > > diff --git a/lib/json.c b/lib/json.c > index 720c73d94..2b18ec550 100644 > --- a/lib/json.c > +++ b/lib/json.c > @@ -976,88 +976,6 @@ json_lex_string(struct json_parser *p) > } > } > > -static bool > -json_lex_input(struct json_parser *p, unsigned char c) > -{ > - struct json_token token; > - > - switch (p->lex_state) { > - case JSON_LEX_START: > - switch (c) { > - case ' ': case '\t': case '\n': case '\r': > - /* Nothing to do. */ > - return true; > - > - case 'a': case 'b': case 'c': case 'd': case 'e': > - case 'f': case 'g': case 'h': case 'i': case 'j': > - case 'k': case 'l': case 'm': case 'n': case 'o': > - case 'p': case 'q': case 'r': case 's': case 't': > - case 'u': case 'v': case 'w': case 'x': case 'y': > - case 'z': > - p->lex_state = JSON_LEX_KEYWORD; > - break; > - > - case '[': case '{': case ']': case '}': case ':': case ',': > - token.type = c; > - json_parser_input(p, &token); > - return true; > - > - case '-': > - case '0': case '1': case '2': case '3': case '4': > - case '5': case '6': case '7': case '8': case '9': > - p->lex_state = JSON_LEX_NUMBER; > - break; > - > - case '"': > - p->lex_state = JSON_LEX_STRING; > - return true; > - > - default: > - if (isprint(c)) { > - json_error(p, "invalid character '%c'", c); > - } else { > - json_error(p, "invalid character U+%04x", c); > - } > - return true; > - } > - break; > - > - case JSON_LEX_KEYWORD: > - if (!isalpha((unsigned char) c)) { > - json_lex_keyword(p); > - return false; > - } > - break; > - > - case JSON_LEX_NUMBER: > - if (!strchr(".0123456789eE-+", c)) { > - json_lex_number(p); > - return false; > - } > - break; > - > - case JSON_LEX_STRING: > - if (c == '\\') { > - p->lex_state = JSON_LEX_ESCAPE; > - } else if (c == '"') { > - json_lex_string(p); > - return true; > - } else if (c < 0x20) { > - json_error(p, "U+%04X must be escaped in quoted string", c); > - return true; > - } > - break; > - > - case JSON_LEX_ESCAPE: > - p->lex_state = JSON_LEX_STRING; > - break; > - > - default: > - abort(); > - } > - ds_put_char(&p->buffer, c); > - return true; > -} > > /* Parsing. */ > > @@ -1160,22 +1078,124 @@ json_parser_create(int flags) > return p; > } > > +static inline void ALWAYS_INLINE > +json_parser_account_byte(struct json_parser *p, unsigned char c) > +{ > + p->byte_number++; > + if (OVS_UNLIKELY(c == '\n')) { > + p->column_number = 0; > + p->line_number++; > + } else { > + p->column_number++; > + } > +} > + I think this function definition should go in the "static functions section" above, after json_parser_input_string(). > size_t > json_parser_feed(struct json_parser *p, const char *input, size_t n) > { > + size_t token_start = 0; > size_t i; > + > for (i = 0; !p->done && i < n; ) { > - if (json_lex_input(p, input[i])) { > - p->byte_number++; > - if (input[i] == '\n') { > - p->column_number = 0; > - p->line_number++; > - } else { > - p->column_number++; > + bool consumed = true; > + > + const char *start_p = &input[token_start]; > + unsigned char c = input[i]; > + struct json_token token; > + > + switch (p->lex_state) { > + case JSON_LEX_START: > + switch (c) { > + case ' ': case '\t': case '\n': case '\r': > + /* Nothing to do. */ > + Nit: empty line not really needed. > + token_start = i + 1; > + break; > + > + case 'a': case 'b': case 'c': case 'd': case 'e': > + case 'f': case 'g': case 'h': case 'i': case 'j': > + case 'k': case 'l': case 'm': case 'n': case 'o': > + case 'p': case 'q': case 'r': case 's': case 't': > + case 'u': case 'v': case 'w': case 'x': case 'y': > + case 'z': > + p->lex_state = JSON_LEX_KEYWORD; > + token_start = i; > + break; > + > + case '[': case '{': case ']': case '}': case ':': case ',': > + token.type = c; > + json_parser_input(p, &token); > + token_start = i + 1; > + break; > + > + case '-': > + case '0': case '1': case '2': case '3': case '4': > + case '5': case '6': case '7': case '8': case '9': > + p->lex_state = JSON_LEX_NUMBER; > + token_start = i; > + break; > + > + case '"': > + p->lex_state = JSON_LEX_STRING; > + token_start = i + 1; > + break; > + > + default: > + if (isprint(c)) { > + json_error(p, "invalid character '%c'", c); > + } else { > + json_error(p, "invalid character U+%04x", c); > + } > + break; > + } > + break; > + > + case JSON_LEX_KEYWORD: > + if (!isalpha((unsigned char) c)) { > + ds_put_buffer(&p->buffer, start_p, i - token_start); > + json_lex_keyword(p); > + consumed = false; > + break; > } > + break; > + > + case JSON_LEX_NUMBER: > + if (!strchr(".0123456789eE-+", c)) { > + ds_put_buffer(&p->buffer, start_p, i - token_start); > + json_lex_number(p); > + consumed = false; > + break; > + } > + break; > + > + case JSON_LEX_STRING: > + if (c == '\\') { > + p->lex_state = JSON_LEX_ESCAPE; > + } else if (c == '"') { > + ds_put_buffer(&p->buffer, start_p, i - token_start); > + json_lex_string(p); > + } else if (c < 0x20) { > + json_error(p, "U+%04X must be escaped in quoted string", c); > + } > + break; > + > + case JSON_LEX_ESCAPE: > + p->lex_state = JSON_LEX_STRING; > + break; > + > + default: > + ovs_abort(0, "unexpected lexer state"); > + } > + > + if (consumed) { > + json_parser_account_byte(p, c); > i++; > } > } > + > + if (!p->done) { > + ds_put_buffer(&p->buffer, &input[token_start], i - token_start); > + } > return i; > } > > @@ -1201,7 +1221,7 @@ json_parser_finish(struct json_parser *p) > > case JSON_LEX_NUMBER: > case JSON_LEX_KEYWORD: > - json_lex_input(p, ' '); > + json_parser_feed(p, " ", 1); > break; > } > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev