Prior to this change, parsing a JSON string called json_lex_input once
for each character of the string. If the character needed to be copied
to the buffer, it was copied individually. This is expensive, because
there are often multiple characters in a row that need to be copied,
and copying memory in blocks is more efficient than copying byte by
byte. To improve this, the string is now copied in blocks: as long as
the lexer state stays the same, no copy is performed. As soon as the
state is about to change (for example, on reaching the end of a
string), the characters accumulated since the last copy are appended
to the buffer in a single operation.

There is also a conditional that checks whether the current character
is a newline, which is quite unlikely. When this was examined with
perf, the comparison accounted for a very high share of CPU cycles. To
improve this, the OVS_UNLIKELY macro is used, which hints to the
compiler that the branch is rarely taken so that it can lay out the
instructions to favor the common path.

The json_lex_input function is called for each character of a string,
and calling the function so many times uses a lot of CPU cycles.
Making it an inline function greatly reduced the percentage of cycles
used by the call.

Here is the improvement seen in the json-string-benchmark test:

  SIZE      Q  S         BEFORE       AFTER      CHANGE
--------------------------------------------------------
100000      0  0 :      0.842 ms     0.511 ms   -39.3 %
100000      2  1 :      0.917 ms     0.561 ms   -38.8 %
100000      10 1 :      1.063 ms     0.678 ms   -36.2 %
10000000    0  0 :     85.328 ms    51.998 ms   -39.1 %
10000000    2  1 :     92.555 ms    57.353 ms   -38.0 %
10000000    10 1 :    106.728 ms    69.288 ms   -35.1 %
100000000   0  0 :    955.375 ms   632.300 ms   -33.8 %
100000000   2  1 :   1031.700 ms   689.375 ms   -33.2 %
100000000   10 1 :   1189.300 ms   821.850 ms   -30.9 %

Here Q is the probability (%) for a character to be a '"' and
S is the probability (%) for it to be a special character (< 32).

Signed-off-by: Rosemarie O'Riorden <rorior...@redhat.com>
---
 lib/json.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/lib/json.c b/lib/json.c
index 720c73d94..f48cb3241 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -101,6 +101,7 @@ struct json_parser {
     int line_number;
     int column_number;
     int byte_number;
+    const unsigned char *start;
 
     /* Parsing. */
     enum json_parse_state parse_state;
@@ -976,16 +977,18 @@ json_lex_string(struct json_parser *p)
     }
 }
 
-static bool
-json_lex_input(struct json_parser *p, unsigned char c)
+static inline ALWAYS_INLINE bool
+json_lex_input(struct json_parser *p, const unsigned char *ch)
 {
     struct json_token token;
+    unsigned char c = *ch;
 
     switch (p->lex_state) {
     case JSON_LEX_START:
         switch (c) {
         case ' ': case '\t': case '\n': case '\r':
             /* Nothing to do. */
+            p->start = ch + 1;
             return true;
 
         case 'a': case 'b': case 'c': case 'd': case 'e':
@@ -995,21 +998,25 @@ json_lex_input(struct json_parser *p, unsigned char c)
         case 'u': case 'v': case 'w': case 'x': case 'y':
         case 'z':
             p->lex_state = JSON_LEX_KEYWORD;
+            p->start = ch;
             break;
 
         case '[': case '{': case ']': case '}': case ':': case ',':
             token.type = c;
             json_parser_input(p, &token);
+            p->start = ch + 1;
             return true;
 
         case '-':
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
             p->lex_state = JSON_LEX_NUMBER;
+            p->start = ch;
             break;
 
         case '"':
             p->lex_state = JSON_LEX_STRING;
+            p->start = ch + 1;
             return true;
 
         default:
@@ -1024,6 +1031,7 @@ json_lex_input(struct json_parser *p, unsigned char c)
 
     case JSON_LEX_KEYWORD:
         if (!isalpha((unsigned char) c)) {
+            ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
             json_lex_keyword(p);
             return false;
         }
@@ -1031,6 +1039,7 @@ json_lex_input(struct json_parser *p, unsigned char c)
 
     case JSON_LEX_NUMBER:
         if (!strchr(".0123456789eE-+", c)) {
+            ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
             json_lex_number(p);
             return false;
         }
@@ -1040,6 +1049,7 @@ json_lex_input(struct json_parser *p, unsigned char c)
         if (c == '\\') {
             p->lex_state = JSON_LEX_ESCAPE;
         } else if (c == '"') {
+            ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
             json_lex_string(p);
             return true;
         } else if (c < 0x20) {
@@ -1055,7 +1065,6 @@ json_lex_input(struct json_parser *p, unsigned char c)
     default:
         abort();
     }
-    ds_put_char(&p->buffer, c);
     return true;
 }
 
@@ -1164,10 +1173,11 @@ size_t
 json_parser_feed(struct json_parser *p, const char *input, size_t n)
 {
     size_t i;
+    p->start = (const unsigned char *) input;
     for (i = 0; !p->done && i < n; ) {
-        if (json_lex_input(p, input[i])) {
+        if (json_lex_input(p, (unsigned const char *) &input[i])) {
             p->byte_number++;
-            if (input[i] == '\n') {
+            if (OVS_UNLIKELY(input[i] == '\n')) {
                 p->column_number = 0;
                 p->line_number++;
             } else {
@@ -1176,6 +1186,10 @@ json_parser_feed(struct json_parser *p, const char *input, size_t n)
             i++;
         }
     }
+    if (!p->done) {
+        ds_put_buffer(&p->buffer, (const char *) p->start,
+                (const unsigned char *) &input[i] - p->start);
+    }
     return i;
 }
 
@@ -1189,6 +1203,7 @@ struct json *
 json_parser_finish(struct json_parser *p)
 {
     struct json *json;
+    const unsigned char *space = (const unsigned char *) " ";
 
     switch (p->lex_state) {
     case JSON_LEX_START:
@@ -1201,7 +1216,8 @@ json_parser_finish(struct json_parser *p)
 
     case JSON_LEX_NUMBER:
     case JSON_LEX_KEYWORD:
-        json_lex_input(p, ' ');
+        p->start = space;
+        json_lex_input(p, space);
         break;
     }
 
-- 
2.35.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to