Paolo Bonzini <[email protected]> writes:

> Now fully exploit the push parser, feeding it one token at a time
> without having to wait until braces and brackets are balanced.
>
> While the nesting counts are retained for error recovery purposes,
> the system can now report the first parsing error without waiting
> for parentheses to be balanced.  This also means that JSON_ERROR
> can be handled in json-parser.c, not json-streamer.c.
>
> After reporting the error, json-streamer.c then enters an error recovery
> mode where subsequent errors are suppressed.  This mimics the previous
> error reporting behavior, but it provides prompt feedback on parsing
> errors.  As an example, here is an example interaction with qemu-ga.
>
> BEFORE (error reported only once braces are balanced):
>
>    >> {"execute":foo
>    >> }
>    << {"error": {"class": "GenericError", "desc": "JSON parse error, invalid 
> keyword 'foo'"}}
>    >> {"execute":"somecommand"}
>    << {"error": {"class": "CommandNotFound", "desc": "The command somecommand 
> has not been found"}}
>
> AFTER (error reported immediately, but similar error recovery as before):
>
>    >> {"execute":foo
>    << {"error": {"class": "GenericError", "desc": "JSON parse error, invalid 
> keyword 'foo'"}}
>    >> }
>    >> {"execute":"somecommand"}
>    << {"error": {"class": "CommandNotFound", "desc": "The command somecommand 
> has not been found"}}

Welcome quality of life improvement for human use of QMP, such as manual
testing.  The experience is still sub-par when you miss closing
parenthesis: you still get the silent treatment.  However, the confused
input a confused user is likely to send then is now more likely to
produce an error, and thus a parser reset.

>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
>  include/qobject/json-parser.h |   3 +-
>  qobject/json-parser.c         |   4 ++
>  qobject/json-streamer.c       | 100 ++++++++++++++--------------------
>  3 files changed, 46 insertions(+), 61 deletions(-)
>
> diff --git a/include/qobject/json-parser.h b/include/qobject/json-parser.h
> index 0cf6932ecdc..3479e637588 100644
> --- a/include/qobject/json-parser.h
> +++ b/include/qobject/json-parser.h
> @@ -33,7 +33,8 @@ typedef struct JSONMessageParser {
>      JSONParserContext parser;
>      unsigned int brace_count;
>      unsigned int bracket_count;
> -    GQueue tokens;
> +    unsigned int token_count;
> +    bool error;
>      uint64_t token_size;
>  } JSONMessageParser;
>  
> diff --git a/qobject/json-parser.c b/qobject/json-parser.c
> index 3b5edc5bae4..b77baab585f 100644
> --- a/qobject/json-parser.c
> +++ b/qobject/json-parser.c
> @@ -659,6 +659,10 @@ QObject *json_parser_feed(JSONParserContext *ctxt, const 
> JSONToken *token,
>  
>      assert(!ctxt->err);
>      switch (token->type) {
> +    case JSON_ERROR:
> +        parse_error(ctxt, token, "JSON parse error, stray '%s'", token->str);
> +        break;
> +
>      case JSON_END_OF_INPUT:
>          /* Check for premature end of input */
>          if (!g_queue_is_empty(ctxt->stack)) {
> diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
> index b0bf2083ca6..82d2bbc9426 100644
> --- a/qobject/json-streamer.c
> +++ b/qobject/json-streamer.c
> @@ -1,5 +1,5 @@
>  /*
> - * JSON streaming support
> + * JSON parser - callback interface and error recovery

The file name is now a bit of a misnomer.  I don't care.

>   *
>   * Copyright IBM, Corp. 2009
>   *
> @@ -19,23 +19,16 @@
>  #define MAX_TOKEN_COUNT (2ULL << 20)
>  #define MAX_NESTING (1 << 10)
>  
> -static void json_message_free_tokens(JSONMessageParser *parser)
> -{
> -    JSONToken *token;
> -
> -    while ((token = g_queue_pop_head(&parser->tokens))) {
> -        g_free(token);
> -    }
> -}
> -
>  void json_message_process_token(JSONLexer *lexer, GString *input,
>                                  JSONTokenType type, int x, int y)
>  {
>      JSONMessageParser *parser = container_of(lexer, JSONMessageParser, 
> lexer);
> -    QObject *json = NULL;
>      Error *err = NULL;
> -    JSONToken *token;
>  
> +    parser->token_size += input->len;
> +    parser->token_count++;

Note for later: @token_size and @token_count are incremented early
rather than late.

> +
> +    /* Detect message boundaries for error recovery purposes.  */
>      switch (type) {
>      case JSON_LCURLY:
>          parser->brace_count++;
> @@ -56,12 +49,6 @@ void json_message_process_token(JSONLexer *lexer, GString 
> *input,
>          parser->bracket_count--;
>          break;
>      case JSON_ERROR:
> -        error_setg(&err, "JSON parse error, stray '%s'", input->str);
> -        goto out_emit;
> -    case JSON_END_OF_INPUT:
> -        if (g_queue_is_empty(&parser->tokens)) {
> -            return;
> -        }
>      end_error_recovery:
>          /*
>           * Cause error recovery to end immediately.
> @@ -76,49 +63,43 @@ void json_message_process_token(JSONLexer *lexer, GString 
> *input,
>          break;
>      }
>  
> -    /*
> -     * Security consideration, we limit total memory allocated per object
> -     * and the maximum recursion depth that a message can force.
> -     */
> -    if (parser->token_size + input->len + 1 > MAX_TOKEN_SIZE) {

Left operand of > is unincremented token_size plus increment plus 1.

> -        error_setg(&err, "JSON token size limit exceeded");
> -        goto out_emit;
> -    }
> -    if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) {
> -        error_setg(&err, "JSON token count limit exceeded");
> -        goto out_emit;
> -    }
> -    if (parser->bracket_count + parser->brace_count > MAX_NESTING) {
> -        error_setg(&err, "JSON nesting depth limit exceeded");
> -        goto out_emit;
> -    }
> +    if (parser->error) {
> +        /* error recovery, eat tokens until parentheses balance */
> +    } else {
> +        /*
> +         * Safety consideration, we limit total memory allocated per object
> +         * and the maximum nesting depth that a message can force.
> +         */
> +        if (parser->token_size > MAX_TOKEN_SIZE) {

Left operand of > is incremented token size.

Isn't this one less than before the patch?

> +            error_setg(&err, "JSON token size limit exceeded");
> +        } else if (parser->token_count > MAX_TOKEN_COUNT) {

Likewise.

> +            error_setg(&err, "JSON token count limit exceeded");
> +        } else if (parser->bracket_count + parser->brace_count > 
> MAX_NESTING) {
> +            error_setg(&err, "JSON nesting depth limit exceeded");
> +        } else {
> +            g_autofree JSONToken *token = json_token(type, x, y, input);
> +            QObject *json = json_parser_feed(&parser->parser, token, &err);
> +            if (json) {
> +                parser->emit(parser->opaque, json, NULL);
> +            }
> +        }
>  
> -    token = json_token(type, x, y, input);
> -    parser->token_size += input->len;
> -
> -    g_queue_push_tail(&parser->tokens, token);
> -
> -    if (parser->brace_count > 0 || parser->bracket_count > 0) {
> -        return;
> -    }
> -
> -    /* Process all tokens in the queue */
> -    while (!g_queue_is_empty(&parser->tokens)) {
> -        token = g_queue_pop_head(&parser->tokens);
> -        json = json_parser_feed(&parser->parser, token, &err);
> -        g_free(token);
> -        if (json || err) {
> -            break;
> +        if (err) {
> +            parser->emit(parser->opaque, NULL, err);
> +            /* start recovery */
> +            parser->error = true;
>          }
>      }
>  
> -out_emit:
> -    parser->brace_count = 0;
> -    parser->bracket_count = 0;
> -    json_message_free_tokens(parser);
> -    parser->token_size = 0;
> -    parser->emit(parser->opaque, json, err);
> -    json_parser_reset(&parser->parser);
> +    if ((parser->brace_count == 0 && parser->bracket_count == 0)
> +        || type == JSON_END_OF_INPUT) {

We need to check for JSON_END_OF_INPUT, because this is the one error
that cannot go through normal error recovery "eat tokens until curlies
and squares balance or we ate JSON_END_OF_INPUT".  Took me a few minutes
to figure out.

> +        parser->error = false;
> +        parser->brace_count = 0;
> +        parser->bracket_count = 0;
> +        parser->token_count = 0;
> +        parser->token_size = 0;
> +        json_parser_reset(&parser->parser);
> +    }
>  }
>  
>  void json_message_parser_init(JSONMessageParser *parser,
> @@ -128,9 +109,10 @@ void json_message_parser_init(JSONMessageParser *parser,
>  {
>      parser->emit = emit;
>      parser->opaque = opaque;
> +    parser->error = false;
>      parser->brace_count = 0;
>      parser->bracket_count = 0;
> -    g_queue_init(&parser->tokens);
> +    parser->token_count = 0;
>      parser->token_size = 0;
>  
>      json_parser_init(&parser->parser, ap);
> @@ -146,12 +128,10 @@ void json_message_parser_feed(JSONMessageParser *parser,
>  void json_message_parser_flush(JSONMessageParser *parser)
>  {
>      json_lexer_flush(&parser->lexer);
> -    assert(g_queue_is_empty(&parser->tokens));
>  }
>  
>  void json_message_parser_destroy(JSONMessageParser *parser)
>  {
>      json_lexer_destroy(&parser->lexer);
> -    json_message_free_tokens(parser);
>      json_parser_destroy(&parser->parser);
>  }


Reply via email to