Paolo Bonzini <[email protected]> writes:

> Now fully exploit the push parser, feeding it one token at a time
> without having to wait until braces and brackets are balanced.
>
> While the nesting counts are retained for error recovery purposes,
> the system can now report the first parsing error without waiting
> for delimiters to be balanced.  This also means that JSON_ERROR
> can be handled in json-parser.c, not json-streamer.c.
>
> After reporting the error, json-streamer.c then enters an error recovery
> mode where subsequent errors are suppressed.  This mimics the previous
> error reporting behavior, but it provides prompt feedback on parsing
> errors.  As an example, here is an example interaction with qemu-ga.
>
> BEFORE (error reported only once braces are balanced):
>
>    >> {'execute':foo
>    >> }
>    << {"error": {"class": "GenericError", "desc": "JSON parse error, invalid 
> keyword 'foo'"}}
>    >> {'execute':'somecommand'}
>    << {"error": {"class": "CommandNotFound", "desc": "The command somecommand 
> has not been found"}}
>
> AFTER (error reported immediately, but similar error recovery as before):
>
>    >> {'execute':foo
>    << {"error": {"class": "GenericError", "desc": "JSON parse error, invalid 
> keyword 'foo'"}}
>    >> }
>    >> {'execute':'qmp_capabilities'}
>    << {"error": {"class": "CommandNotFound", "desc": "The command 
> qmp_capabilities has not been found"}}

Welcome quality of life improvement for human use of QMP, such as manual
testing.  The experience is still sub-par when you miss closing
parenthesis: you get the silent treatment.

Nitpick #1: Single quotes are an syntax extension made to enable more
readable QMP input in C strings.  Letting it leak into the external
interface was a mistake.  The less we show their use, the better.  Let's
stick to doublequotes here.

Nitpick #2: example input differs needlessly between BEFORE and AFTER.

>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
>  include/qobject/json-parser.h |   5 +-
>  qobject/json-parser-int.h     |   1 +
>  qobject/json-parser.c         |  26 +++++---
>  qobject/json-streamer.c       | 116 ++++++++++++++--------------------
>  4 files changed, 68 insertions(+), 80 deletions(-)
>
> diff --git a/include/qobject/json-parser.h b/include/qobject/json-parser.h
> index 05346fa816b..923eb74ca00 100644
> --- a/include/qobject/json-parser.h
> +++ b/include/qobject/json-parser.h
> @@ -29,11 +29,12 @@ typedef struct JSONParserContext {
>  typedef struct JSONMessageParser {
>      void (*emit)(void *opaque, QObject *json, Error *err);
>      void *opaque;
> -    va_list *ap;
>      JSONLexer lexer;
> +    JSONParserContext parser;
>      int brace_count;
>      int bracket_count;
> -    GQueue tokens;
> +    int token_count;
> +    bool error;
>      uint64_t token_size;
>  } JSONMessageParser;
>  
> diff --git a/qobject/json-parser-int.h b/qobject/json-parser-int.h
> index 05e2e8e1831..1f435cb8eb2 100644
> --- a/qobject/json-parser-int.h
> +++ b/qobject/json-parser-int.h
> @@ -50,6 +50,7 @@ void json_message_process_token(JSONLexer *lexer, GString 
> *input,
>  /* json-parser.c */
>  JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr);
>  void json_parser_init(JSONParserContext *ctxt, va_list *ap);
> +void json_parser_reset(JSONParserContext *ctxt);
>  QObject *json_parser_feed(JSONParserContext *ctxt, const JSONToken *token, 
> Error **errp);
>  void json_parser_destroy(JSONParserContext *ctxt);
>  
> diff --git a/qobject/json-parser.c b/qobject/json-parser.c
> index 338f0c5aae6..7abdea4dacb 100644
> --- a/qobject/json-parser.c
> +++ b/qobject/json-parser.c
> @@ -541,21 +541,27 @@ JSONToken *json_token(JSONTokenType type, int x, int y, 
> GString *tokstr)
>      return token;
>  }
>  
> -void json_parser_init(JSONParserContext *ctxt, va_list *ap)
> -{
> -    ctxt->err = NULL;
> -    ctxt->stack = g_queue_new();
> -    ctxt->ap = ap;
> -}
> -
> -void json_parser_destroy(JSONParserContext *ctxt)
> +void json_parser_reset(JSONParserContext *ctxt)
>  {
>      JSONParserStackEntry *entry;
>  
> +    ctxt->err = NULL;
>      while ((entry = g_queue_pop_tail(ctxt->stack)) != NULL) {
>          qobject_unref(entry->partial);
>          g_free(entry);
>      }
> +}
> +
> +void json_parser_init(JSONParserContext *ctxt, va_list *ap)
> +{
> +    ctxt->stack = g_queue_new();
> +    ctxt->ap = ap;
> +    json_parser_reset(ctxt);
> +}
> +
> +void json_parser_destroy(JSONParserContext *ctxt)
> +{
> +    json_parser_reset(ctxt);
>      g_queue_free(ctxt->stack);
>      ctxt->stack = NULL;
>  }
> @@ -566,6 +572,10 @@ QObject *json_parser_feed(JSONParserContext *ctxt, const 
> JSONToken *token, Error
>  
>      assert(!ctxt->err);
>      switch (token->type) {
> +    case JSON_ERROR:
> +        parse_error(ctxt, token, "JSON parse error, stray '%s'", token->str);
> +        break;
> +
>      case JSON_END_OF_INPUT:
>          /* Check for premature end of input */
>          if (!g_queue_is_empty(ctxt->stack)) {
> diff --git a/qobject/json-streamer.c b/qobject/json-streamer.c
> index 6c93e6fd78d..a1210128ac1 100644
> --- a/qobject/json-streamer.c
> +++ b/qobject/json-streamer.c
> @@ -1,5 +1,5 @@
>  /*
> - * JSON streaming support
> + * JSON parser - callback interface and error recovery

"Streaming" has always been misleading.  This isn't a streaming parser,
and never was.  It's a conventional parser wed to a push lexer with a
hack before this series, and a push parser afterwards.

>   *
>   * Copyright IBM, Corp. 2009
>   *
> @@ -19,97 +19,73 @@
>  #define MAX_TOKEN_COUNT (2ULL << 20)
>  #define MAX_NESTING (1 << 10)
>  
> -static void json_message_free_tokens(JSONMessageParser *parser)
> -{
> -    JSONToken *token;
> -
> -    while ((token = g_queue_pop_head(&parser->tokens))) {
> -        g_free(token);
> -    }
> -}
> -
>  void json_message_process_token(JSONLexer *lexer, GString *input,
>                                  JSONTokenType type, int x, int y)
>  {
>      JSONMessageParser *parser = container_of(lexer, JSONMessageParser, 
> lexer);
> -    JSONParserContext ctxt;
> -    QObject *json = NULL;
> +    g_autofree JSONToken *token = json_token(type, x, y, input);
>      Error *err = NULL;
> -    JSONToken *token;
>  
> +    parser->token_size += input->len;
> +    parser->token_count++;
> +
> +    /* Detect message boundaries for error recovery purposes.  */

I like the comment.

>      switch (type) {
>      case JSON_LCURLY:
>          parser->brace_count++;
>          break;
>      case JSON_RCURLY:
> -        parser->brace_count--;
> +        if (parser->brace_count > 0) {
> +            parser->brace_count--;
> +        }

Looks like you're keeping @brace_count (here) and @bracket_count (below)
non-negative.  Any particular reason?

Make them unsigned?

>          break;
>      case JSON_LSQUARE:
>          parser->bracket_count++;
>          break;
>      case JSON_RSQUARE:
> -        parser->bracket_count--;
> -        break;
> -    case JSON_ERROR:
> -        error_setg(&err, "JSON parse error, stray '%s'", input->str);
> -        goto out_emit;
> -    case JSON_END_OF_INPUT:
> -        if (g_queue_is_empty(&parser->tokens)) {
> -            return;
> +        if (parser->bracket_count > 0) {
> +            parser->bracket_count--;
>          }
>          break;
>      default:
>          break;
>      }
>  
> -    /*
> -     * Security consideration, we limit total memory allocated per object
> -     * and the maximum recursion depth that a message can force.
> -     */
> -    if (parser->token_size + input->len + 1 > MAX_TOKEN_SIZE) {
> -        error_setg(&err, "JSON token size limit exceeded");
> -        goto out_emit;
> -    }
> -    if (g_queue_get_length(&parser->tokens) + 1 > MAX_TOKEN_COUNT) {
> -        error_setg(&err, "JSON token count limit exceeded");
> -        goto out_emit;
> -    }
> -    if (parser->bracket_count + parser->brace_count > MAX_NESTING) {
> -        error_setg(&err, "JSON nesting depth limit exceeded");
> -        goto out_emit;
> -    }
> +    /* during error recovery eat tokens until parentheses balance */
> +    if (!parser->error) {

I think I'd write this as

       if (parser->error) {
           /* error recovery: eat tokens until parentheses balance */
       } else {

> +        /*
> +         * Security consideration, we limit total memory allocated per object
> +         * and the maximum recursion depth that a message can force.
> +         */

This comment now needs rephrasing: there's no recursion anymore.  In the
previous commit if we're feeling persnickety.

Suggest s/security/safety/ while there.

> +        if (parser->token_size > MAX_TOKEN_SIZE) {
> +            error_setg(&err, "JSON token size limit exceeded");
> +        } else if (parser->token_count > MAX_TOKEN_COUNT) {
> +            error_setg(&err, "JSON token count limit exceeded");
> +        } else if (parser->bracket_count + parser->brace_count > 
> MAX_NESTING) {
> +            error_setg(&err, "JSON nesting depth limit exceeded");
> +        } else {
> +            QObject *json = json_parser_feed(&parser->parser, token, &err);
> +            if (json) {
> +                parser->emit(parser->opaque, json, NULL);
> +            }
> +        }
>  
> -    token = json_token(type, x, y, input);
> -    parser->token_size += input->len;
> -
> -    g_queue_push_tail(&parser->tokens, token);
> -
> -    if ((parser->brace_count > 0 || parser->bracket_count > 0)
> -        && parser->brace_count >= 0 && parser->bracket_count >= 0
> -        && type != JSON_END_OF_INPUT) {
> -        return;
> -    }
> -
> -    json_parser_init(&ctxt, parser->ap);
> -
> -    /* Process all tokens in the queue */
> -    while (!g_queue_is_empty(&parser->tokens)) {
> -        token = g_queue_pop_head(&parser->tokens);
> -        json = json_parser_feed(&ctxt, token, &err);
> -        g_free(token);
> -        if (json || err) {
> -            break;
> +        if (err) {
> +            parser->emit(parser->opaque, NULL, err);
> +            /* start recovery */
> +            parser->error = true;
>          }
>      }
>  
> -    json_parser_destroy(&ctxt);
> -
> -out_emit:
> -    parser->brace_count = 0;
> -    parser->bracket_count = 0;
> -    json_message_free_tokens(parser);
> -    parser->token_size = 0;
> -    parser->emit(parser->opaque, json, err);
> +    if ((parser->brace_count == 0 && parser->bracket_count == 0)
> +        || type == JSON_END_OF_INPUT) {
> +        parser->error = false;
> +        parser->brace_count = 0;
> +        parser->bracket_count = 0;
> +        parser->token_count = 0;
> +        parser->token_size = 0;
> +        json_parser_reset(&parser->parser);
> +    }
>  }
>  
>  void json_message_parser_init(JSONMessageParser *parser,
> @@ -119,12 +95,13 @@ void json_message_parser_init(JSONMessageParser *parser,
>  {
>      parser->emit = emit;
>      parser->opaque = opaque;
> -    parser->ap = ap;
> +    parser->error = false;
>      parser->brace_count = 0;
>      parser->bracket_count = 0;
> -    g_queue_init(&parser->tokens);
> +    parser->token_count = 0;
>      parser->token_size = 0;
>  
> +    json_parser_init(&parser->parser, ap);
>      json_lexer_init(&parser->lexer, !!ap);
>  }
>  
> @@ -137,11 +114,10 @@ void json_message_parser_feed(JSONMessageParser *parser,
>  void json_message_parser_flush(JSONMessageParser *parser)
>  {
>      json_lexer_flush(&parser->lexer);
> -    assert(g_queue_is_empty(&parser->tokens));
>  }
>  
>  void json_message_parser_destroy(JSONMessageParser *parser)
>  {
>      json_lexer_destroy(&parser->lexer);
> -    json_message_free_tokens(parser);
> +    json_parser_destroy(&parser->parser);
>  }

Not mentioned in the commit message: before the patch, the "streamer"
calls json_parser_init() and json_parser_destroy() for each balanced
expression.  Afterwards, it calls them once, and instea calls
json_parser_reset() between balanced expressions.

I wonder whether it could be a separate patch.


Reply via email to