This is an automated email from the ASF dual-hosted git repository.

jamesge pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-brpc.git


The following commit(s) were added to refs/heads/master by this push:
     new 958a4c3a json2pb::JsonToProtoMessage() supports parsing multiple jsons
958a4c3a is described below

commit 958a4c3a0c2b5811400e042a216d651fd11e02a1
Author: jamesge <jge...@gmail.com>
AuthorDate: Sun Jun 5 17:59:30 2022 +0800

    json2pb::JsonToProtoMessage() supports parsing multiple jsons
---
 src/butil/iobuf.h                        |   6 +-
 src/butil/strings/string_util.h          |   4 ++
 src/butil/third_party/rapidjson/reader.h |   5 ++
 src/json2pb/json_to_pb.cpp               | 106 +++++++++++++++++++++----------
 src/json2pb/json_to_pb.h                 |  33 ++++++++--
 src/json2pb/rapidjson.h                  |   1 +
 test/addressbook.proto                   |   2 +-
 test/brpc_protobuf_json_unittest.cpp     | 106 +++++++++++++++++++++++++++++++
 8 files changed, 219 insertions(+), 44 deletions(-)

diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h
index c2743096..8e2bfe47 100644
--- a/src/butil/iobuf.h
+++ b/src/butil/iobuf.h
@@ -296,9 +296,9 @@ public:
     // Returns bytes copied.
     size_t copy_to(void* buf, size_t n = (size_t)-1L, size_t pos = 0) const;
 
-    // NOTE: first parameter is not std::string& because user may passes
-    // a pointer of std::string by mistake, in which case, compiler would
-    // call the void* version which crashes definitely.
+    // NOTE: first parameter is not std::string& because user may pass in
+    // a pointer of std::string by mistake, in which case, the void* overload
+    // would be wrongly called.
     size_t copy_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0) const;
     size_t append_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0) const;
 
diff --git a/src/butil/strings/string_util.h b/src/butil/strings/string_util.h
index 23bc01e4..4d78e20f 100644
--- a/src/butil/strings/string_util.h
+++ b/src/butil/strings/string_util.h
@@ -377,6 +377,10 @@ inline bool IsWhitespace(wchar_t c) {
   return wcschr(butil::kWhitespaceWide, c) != NULL;
 }
 
+inline bool IsBlankString(const butil::StringPiece &s) {
+    return butil::ContainsOnlyChars(s, " \r\n\t");
+}
+
 // Return a byte string in human-readable format with a unit suffix. Not
 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is
 // highly recommended instead. TODO(avi): Figure out how to get callers to use
diff --git a/src/butil/third_party/rapidjson/reader.h b/src/butil/third_party/rapidjson/reader.h
index 552eca03..fd6398b5 100644
--- a/src/butil/third_party/rapidjson/reader.h
+++ b/src/butil/third_party/rapidjson/reader.h
@@ -430,6 +430,11 @@ public:
                     RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
                 }
+            } else {
+                // jge: Update parseResult_.Offset() when kParseStopWhenDoneFlag
+                // is set which means the user needs to know where to resume
+                // parsing in next calls to JsonToProtoMessage()
+                SetParseError(kParseErrorNone, is.Tell());
             }
         }
 
diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp
index e87c3922..fc9a31a0 100644
--- a/src/json2pb/json_to_pb.cpp
+++ b/src/json2pb/json_to_pb.cpp
@@ -25,6 +25,8 @@
 #include <limits> 
 #include <google/protobuf/descriptor.h>
 #include "butil/strings/string_number_conversions.h"
+#include "butil/third_party/rapidjson/error/error.h"
+#include "butil/third_party/rapidjson/rapidjson.h"
 #include "json_to_pb.h"
 #include "zero_copy_stream_reader.h"       // ZeroCopyStreamReader
 #include "encode_decode.h"
@@ -33,12 +35,19 @@
 #include "protobuf_map.h"
 #include "rapidjson.h"
 
-#define J2PERROR(perr, fmt, ...)                                        \
+
+#define J2PERROR(perr, fmt, ...)                                    \
+    J2PERROR_WITH_PB((::google::protobuf::Message*)nullptr, perr, fmt, ##__VA_ARGS__)
+
+#define J2PERROR_WITH_PB(pb, perr, fmt, ...)                            \
     if (perr) {                                                         \
         if (!perr->empty()) {                                           \
             perr->append(", ", 2);                                      \
         }                                                               \
-        butil::string_appendf(perr, fmt, ##__VA_ARGS__);                 \
+        butil::string_appendf(perr, fmt, ##__VA_ARGS__);                \
+        if ((pb) != nullptr) {                                            \
+            butil::string_appendf(perr, " [%s]", (pb)->GetDescriptor()->name().c_str());  \
+        }                                                               \
     } else { }
 
 namespace json2pb {
@@ -49,7 +58,8 @@ Json2PbOptions::Json2PbOptions()
 #else
     : base64_to_bytes(true)
 #endif
-    , array_to_single_repeated(false) {
+    , array_to_single_repeated(false)
+    , allow_remaining_bytes_after_parsing(false) {
 }
 
 enum MatchType { 
@@ -412,7 +422,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value,
                         options.base64_to_bytes) {
                         std::string str_decoded;
                         if (!butil::Base64Decode(str, &str_decoded)) {
-                            J2PERROR(err, "Fail to decode base64 string=%s", str.c_str());
+                            J2PERROR_WITH_PB(message, err, "Fail to decode base64 string=%s", str.c_str());
                             return false;
                         }
                         str = str_decoded;
@@ -426,7 +436,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value,
                 options.base64_to_bytes) {
                 std::string str_decoded;
                 if (!butil::Base64Decode(str, &str_decoded)) {
-                    J2PERROR(err, "Fail to decode base64 string=%s", str.c_str());
+                    J2PERROR_WITH_PB(message, err, "Fail to decode base64 string=%s", str.c_str());
                     return false;
                 }
                 str = str_decoded;
@@ -509,7 +519,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
                              std::string* err) {
     const google::protobuf::Descriptor* descriptor = message->GetDescriptor();
     if (!json_value.IsObject() && !(json_value.IsArray() && options.array_to_single_repeated)) {
-        J2PERROR(err, "`json_value' is not a json object. %s", descriptor->name().c_str());
+        J2PERROR_WITH_PB(message, err, "The input is not a json object");
         return false;
     }
 
@@ -538,7 +548,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
             return JsonValueToProtoField(json_value, fields.front(), message, options, err);
         }
 
-        J2PERROR(err, "`json_value' of type array is not allowed here.");
+        J2PERROR_WITH_PB(message, err, "the input json can't be array here");
         return false;
     }
 
@@ -589,55 +599,89 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value,
     return true;
 }
 
-bool ZeroCopyStreamToJson(BUTIL_RAPIDJSON_NAMESPACE::Document *dest, 
-                          google::protobuf::io::ZeroCopyInputStream *stream) {
-    ZeroCopyStreamReader stream_reader(stream);
-    dest->ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<> >(stream_reader);
-    return !dest->HasParseError();
-}
-
 inline bool JsonToProtoMessageInline(const std::string& json_string, 
                         google::protobuf::Message* message,
                         const Json2PbOptions& options,
-                        std::string* error) {
+                        std::string* error,
+                        size_t* parsed_offset) {
     if (error) {
         error->clear();
     }
     BUTIL_RAPIDJSON_NAMESPACE::Document d;
-    d.Parse<0>(json_string.c_str());
+    if (options.allow_remaining_bytes_after_parsing) {
+        d.Parse<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag>(json_string.c_str());
+        if (parsed_offset != nullptr) {
+            *parsed_offset = d.GetErrorOffset();
+        }
+    } else {
+        d.Parse<0>(json_string.c_str());
+    }
     if (d.HasParseError()) {
-        J2PERROR(error, "Invalid json format");
+        if (options.allow_remaining_bytes_after_parsing) {
+            if (d.GetParseError() == BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) {
+                // This is usual when parsing multiple jsons, don't waste time
+                // on setting the `empty error'
+                return false;
+            }
+        }
+        J2PERROR_WITH_PB(message, error, "Invalid json: %s", BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError()));
         return false;
     }
-    return json2pb::JsonValueToProtoMessage(d, message, options, error);
+    return JsonValueToProtoMessage(d, message, options, error);
 }
 
 bool JsonToProtoMessage(const std::string& json_string,
                         google::protobuf::Message* message,
                         const Json2PbOptions& options,
-                        std::string* error) {
-    return JsonToProtoMessageInline(json_string, message, options, error);
+                        std::string* error,
+                        size_t* parsed_offset) {
+    return JsonToProtoMessageInline(json_string, message, options, error, parsed_offset);
 }
 
 bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream,
                         google::protobuf::Message* message,
                         const Json2PbOptions& options,
-                        std::string* error) {
+                        std::string* error,
+                        size_t* parsed_offset) {
+    ZeroCopyStreamReader reader(stream);
+    return JsonToProtoMessage(&reader, message, options, error, parsed_offset);
+}
+
+bool JsonToProtoMessage(ZeroCopyStreamReader* reader,
+                        google::protobuf::Message* message,
+                        const Json2PbOptions& options,
+                        std::string* error,
+                        size_t* parsed_offset) {
     if (error) {
         error->clear();
     }
     BUTIL_RAPIDJSON_NAMESPACE::Document d;
-    if (!json2pb::ZeroCopyStreamToJson(&d, stream)) {
-        J2PERROR(error, "Invalid json format");
+    if (options.allow_remaining_bytes_after_parsing) {
+        d.ParseStream<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag, BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader);
+        if (parsed_offset != nullptr) {
+            *parsed_offset = d.GetErrorOffset();
+        }
+    } else {
+        d.ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader);
+    }
+    if (d.HasParseError()) {
+        if (options.allow_remaining_bytes_after_parsing) {
+            if (d.GetParseError() == BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) {
+                // This is usual when parsing multiple jsons, don't waste time
+                // on setting the `empty error'
+                return false;
+            }
+        }
+        J2PERROR_WITH_PB(message, error, "Invalid json: %s", BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError()));
         return false;
     }
-    return json2pb::JsonValueToProtoMessage(d, message, options, error);
+    return JsonValueToProtoMessage(d, message, options, error);
 }
 
 bool JsonToProtoMessage(const std::string& json_string, 
                         google::protobuf::Message* message,
                         std::string* error) {
-    return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error);
+    return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error, nullptr);
 }
 
 // For ABI compatibility with 1.0.0.0
@@ -647,21 +691,13 @@ bool JsonToProtoMessage(const std::string& json_string,
 bool JsonToProtoMessage(std::string json_string, 
                         google::protobuf::Message* message,
                         std::string* error) {
-    return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error);
+    return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error, nullptr);
 }
 
 bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *stream,
                         google::protobuf::Message* message,
                         std::string* error) {
-    if (error) {
-        error->clear();
-    }
-    BUTIL_RAPIDJSON_NAMESPACE::Document d;
-    if (!json2pb::ZeroCopyStreamToJson(&d, stream)) {
-        J2PERROR(error, "Invalid json format");
-        return false;
-    }
-    return json2pb::JsonValueToProtoMessage(d, message, Json2PbOptions(), error);
+    return JsonToProtoMessage(stream, message, Json2PbOptions(), error, nullptr);
 }
 } //namespace json2pb
 
diff --git a/src/json2pb/json_to_pb.h b/src/json2pb/json_to_pb.h
index 67f5bdf5..44203e08 100644
--- a/src/json2pb/json_to_pb.h
+++ b/src/json2pb/json_to_pb.h
@@ -20,6 +20,7 @@
 #ifndef BRPC_JSON2PB_JSON_TO_PB_H
 #define BRPC_JSON2PB_JSON_TO_PB_H
 
+#include "json2pb/zero_copy_stream_reader.h"
 #include <google/protobuf/message.h>
 #include <google/protobuf/io/zero_copy_stream.h>    // ZeroCopyInputStream
 
@@ -36,30 +37,52 @@ struct Json2PbOptions {
     // Allow decoding json array iff there is only one repeated field.
     // Default: false.
     bool array_to_single_repeated;
+
+    // Allow more bytes remaining in the input after parsing the first json
+    // object. Useful when the input contains more than one json object.
+    bool allow_remaining_bytes_after_parsing;
 };
 
 // Convert `json' to protobuf `message'.
 // Returns true on success. `error' (if not NULL) will be set with error
 // message on failure.
+//
+// [When options.allow_remaining_bytes_after_parsing is true]
+// * `parsed_offset' will be set to the number of bytes parsed
+// * the function still returns false on empty document but the `error' is set
+//   to empty string instead of `The document is empty'.
 bool JsonToProtoMessage(const std::string& json,
                         google::protobuf::Message* message,
                         const Json2PbOptions& options,
-                        std::string* error = NULL);
+                        std::string* error = nullptr,
+                        size_t* parsed_offset = nullptr);
 
-// send output to ZeroCopyOutputStream instead of std::string.
+// Use ZeroCopyInputStream as input instead of std::string.
 bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *json,
+                        google::protobuf::Message *message,
+                        const Json2PbOptions &options,
+                        std::string *error = nullptr,
+                        size_t *parsed_offset = nullptr);
+
+// Use ZeroCopyStreamReader as input instead of std::string.
+// If you need to parse multiple jsons from IOBuf, you should use this
+// overload instead of the ZeroCopyInputStream one, which is based on this
+// one but creates a new ZeroCopyStreamReader on every call, so the reader
+// can't be reused between consecutive calls.
+bool JsonToProtoMessage(ZeroCopyStreamReader *json,
                         google::protobuf::Message* message,
                         const Json2PbOptions& options,
-                        std::string* error = NULL);
+                        std::string* error = nullptr,
+                        size_t* parsed_offset = nullptr);
 
 // Using default Json2PbOptions.
 bool JsonToProtoMessage(const std::string& json,
                         google::protobuf::Message* message,
-                        std::string* error = NULL);
+                        std::string* error = nullptr);
 
 bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream,
                         google::protobuf::Message* message,
-                        std::string* error = NULL);
+                        std::string* error = nullptr);
 } // namespace json2pb
 
 #endif // BRPC_JSON2PB_JSON_TO_PB_H
diff --git a/src/json2pb/rapidjson.h b/src/json2pb/rapidjson.h
index d2cf3b68..fa5d354a 100644
--- a/src/json2pb/rapidjson.h
+++ b/src/json2pb/rapidjson.h
@@ -39,6 +39,7 @@
 #include "butil/third_party/rapidjson/stringbuffer.h"
 #include "butil/third_party/rapidjson/writer.h"
 #include "butil/third_party/rapidjson/optimized_writer.h"
+#include "butil/third_party/rapidjson/error/en.h"  // GetParseError_En
 
 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
 #pragma GCC diagnostic pop
diff --git a/test/addressbook.proto b/test/addressbook.proto
index fdbda4fc..068334f8 100644
--- a/test/addressbook.proto
+++ b/test/addressbook.proto
@@ -44,7 +44,7 @@ message Person {
 
     required double datadouble = 8;
 
-    required float  datafloat = 9; 
+    optional float  datafloat = 9; 
   
     optional uint32 datau32 = 10;
   
diff --git a/test/brpc_protobuf_json_unittest.cpp b/test/brpc_protobuf_json_unittest.cpp
index 64af4541..1469f498 100644
--- a/test/brpc_protobuf_json_unittest.cpp
+++ b/test/brpc_protobuf_json_unittest.cpp
@@ -22,12 +22,15 @@
 #include <string>
 #include <google/protobuf/text_format.h>
 #include "butil/iobuf.h"
+#include "butil/string_printf.h"
+#include "butil/strings/string_util.h"
 #include "butil/third_party/rapidjson/rapidjson.h"
 #include "butil/time.h"
 #include "butil/gperftools_profiler.h"
 #include "json2pb/pb_to_json.h"
 #include "json2pb/json_to_pb.h"
 #include "json2pb/encode_decode.h"
+#include "json2pb/zero_copy_stream_reader.h"
 #include "message.pb.h"
 #include "addressbook1.pb.h"
 #include "addressbook.pb.h"
@@ -1540,4 +1543,107 @@ TEST_F(ProtobufJsonTest, string_to_int64) {
     ASSERT_EQ(person.data(), 1234567);
 }
 
+TEST_F(ProtobufJsonTest, parse_multiple_json) {
+    const int COUNT = 4;
+    std::vector<std::string> expectedNames = { "tom", "bob", "jerry", "lucy" };
+    std::vector<int> expectedIds = { 33, 12, 2432, 435 };
+    std::vector<double> expectedData = { 1.0, 2.0, 3.0, 4.0 };
+    std::string jsonStr;
+    butil::IOBuf jsonBuf;
+    for (int i = 0; i < COUNT; ++i) {
+        const std::string d =
+            butil::string_printf(R"( { "name":"%s", "id":%d, "datadouble":%f } )",
+                              expectedNames[i].c_str(),
+                              expectedIds[i],
+                              expectedData[i]);
+        jsonStr.append(d);
+        jsonBuf.append(d);
+    }
+    
+    Person req;
+    json2pb::Json2PbOptions copt;
+    copt.allow_remaining_bytes_after_parsing = true;
+    std::string err;
+    
+    for (int i = 0; true; ++i) {
+        req.Clear();
+        size_t offset;
+        if (json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset)) {
+            jsonStr = jsonStr.substr(offset);
+            ASSERT_EQ(expectedNames[i], req.name());
+            ASSERT_EQ(expectedIds[i], req.id());
+            ASSERT_EQ(expectedData[i], req.datadouble());
+            
+            std::cout << "parsed=" << req.ShortDebugString() << " after_offset=" << jsonStr << std::endl;
+        } else {
+            if (err.empty()) {
+                // document is empty
+                break;
+            }
+            std::cerr << "error=" << err << " offset=" << offset << std::endl;
+            ASSERT_FALSE(true);
+        }
+    }
+
+    butil::IOBufAsZeroCopyInputStream stream(jsonBuf);
+    json2pb::ZeroCopyStreamReader reader(&stream);
+
+    for (int i = 0; true; ++i) {
+        req.Clear();
+        size_t offset;
+        auto res = json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset);
+        if (res) {
+            ASSERT_EQ(expectedNames[i], req.name());
+            ASSERT_EQ(expectedIds[i], req.id());
+            ASSERT_EQ(expectedData[i], req.datadouble());
+            std::string afterOffset;
+            jsonBuf.copy_to(&afterOffset, (size_t)-1L, offset);
+            std::cout << "parsed=" << req.ShortDebugString() << " after_offset=" << afterOffset << std::endl;
+        } else {
+            if (err.empty()) {
+                // document is empty
+                break;
+            }
+            std::cerr << "error=" << err << " offset=" << offset << std::endl;
+            ASSERT_FALSE(true) << i;
+        }
+    }
 }
+
+TEST_F(ProtobufJsonTest, parse_multiple_json_error) {
+    std::string jsonStr = R"( { "name":"tom", "id":323, "datadouble":3.2 }  abc )";
+    butil::IOBuf jsonBuf;
+    jsonBuf.append(jsonStr);
+    
+    Person req;
+    json2pb::Json2PbOptions copt;
+    copt.allow_remaining_bytes_after_parsing = true;
+    std::string err;
+    size_t offset;
+    
+    ASSERT_TRUE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset));
+    jsonStr = jsonStr.substr(offset);
+    ASSERT_STREQ("tom", req.name().c_str());
+    ASSERT_EQ(323, req.id());
+    ASSERT_EQ(3.2, req.datadouble());
+
+    req.Clear();
+    ASSERT_FALSE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset));
+    ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str());
+    ASSERT_EQ(2ul, offset);
+
+    butil::IOBufAsZeroCopyInputStream stream(jsonBuf);
+    json2pb::ZeroCopyStreamReader reader(&stream);
+    req.Clear();
+    ASSERT_TRUE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset));
+    ASSERT_STREQ("tom", req.name().c_str());
+    ASSERT_EQ(323, req.id());
+    ASSERT_EQ(3.2, req.datadouble());
+
+    req.Clear();
+    ASSERT_FALSE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset));
+    ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str());
+    ASSERT_EQ(47ul, offset);
+}
+
+} // namespace


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@brpc.apache.org
For additional commands, e-mail: dev-h...@brpc.apache.org

Reply via email to