This is an automated email from the ASF dual-hosted git repository. jamesge pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-brpc.git
The following commit(s) were added to refs/heads/master by this push: new 958a4c3a json2pb::JsonToProtoMessage() supports parsing multiple jsons 958a4c3a is described below commit 958a4c3a0c2b5811400e042a216d651fd11e02a1 Author: jamesge <jge...@gmail.com> AuthorDate: Sun Jun 5 17:59:30 2022 +0800 json2pb::JsonToProtoMessage() supports parsing multiple jsons --- src/butil/iobuf.h | 6 +- src/butil/strings/string_util.h | 4 ++ src/butil/third_party/rapidjson/reader.h | 5 ++ src/json2pb/json_to_pb.cpp | 106 +++++++++++++++++++++---------- src/json2pb/json_to_pb.h | 33 ++++++++-- src/json2pb/rapidjson.h | 1 + test/addressbook.proto | 2 +- test/brpc_protobuf_json_unittest.cpp | 106 +++++++++++++++++++++++++++++++ 8 files changed, 219 insertions(+), 44 deletions(-) diff --git a/src/butil/iobuf.h b/src/butil/iobuf.h index c2743096..8e2bfe47 100644 --- a/src/butil/iobuf.h +++ b/src/butil/iobuf.h @@ -296,9 +296,9 @@ public: // Returns bytes copied. size_t copy_to(void* buf, size_t n = (size_t)-1L, size_t pos = 0) const; - // NOTE: first parameter is not std::string& because user may passes - // a pointer of std::string by mistake, in which case, compiler would - // call the void* version which crashes definitely. + // NOTE: first parameter is not std::string& because user may pass in + // a pointer of std::string by mistake, in which case, the void* overload + // would be wrongly called. 
size_t copy_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0) const; size_t append_to(std::string* s, size_t n = (size_t)-1L, size_t pos = 0) const; diff --git a/src/butil/strings/string_util.h b/src/butil/strings/string_util.h index 23bc01e4..4d78e20f 100644 --- a/src/butil/strings/string_util.h +++ b/src/butil/strings/string_util.h @@ -377,6 +377,10 @@ inline bool IsWhitespace(wchar_t c) { return wcschr(butil::kWhitespaceWide, c) != NULL; } +inline bool IsBlankString(const butil::StringPiece &s) { + return butil::ContainsOnlyChars(s, " \r\n\t"); +} + // Return a byte string in human-readable format with a unit suffix. Not // appropriate for use in any UI; use of FormatBytes and friends in ui/base is // highly recommended instead. TODO(avi): Figure out how to get callers to use diff --git a/src/butil/third_party/rapidjson/reader.h b/src/butil/third_party/rapidjson/reader.h index 552eca03..fd6398b5 100644 --- a/src/butil/third_party/rapidjson/reader.h +++ b/src/butil/third_party/rapidjson/reader.h @@ -430,6 +430,11 @@ public: RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); } + } else { + // jge: Update parseResult_.Offset() when kParseStopwhendoneflag + // is set which means the user needs to know where to resume + // parsing in next calls to JsonToProtoMessage() + SetParseError(kParseErrorNone, is.Tell()); } } diff --git a/src/json2pb/json_to_pb.cpp b/src/json2pb/json_to_pb.cpp index e87c3922..fc9a31a0 100644 --- a/src/json2pb/json_to_pb.cpp +++ b/src/json2pb/json_to_pb.cpp @@ -25,6 +25,8 @@ #include <limits> #include <google/protobuf/descriptor.h> #include "butil/strings/string_number_conversions.h" +#include "butil/third_party/rapidjson/error/error.h" +#include "butil/third_party/rapidjson/rapidjson.h" #include "json_to_pb.h" #include "zero_copy_stream_reader.h" // ZeroCopyStreamReader #include "encode_decode.h" @@ -33,12 +35,19 @@ #include "protobuf_map.h" #include 
"rapidjson.h" -#define J2PERROR(perr, fmt, ...) \ + +#define J2PERROR(perr, fmt, ...) \ + J2PERROR_WITH_PB((::google::protobuf::Message*)nullptr, perr, fmt, ##__VA_ARGS__) + +#define J2PERROR_WITH_PB(pb, perr, fmt, ...) \ if (perr) { \ if (!perr->empty()) { \ perr->append(", ", 2); \ } \ - butil::string_appendf(perr, fmt, ##__VA_ARGS__); \ + butil::string_appendf(perr, fmt, ##__VA_ARGS__); \ + if ((pb) != nullptr) { \ + butil::string_appendf(perr, " [%s]", (pb)->GetDescriptor()->name().c_str()); \ + } \ } else { } namespace json2pb { @@ -49,7 +58,8 @@ Json2PbOptions::Json2PbOptions() #else : base64_to_bytes(true) #endif - , array_to_single_repeated(false) { + , array_to_single_repeated(false) + , allow_remaining_bytes_after_parsing(false) { } enum MatchType { @@ -412,7 +422,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, options.base64_to_bytes) { std::string str_decoded; if (!butil::Base64Decode(str, &str_decoded)) { - J2PERROR(err, "Fail to decode base64 string=%s", str.c_str()); + J2PERROR_WITH_PB(message, err, "Fail to decode base64 string=%s", str.c_str()); return false; } str = str_decoded; @@ -426,7 +436,7 @@ static bool JsonValueToProtoField(const BUTIL_RAPIDJSON_NAMESPACE::Value& value, options.base64_to_bytes) { std::string str_decoded; if (!butil::Base64Decode(str, &str_decoded)) { - J2PERROR(err, "Fail to decode base64 string=%s", str.c_str()); + J2PERROR_WITH_PB(message, err, "Fail to decode base64 string=%s", str.c_str()); return false; } str = str_decoded; @@ -509,7 +519,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, std::string* err) { const google::protobuf::Descriptor* descriptor = message->GetDescriptor(); if (!json_value.IsObject() && !(json_value.IsArray() && options.array_to_single_repeated)) { - J2PERROR(err, "`json_value' is not a json object. 
%s", descriptor->name().c_str()); + J2PERROR_WITH_PB(message, err, "The input is not a json object"); return false; } @@ -538,7 +548,7 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, return JsonValueToProtoField(json_value, fields.front(), message, options, err); } - J2PERROR(err, "`json_value' of type array is not allowed here."); + J2PERROR_WITH_PB(message, err, "the input json can't be array here"); return false; } @@ -589,55 +599,89 @@ bool JsonValueToProtoMessage(const BUTIL_RAPIDJSON_NAMESPACE::Value& json_value, return true; } -bool ZeroCopyStreamToJson(BUTIL_RAPIDJSON_NAMESPACE::Document *dest, - google::protobuf::io::ZeroCopyInputStream *stream) { - ZeroCopyStreamReader stream_reader(stream); - dest->ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<> >(stream_reader); - return !dest->HasParseError(); -} - inline bool JsonToProtoMessageInline(const std::string& json_string, google::protobuf::Message* message, const Json2PbOptions& options, - std::string* error) { + std::string* error, + size_t* parsed_offset) { if (error) { error->clear(); } BUTIL_RAPIDJSON_NAMESPACE::Document d; - d.Parse<0>(json_string.c_str()); + if (options.allow_remaining_bytes_after_parsing) { + d.Parse<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag>(json_string.c_str()); + if (parsed_offset != nullptr) { + *parsed_offset = d.GetErrorOffset(); + } + } else { + d.Parse<0>(json_string.c_str()); + } if (d.HasParseError()) { - J2PERROR(error, "Invalid json format"); + if (options.allow_remaining_bytes_after_parsing) { + if (d.GetParseError() == BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) { + // This is usual when parsing multiple jsons, don't waste time + // on setting the `empty error' + return false; + } + } + J2PERROR_WITH_PB(message, error, "Invalid json: %s", BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError())); return false; } - return json2pb::JsonValueToProtoMessage(d, message, options, error); + return 
JsonValueToProtoMessage(d, message, options, error); } bool JsonToProtoMessage(const std::string& json_string, google::protobuf::Message* message, const Json2PbOptions& options, - std::string* error) { - return JsonToProtoMessageInline(json_string, message, options, error); + std::string* error, + size_t* parsed_offset) { + return JsonToProtoMessageInline(json_string, message, options, error, parsed_offset); } bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream, google::protobuf::Message* message, const Json2PbOptions& options, - std::string* error) { + std::string* error, + size_t* parsed_offset) { + ZeroCopyStreamReader reader(stream); + return JsonToProtoMessage(&reader, message, options, error, parsed_offset); +} + +bool JsonToProtoMessage(ZeroCopyStreamReader* reader, + google::protobuf::Message* message, + const Json2PbOptions& options, + std::string* error, + size_t* parsed_offset) { if (error) { error->clear(); } BUTIL_RAPIDJSON_NAMESPACE::Document d; - if (!json2pb::ZeroCopyStreamToJson(&d, stream)) { - J2PERROR(error, "Invalid json format"); + if (options.allow_remaining_bytes_after_parsing) { + d.ParseStream<BUTIL_RAPIDJSON_NAMESPACE::kParseStopWhenDoneFlag, BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader); + if (parsed_offset != nullptr) { + *parsed_offset = d.GetErrorOffset(); + } + } else { + d.ParseStream<0, BUTIL_RAPIDJSON_NAMESPACE::UTF8<>>(*reader); + } + if (d.HasParseError()) { + if (options.allow_remaining_bytes_after_parsing) { + if (d.GetParseError() == BUTIL_RAPIDJSON_NAMESPACE::kParseErrorDocumentEmpty) { + // This is usual when parsing multiple jsons, don't waste time + // on setting the `empty error' + return false; + } + } + J2PERROR_WITH_PB(message, error, "Invalid json: %s", BUTIL_RAPIDJSON_NAMESPACE::GetParseError_En(d.GetParseError())); return false; } - return json2pb::JsonValueToProtoMessage(d, message, options, error); + return JsonValueToProtoMessage(d, message, options, error); } bool JsonToProtoMessage(const 
std::string& json_string, google::protobuf::Message* message, std::string* error) { - return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error); + return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error, nullptr); } // For ABI compatibility with 1.0.0.0 @@ -647,21 +691,13 @@ bool JsonToProtoMessage(const std::string& json_string, bool JsonToProtoMessage(std::string json_string, google::protobuf::Message* message, std::string* error) { - return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error); + return JsonToProtoMessageInline(json_string, message, Json2PbOptions(), error, nullptr); } bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *stream, google::protobuf::Message* message, std::string* error) { - if (error) { - error->clear(); - } - BUTIL_RAPIDJSON_NAMESPACE::Document d; - if (!json2pb::ZeroCopyStreamToJson(&d, stream)) { - J2PERROR(error, "Invalid json format"); - return false; - } - return json2pb::JsonValueToProtoMessage(d, message, Json2PbOptions(), error); + return JsonToProtoMessage(stream, message, Json2PbOptions(), error, nullptr); } } //namespace json2pb diff --git a/src/json2pb/json_to_pb.h b/src/json2pb/json_to_pb.h index 67f5bdf5..44203e08 100644 --- a/src/json2pb/json_to_pb.h +++ b/src/json2pb/json_to_pb.h @@ -20,6 +20,7 @@ #ifndef BRPC_JSON2PB_JSON_TO_PB_H #define BRPC_JSON2PB_JSON_TO_PB_H +#include "json2pb/zero_copy_stream_reader.h" #include <google/protobuf/message.h> #include <google/protobuf/io/zero_copy_stream.h> // ZeroCopyInputStream @@ -36,30 +37,52 @@ struct Json2PbOptions { // Allow decoding json array iff there is only one repeated field. // Default: false. bool array_to_single_repeated; + + // Allow more bytes remaining in the input after parsing the first json + // object. Useful when the input contains more than one json object. + bool allow_remaining_bytes_after_parsing; }; // Convert `json' to protobuf `message'. // Returns true on success. 
`error' (if not NULL) will be set with error // message on failure. +// +// [When options.allow_remaining_bytes_after_parsing is true] +// * `parsed_offset' will be set with #bytes parsed +// * the function still returns false on empty document but the `error' is set +// to empty string instead of `The document is empty'. bool JsonToProtoMessage(const std::string& json, google::protobuf::Message* message, const Json2PbOptions& options, - std::string* error = NULL); + std::string* error = nullptr, + size_t* parsed_offset = nullptr); -// send output to ZeroCopyOutputStream instead of std::string. +// Use ZeroCopyInputStream as input instead of std::string. bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream *json, + google::protobuf::Message *message, + const Json2PbOptions &options, + std::string *error = nullptr, + size_t *parsed_offset = nullptr); + +// Use ZeroCopyStreamReader as input instead of std::string. +// If you need to parse multiple jsons from IOBuf, you should use this +// overload instead of the ZeroCopyInputStream one which bases on this +// and recreates a ZeroCopyStreamReader internally that can't be reused +// between continuous calls. +bool JsonToProtoMessage(ZeroCopyStreamReader *json, google::protobuf::Message* message, const Json2PbOptions& options, - std::string* error = NULL); + std::string* error = nullptr, + size_t* parsed_offset = nullptr); // Using default Json2PbOptions. 
bool JsonToProtoMessage(const std::string& json, google::protobuf::Message* message, - std::string* error = NULL); + std::string* error = nullptr); bool JsonToProtoMessage(google::protobuf::io::ZeroCopyInputStream* stream, google::protobuf::Message* message, - std::string* error = NULL); + std::string* error = nullptr); } // namespace json2pb #endif // BRPC_JSON2PB_JSON_TO_PB_H diff --git a/src/json2pb/rapidjson.h b/src/json2pb/rapidjson.h index d2cf3b68..fa5d354a 100644 --- a/src/json2pb/rapidjson.h +++ b/src/json2pb/rapidjson.h @@ -39,6 +39,7 @@ #include "butil/third_party/rapidjson/stringbuffer.h" #include "butil/third_party/rapidjson/writer.h" #include "butil/third_party/rapidjson/optimized_writer.h" +#include "butil/third_party/rapidjson/error/en.h" // GetErrorCode_En #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) #pragma GCC diagnostic pop diff --git a/test/addressbook.proto b/test/addressbook.proto index fdbda4fc..068334f8 100644 --- a/test/addressbook.proto +++ b/test/addressbook.proto @@ -44,7 +44,7 @@ message Person { required double datadouble = 8; - required float datafloat = 9; + optional float datafloat = 9; optional uint32 datau32 = 10; diff --git a/test/brpc_protobuf_json_unittest.cpp b/test/brpc_protobuf_json_unittest.cpp index 64af4541..1469f498 100644 --- a/test/brpc_protobuf_json_unittest.cpp +++ b/test/brpc_protobuf_json_unittest.cpp @@ -22,12 +22,15 @@ #include <string> #include <google/protobuf/text_format.h> #include "butil/iobuf.h" +#include "butil/string_printf.h" +#include "butil/strings/string_util.h" #include "butil/third_party/rapidjson/rapidjson.h" #include "butil/time.h" #include "butil/gperftools_profiler.h" #include "json2pb/pb_to_json.h" #include "json2pb/json_to_pb.h" #include "json2pb/encode_decode.h" +#include "json2pb/zero_copy_stream_reader.h" #include "message.pb.h" #include "addressbook1.pb.h" #include "addressbook.pb.h" @@ -1540,4 +1543,107 @@ TEST_F(ProtobufJsonTest, string_to_int64) { ASSERT_EQ(person.data(), 
1234567); } +TEST_F(ProtobufJsonTest, parse_multiple_json) { + const int COUNT = 4; + std::vector<std::string> expectedNames = { "tom", "bob", "jerry", "lucy" }; + std::vector<int> expectedIds = { 33, 12, 2432, 435 }; + std::vector<double> expectedData = { 1.0, 2.0, 3.0, 4.0 }; + std::string jsonStr; + butil::IOBuf jsonBuf; + for (int i = 0; i < COUNT; ++i) { + const std::string d = + butil::string_printf(R"( { "name":"%s", "id":%d, "datadouble":%f } )", + expectedNames[i].c_str(), + expectedIds[i], + expectedData[i]); + jsonStr.append(d); + jsonBuf.append(d); + } + + Person req; + json2pb::Json2PbOptions copt; + copt.allow_remaining_bytes_after_parsing = true; + std::string err; + + for (int i = 0; true; ++i) { + req.Clear(); + size_t offset; + if (json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset)) { + jsonStr = jsonStr.substr(offset); + ASSERT_EQ(expectedNames[i], req.name()); + ASSERT_EQ(expectedIds[i], req.id()); + ASSERT_EQ(expectedData[i], req.datadouble()); + + std::cout << "parsed=" << req.ShortDebugString() << " after_offset=" << jsonStr << std::endl; + } else { + if (err.empty()) { + // document is empty + break; + } + std::cerr << "error=" << err << " offset=" << offset << std::endl; + ASSERT_FALSE(true); + } + } + + butil::IOBufAsZeroCopyInputStream stream(jsonBuf); + json2pb::ZeroCopyStreamReader reader(&stream); + + for (int i = 0; true; ++i) { + req.Clear(); + size_t offset; + auto res = json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset); + if (res) { + ASSERT_EQ(expectedNames[i], req.name()); + ASSERT_EQ(expectedIds[i], req.id()); + ASSERT_EQ(expectedData[i], req.datadouble()); + std::string afterOffset; + jsonBuf.copy_to(&afterOffset, (size_t)-1L, offset); + std::cout << "parsed=" << req.ShortDebugString() << " after_offset=" << afterOffset << std::endl; + } else { + if (err.empty()) { + // document is empty + break; + } + std::cerr << "error=" << err << " offset=" << offset << std::endl; + ASSERT_FALSE(true) << i; + } + 
} } + +TEST_F(ProtobufJsonTest, parse_multiple_json_error) { + std::string jsonStr = R"( { "name":"tom", "id":323, "datadouble":3.2 } abc )"; + butil::IOBuf jsonBuf; + jsonBuf.append(jsonStr); + + Person req; + json2pb::Json2PbOptions copt; + copt.allow_remaining_bytes_after_parsing = true; + std::string err; + size_t offset; + + ASSERT_TRUE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset)); + jsonStr = jsonStr.substr(offset); + ASSERT_STREQ("tom", req.name().c_str()); + ASSERT_EQ(323, req.id()); + ASSERT_EQ(3.2, req.datadouble()); + + req.Clear(); + ASSERT_FALSE(json2pb::JsonToProtoMessage(jsonStr, &req, copt, &err, &offset)); + ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str()); + ASSERT_EQ(2ul, offset); + + butil::IOBufAsZeroCopyInputStream stream(jsonBuf); + json2pb::ZeroCopyStreamReader reader(&stream); + req.Clear(); + ASSERT_TRUE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset)); + ASSERT_STREQ("tom", req.name().c_str()); + ASSERT_EQ(323, req.id()); + ASSERT_EQ(3.2, req.datadouble()); + + req.Clear(); + ASSERT_FALSE(json2pb::JsonToProtoMessage(&reader, &req, copt, &err, &offset)); + ASSERT_STREQ("Invalid json: Invalid value. [Person]", err.c_str()); + ASSERT_EQ(47ul, offset); +} + +} // namespace --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@brpc.apache.org For additional commands, e-mail: dev-h...@brpc.apache.org