benibus commented on code in PR #14100:
URL: https://github.com/apache/arrow/pull/14100#discussion_r977880082


##########
cpp/src/arrow/json/parser_benchmark.cc:
##########
@@ -30,24 +30,50 @@
 namespace arrow {
 namespace json {
 
-std::shared_ptr<Schema> TestSchema() {
-  return schema({field("int", int32()), field("str", utf8())});
-}
-
-constexpr int seed = 0x432432;
-
-std::string TestJsonData(int num_rows, bool pretty = false) {
-  std::default_random_engine engine(seed);
-  std::string json;
-  for (int i = 0; i < num_rows; ++i) {
-    StringBuffer sb;
-    Writer writer(sb);
-    ABORT_NOT_OK(Generate(TestSchema(), engine, &writer));
-    json += pretty ? PrettyPrint(sb.GetString()) : sb.GetString();
-    json += "\n";
+class JSONGenerator {
+ public:
+  constexpr static int kSeed = 0x432432;
+
+  constexpr explicit JSONGenerator(bool pretty = false) : pretty_(pretty) {}
+
+  template <typename T>
+  std::string operator()(const T& input, int32_t num_rows) const {
+    std::default_random_engine engine(kSeed);
+    std::string json;
+    for (int i = 0; i < num_rows; ++i) {
+      StringBuffer sb;
+      Writer writer(sb);
+      ABORT_NOT_OK(Generate(input, engine, &writer));
+      json += pretty_ ? PrettyPrint(sb.GetString()) : sb.GetString();
+      json += "\n";
+    }
+    return json;
   }
 
-  return json;
+ private:
+  bool pretty_;
+};
+
+constexpr auto JSONString = JSONGenerator{false};
+constexpr auto JSONStringPretty = JSONGenerator{true};
+
+// Both field sets are the worst-case ordering of each other - i.e. the parser cannot
+// reliably predict the next field in B given the definition of A.
+FieldVector TestFields1() {
+  return {
+      field("int", int32()),
+      field("str", utf8()),
+  };
+}
+FieldVector TestFields2() {
+  return {
+      field("str", utf8()),
+      field("int", int32()),
+  };
+}

Review Comment:
   Alright, I'll add distinct benchmarks for longer schemas - in which case, 
I'll implement field randomization too.
   
   On another note, it seems worthwhile to get a control case for rapidjson on 
its own - possibly with a dummy handler. Thoughts?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to