kazuyukitanimura commented on code in PR #451:
URL: https://github.com/apache/datafusion-comet/pull/451#discussion_r1607045862


##########
spark/src/test/scala/org/apache/comet/DataGenerator.scala:
##########
@@ -95,4 +102,55 @@ class DataGenerator(r: Random) {
       Range(0, n).map(_ => r.nextLong())
   }
 
+  // Generate a random row according to the schema, the string field in the 
struct could be
+  // configured to generate strings by passing a stringGen function. Other 
types are delegated
+  // to Spark's RandomDataGenerator.
+  def generateRow(schema: StructType, stringGen: Option[() => String] = None): 
Row = {
+    val fields = mutable.ArrayBuffer.empty[Any]
+    schema.fields.foreach { f =>
+      f.dataType match {
+        case ArrayType(childType, nullable) =>
+          val data = if (f.nullable && r.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            val arr = mutable.ArrayBuffer.empty[Any]
+            val n = 1 // rand.nextInt(10)
+            var i = 0
+            val generator = RandomDataGenerator.forType(childType, nullable, r)
+            assert(generator.isDefined, "Unsupported type")
+            val gen = generator.get
+            while (i < n) {
+              arr += gen()
+              i += 1
+            }
+            arr.toSeq
+          }
+          fields += data
+        case StructType(children) =>
+          fields += generateRow(StructType(children))
+        case StringType if stringGen.isDefined =>
+          val gen = stringGen.get
+          val data = if (f.nullable && r.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            gen()
+          }
+          fields += data
+        case _ =>
+          val generator = RandomDataGenerator.forType(f.dataType, f.nullable, 
r)
+          assert(generator.isDefined, "Unsupported type")

Review Comment:
   I think we should also check `f.nullable` here and return `null` with 
probability `PROBABILITY_OF_NULL`, as the `ArrayType` and `StringType` 
branches above do



##########
spark/src/test/scala/org/apache/comet/DataGenerator.scala:
##########
@@ -95,4 +102,55 @@ class DataGenerator(r: Random) {
       Range(0, n).map(_ => r.nextLong())
   }
 
+  // Generate a random row according to the schema, the string field in the 
struct could be
+  // configured to generate strings by passing a stringGen function. Other 
types are delegated
+  // to Spark's RandomDataGenerator.
+  def generateRow(schema: StructType, stringGen: Option[() => String] = None): 
Row = {
+    val fields = mutable.ArrayBuffer.empty[Any]
+    schema.fields.foreach { f =>
+      f.dataType match {
+        case ArrayType(childType, nullable) =>
+          val data = if (f.nullable && r.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            val arr = mutable.ArrayBuffer.empty[Any]
+            val n = 1 // rand.nextInt(10)
+            var i = 0
+            val generator = RandomDataGenerator.forType(childType, nullable, r)
+            assert(generator.isDefined, "Unsupported type")
+            val gen = generator.get
+            while (i < n) {
+              arr += gen()
+              i += 1
+            }
+            arr.toSeq
+          }
+          fields += data
+        case StructType(children) =>
+          fields += generateRow(StructType(children))
+        case StringType if stringGen.isDefined =>
+          val gen = stringGen.get
+          val data = if (f.nullable && r.nextFloat() <= PROBABILITY_OF_NULL) {
+            null
+          } else {
+            gen()
+          }
+          fields += data
+        case _ =>
+          val generator = RandomDataGenerator.forType(f.dataType, f.nullable, 
r)
+          assert(generator.isDefined, "Unsupported type")
+          val gen = generator.get
+          fields += gen()
+      }
+    }
+    Row.fromSeq(fields.toSeq)
+  }
+
+  def generateRows(

Review Comment:
   Is the plan to call this function in other tests?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to