Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/22920#discussion_r230546330 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala --- @@ -158,26 +166,78 @@ object JSONBenchmark extends SqlBasedBenchmark { val ds = spark.read.schema(schema).json(path.getAbsolutePath) - benchmark.addCase(s"Select $colsNum columns + count()", 3) { _ => + benchmark.addCase(s"Select $colsNum columns + count()", numIters) { _ => ds.select("*").filter((_: Row) => true).count() } - benchmark.addCase(s"Select 1 column + count()", 3) { _ => + benchmark.addCase(s"Select 1 column + count()", numIters) { _ => ds.select($"col1").filter((_: Row) => true).count() } - benchmark.addCase(s"count()", 3) { _ => + benchmark.addCase(s"count()", numIters) { _ => ds.count() } benchmark.run() } } + def jsonParserCreation(rowsNum: Int, numIters: Int): Unit = { + val benchmark = new Benchmark("creation of JSON parser per line", rowsNum, output = output) + + withTempPath { path => + prepareDataInfo(benchmark) + + val shortColumnPath = path.getAbsolutePath + "/short" + val shortSchema = writeShortColumn(shortColumnPath, rowsNum) + + val wideColumnPath = path.getAbsolutePath + "/wide" + val wideSchema = writeWideColumn(wideColumnPath, rowsNum) + + benchmark.addCase("Short column without encoding", numIters) { _ => + spark.read + .schema(shortSchema) + .json(shortColumnPath) + .filter((_: Row) => true) + .count() + } + + benchmark.addCase("Short column with UTF-8", numIters) { _ => + spark.read + .option("encoding", "UTF-8") + .schema(shortSchema) + .json(shortColumnPath) + .filter((_: Row) => true) + .count() + } + + benchmark.addCase("Wide column without encoding", numIters) { _ => + spark.read + .schema(wideSchema) + .json(wideColumnPath) + .filter((_: Row) => true) + .count() + } + + benchmark.addCase("Wide column with UTF-8", numIters) { _ => + spark.read + .option("encoding", "UTF-8") + 
.schema(wideSchema) + .json(wideColumnPath) + .filter((_: Row) => true) + .count() + } + + benchmark.run() + } + } + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val numIters = 2 --- End diff -- Thank you for updating, @MaxGekk . Do we have a reason to decrease this value from 3 to 2 in this PR? If this is to reduce the time, let's keep the original value. This benchmark is not executed frequently.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org