This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c2525308330 [SPARK-42881][SQL] Codegen Support for get_json_object
c2525308330 is described below

commit c252530833097759b1f943ff89b05f22025f0dd0
Author: panbingkun <pbk1...@gmail.com>
AuthorDate: Wed Oct 11 17:42:48 2023 +0300

    [SPARK-42881][SQL] Codegen Support for get_json_object

    ### What changes were proposed in this pull request?
    This PR adds codegen support for `get_json_object`.

    ### Why are the changes needed?
    To improve codegen coverage and performance.

    GitHub benchmark data (https://github.com/panbingkun/spark/actions/runs/4497396473/jobs/7912952710):
    <img width="879" alt="image" src="https://user-images.githubusercontent.com/15246973/227117793-bab38c42-dcc1-46de-a689-25a87b8f3561.png">

    Local benchmark data:
    <img width="895" alt="image" src="https://user-images.githubusercontent.com/15246973/227098745-9b360e60-fe84-4419-8b7d-073a0530816a.png">

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Added new unit tests (UT). Passes GitHub Actions (GA).

    Closes #40506 from panbingkun/json_code_gen.
Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/expressions/jsonExpressions.scala | 121 +++++++++++++++++--- sql/core/benchmarks/JsonBenchmark-results.txt | 127 +++++++++++---------- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 28 +++++ .../execution/datasources/json/JsonBenchmark.scala | 15 ++- 4 files changed, 208 insertions(+), 83 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index e7df542ddab..04bc457b66a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -28,7 +28,8 @@ import com.fasterxml.jackson.core.json.JsonReadFeature import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, CodegenFallback, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.trees.TreePattern.{JSON_TO_STRUCT, TreePattern} import org.apache.spark.sql.catalyst.util._ @@ -125,13 +126,7 @@ private[this] object SharedFactory { group = "json_funcs", since = "1.5.0") case class GetJsonObject(json: Expression, path: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - import com.fasterxml.jackson.core.JsonToken._ - - import PathInstruction._ - import SharedFactory._ - import WriteStyle._ + extends BinaryExpression with ExpectsInputTypes { override def left: Expression = json 
override def right: Expression = path @@ -140,18 +135,114 @@ case class GetJsonObject(json: Expression, path: Expression) override def nullable: Boolean = true override def prettyName: String = "get_json_object" - @transient private lazy val parsedPath = parsePath(path.eval().asInstanceOf[UTF8String]) + @transient + private lazy val evaluator = if (path.foldable) { + new GetJsonObjectEvaluator(path.eval().asInstanceOf[UTF8String]) + } else { + new GetJsonObjectEvaluator() + } override def eval(input: InternalRow): Any = { - val jsonStr = json.eval(input).asInstanceOf[UTF8String] + evaluator.setJson(json.eval(input).asInstanceOf[UTF8String]) + if (!path.foldable) { + evaluator.setPath(path.eval(input).asInstanceOf[UTF8String]) + } + evaluator.evaluate() + } + + protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val evaluatorClass = classOf[GetJsonObjectEvaluator].getName + val initEvaluator = path.foldable match { + case true if path.eval() != null => + val cachedPath = path.eval().asInstanceOf[UTF8String] + val refCachedPath = ctx.addReferenceObj("cachedPath", cachedPath) + s"new $evaluatorClass($refCachedPath)" + case _ => s"new $evaluatorClass()" + } + val evaluator = ctx.addMutableState(evaluatorClass, "evaluator", + v => s"""$v = $initEvaluator;""", forceInline = true) + + val jsonEval = json.genCode(ctx) + val pathEval = path.genCode(ctx) + + val setJson = + s""" + |if (${jsonEval.isNull}) { + | $evaluator.setJson(null); + |} else { + | $evaluator.setJson(${jsonEval.value}); + |} + |""".stripMargin + val setPath = if (!path.foldable) { + s""" + |if (${pathEval.isNull}) { + | $evaluator.setPath(null); + |} else { + | $evaluator.setPath(${pathEval.value}); + |} + |""".stripMargin + } else { + "" + } + + val resultType = CodeGenerator.boxedType(dataType) + val resultTerm = ctx.freshName("result") + ev.copy(code = + code""" + |${jsonEval.code} + |${pathEval.code} + |$setJson + |$setPath + |$resultType $resultTerm = ($resultType) 
$evaluator.evaluate(); + |boolean ${ev.isNull} = $resultTerm == null; + |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $resultTerm; + |} + |""".stripMargin + ) + } + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): GetJsonObject = + copy(json = newLeft, path = newRight) +} + +class GetJsonObjectEvaluator(cachedPath: UTF8String) { + import com.fasterxml.jackson.core.JsonToken._ + import PathInstruction._ + import SharedFactory._ + import WriteStyle._ + + def this() = this(null) + + @transient + private lazy val parsedPath: Option[List[PathInstruction]] = + parsePath(cachedPath) + + @transient + private var jsonStr: UTF8String = null + + @transient + private var pathStr: UTF8String = null + + def setJson(arg: UTF8String): Unit = { + jsonStr = arg + } + + def setPath(arg: UTF8String): Unit = { + pathStr = arg + } + + def evaluate(): Any = { if (jsonStr == null) { return null } - val parsed = if (path.foldable) { + val parsed = if (cachedPath != null) { parsedPath } else { - parsePath(path.eval(input).asInstanceOf[UTF8String]) + parsePath(pathStr) } if (parsed.isDefined) { @@ -294,7 +385,7 @@ case class GetJsonObject(json: Expression, path: Expression) g.writeRawValue(buf.toString) } else if (dirty == 1) { // remove outer array tokens - g.writeRawValue(buf.substring(1, buf.length()-1)) + g.writeRawValue(buf.substring(1, buf.length() - 1)) } // else do not write anything dirty > 0 @@ -337,10 +428,6 @@ case class GetJsonObject(json: Expression, path: Expression) false } } - - override protected def withNewChildrenInternal( - newLeft: Expression, newRight: Expression): GetJsonObject = - copy(json = newLeft, path = newRight) } // scalastyle:off line.size.limit line.contains.tab diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt index 035e0165ffd..ae4a9ae0c79 100644 --- 
a/sql/core/benchmarks/JsonBenchmark-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -3,127 +3,128 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2858 2897 62 1.7 571.7 1.0X -UTF-8 is set 4281 4291 9 1.2 856.1 0.7X +No encoding 2929 3010 86 1.7 585.9 1.0X +UTF-8 is set 4313 4344 41 1.2 862.5 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 3070 3076 5 1.6 614.1 1.0X -UTF-8 is set 4641 4666 22 1.1 928.2 0.7X +No encoding 2797 2857 68 1.8 559.4 1.0X +UTF-8 is set 4262 4281 17 1.2 852.4 0.7X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 4258 4424 282 0.2 4258.4 1.0X -UTF-8 is set 6180 6194 18 0.2 6180.0 0.7X +No encoding 4265 4360 88 0.2 4265.4 1.0X +UTF-8 is set 6400 6434 29 0.2 6400.4 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 12765 12772 11 0.0 255294.1 1.0X -UTF-8 is set 14144 14209 78 0.0 282874.0 0.9X +No encoding 12301 12381 113 0.0 246024.1 1.0X +UTF-8 is set 13846 13912 57 0.0 276925.6 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 2352 2372 25 0.4 2352.3 1.0X -Select 1 column 1683 1705 28 0.6 1682.6 1.4X +Select 10 columns 2316 2323 7 0.4 2316.3 1.0X +Select 1 column 1702 1717 17 0.6 1702.0 1.4X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 873 890 22 1.1 873.3 1.0X -Short column with UTF-8 1169 1177 14 0.9 1168.5 0.7X -Wide column without encoding 7404 8401 1445 0.1 7404.1 0.1X -Wide column with UTF-8 9207 9222 16 0.1 9207.2 0.1X +Short column without encoding 827 850 22 1.2 827.1 1.0X +Short column with UTF-8 1111 1116 7 0.9 1111.0 0.7X +Wide column without encoding 7409 7447 50 0.1 7409.2 0.1X +Wide column with UTF-8 10580 10616 34 0.1 10580.4 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 81 93 11 12.4 80.8 1.0X -from_json 1824 1866 45 0.5 1823.7 0.0X -json_tuple 1716 1737 23 0.6 1716.2 0.0X -get_json_object 1623 1637 22 0.6 1622.6 0.0X +Text read 88 92 6 11.3 88.3 1.0X +from_json 2083 2091 7 0.5 2083.1 0.0X +json_tuple 2101 2133 42 0.5 2101.4 0.0X +get_json_object wholestage off 2032 2037 8 0.5 2032.0 0.0X +get_json_object wholestage on 1917 1926 10 0.5 1917.3 0.0X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 342 344 2 14.6 68.3 1.0X -schema inferring 2213 2218 5 2.3 442.6 0.2X -parsing 3734 3740 5 1.3 746.9 0.1X +Text read 351 351 0 14.3 70.1 1.0X +schema inferring 2342 2344 4 2.1 468.3 0.1X +parsing 3728 3751 26 1.3 745.6 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 917 923 6 5.4 183.5 1.0X -Schema inferring 2952 2956 4 1.7 590.4 0.3X -Parsing without charset 3979 3988 10 1.3 795.8 0.2X -Parsing with UTF-8 5459 5464 6 0.9 1091.9 0.2X +Text read 876 883 9 5.7 175.2 1.0X +Schema inferring 3072 3082 14 1.6 614.4 0.3X +Parsing without charset 3870 3877 7 1.3 774.1 0.2X +Parsing with UTF-8 5287 5290 5 0.9 1057.3 0.2X -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 170 171 2 5.9 169.6 1.0X -to_json(timestamp) 1033 1036 4 1.0 1032.6 0.2X -write timestamps to files 925 934 8 1.1 924.9 0.2X -Create a dataset 
of dates 171 177 6 5.8 171.5 1.0X -to_json(date) 741 743 5 1.4 740.7 0.2X -write dates to files 616 624 11 1.6 616.3 0.3X +Create a dataset of timestamps 193 200 10 5.2 192.5 1.0X +to_json(timestamp) 1034 1044 14 1.0 1033.6 0.2X +write timestamps to files 945 966 26 1.1 945.0 0.2X +Create a dataset of dates 200 205 6 5.0 199.8 1.0X +to_json(date) 757 763 6 1.3 757.0 0.3X +write dates to files 647 660 20 1.5 646.8 0.3X -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 222 225 4 4.5 221.7 1.0X -read timestamps from files 2595 2634 46 0.4 2595.4 0.1X -infer timestamps from files 6351 6359 8 0.2 6350.7 0.0X -read date text from files 203 207 5 4.9 203.2 1.1X -read date from files 973 978 4 1.0 973.2 0.2X -timestamp strings 220 225 5 4.6 219.7 1.0X -parse timestamps from Dataset[String] 2812 2815 3 0.4 2811.5 0.1X -infer timestamps from Dataset[String] 6520 6523 4 0.2 6519.6 0.0X -date strings 294 304 9 3.4 293.6 0.8X -parse dates from Dataset[String] 1355 1359 6 0.7 1354.5 0.2X -from_json(timestamp) 3797 3800 2 0.3 3797.2 0.1X -from_json(date) 2267 2282 13 0.4 2266.8 0.1X -infer error timestamps from Dataset[String] with default format 1863 1864 1 0.5 1862.5 0.1X -infer error timestamps from Dataset[String] with user-provided format 1849 1855 6 0.5 1849.2 0.1X -infer error timestamps from Dataset[String] with legacy format 1832 1847 24 0.5 1831.7 0.1X +read timestamp text from files 227 231 4 4.4 227.3 1.0X +read timestamps from files 2670 2725 70 0.4 2670.2 0.1X +infer timestamps from files 6703 6714 17 0.1 6703.1 0.0X +read date text from files 201 205 5 5.0 200.8 1.1X 
+read date from files 944 951 7 1.1 944.0 0.2X +timestamp strings 219 224 6 4.6 218.9 1.0X +parse timestamps from Dataset[String] 2847 2856 8 0.4 2847.3 0.1X +infer timestamps from Dataset[String] 6725 6737 13 0.1 6724.9 0.0X +date strings 300 304 4 3.3 299.6 0.8X +parse dates from Dataset[String] 1230 1245 16 0.8 1230.5 0.2X +from_json(timestamp) 4123 4125 2 0.2 4123.0 0.1X +from_json(date) 2574 2585 9 0.4 2574.4 0.1X +infer error timestamps from Dataset[String] with default format 1871 1878 8 0.5 1870.8 0.1X +infer error timestamps from Dataset[String] with user-provided format 1869 1877 13 0.5 1868.9 0.1X +infer error timestamps from Dataset[String] with legacy format 1847 1875 43 0.5 1847.2 0.1X -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 18958 18970 10 0.0 189581.8 1.0X -pushdown disabled 18640 18656 15 0.0 186401.4 1.0X -w/ filters 874 881 6 0.1 8742.7 21.7X +w/o filters 19347 19382 40 0.0 193474.6 1.0X +pushdown disabled 19320 19329 11 0.0 193196.4 1.0X +w/ filters 897 898 1 0.1 8968.3 21.6X -OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1046-azure +OpenJDK 64-Bit Server VM 17.0.8+7-LTS on Linux 5.15.0-1047-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 3533 3693 239 0.0 353318.7 1.0X +parse invalid JSON 3398 3589 249 0.0 339830.8 1.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index b7b34129a95..51e66f40121 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.{SparkException, SparkRuntimeException} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Literal, StructsToJson} import org.apache.spark.sql.catalyst.expressions.Cast._ +import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -1397,4 +1398,31 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("json_object_keys(a)"), expected) checkAnswer(df.select(json_object_keys($"a")), expected) } + + test("function get_json_object - Codegen Support") { + withTempView("GetJsonObjectTable") { + val data = Seq(("1", """{"f1": "value1", "f5": 5.23}""")).toDF("key", "jstring") + data.createOrReplaceTempView("GetJsonObjectTable") + val df = sql("SELECT key, get_json_object(jstring, '$.f1') FROM GetJsonObjectTable") + val plan = df.queryExecution.executedPlan + assert(plan.isInstanceOf[WholeStageCodegenExec]) + checkAnswer(df, Seq(Row("1", "value1"))) + } + } + + test("function get_json_object - path is null") { + val data = Seq(("""{"name": "alice", "age": 5}""", "")).toDF("a", "b") + val df = data.selectExpr("get_json_object(a, null)") + val plan = df.queryExecution.executedPlan + assert(plan.isInstanceOf[WholeStageCodegenExec]) + checkAnswer(df, Row(null)) + } + + test("function get_json_object - json is null") { + val data = Seq(("""{"name": "alice", "age": 5}""", "")).toDF("a", "b") + val df = data.selectExpr("get_json_object(null, '$.name')") + val plan = df.queryExecution.executedPlan + assert(plan.isInstanceOf[WholeStageCodegenExec]) + 
checkAnswer(df, Row(null)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index 5b86543648f..02ed2a16d11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -272,9 +272,18 @@ object JsonBenchmark extends SqlBasedBenchmark { json_tuple_ds.noop() } - benchmark.addCase("get_json_object", iters) { _ => - val get_json_object_ds = in.select(get_json_object($"value", "$.a")) - get_json_object_ds.noop() + benchmark.addCase("get_json_object wholestage off", iters) { _ => + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { + val get_json_object_ds = in.select(get_json_object($"value", "$.a")) + get_json_object_ds.noop() + } + } + + benchmark.addCase("get_json_object wholestage on", iters) { _ => + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + val get_json_object_ds = in.select(get_json_object($"value", "$.a")) + get_json_object_ds.noop() + } } benchmark.run() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org