This is an automated email from the ASF dual-hosted git repository.
philo-he pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c3165c1c4c [VL] Fix json_tuple rewrite producing incompatible JSON
path in fallback scenarios (#12038)
c3165c1c4c is described below
commit c3165c1c4c991a49388ce1d6c5076b66c20213a7
Author: Zouxxyy <[email protected]>
AuthorDate: Sat May 9 12:09:40 2026 +0800
[VL] Fix json_tuple rewrite producing incompatible JSON path in fallback
scenarios (#12038)
---
.../gluten/execution/GenerateExecTransformer.scala | 6 +-
.../execution/JsonTuplePathRewriteSuite.scala | 73 ++++++++++++++++++++++
2 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala
index f69bd25294..267ff3897e 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala
@@ -232,16 +232,16 @@ object PullOutGenerateProjectHelper extends PullOutProjectHelper {
case jsonPath if jsonPath.foldable =>
Option(jsonPath.eval()) match {
case Some(path) =>
- GetJsonObject(jsonObj, Literal.create("$[" + path + "]"))
+ GetJsonObject(jsonObj, Literal.create("$['" + path + "']"))
case _ =>
Literal.create(null)
}
case jsonPath =>
// Build bracket notation uniformly to
- // allow dot-containing field names in JSON paths, e.g., $[a.b]
+ // allow dot-containing field names in JSON paths, e.g., $['a.b']
GetJsonObject(
jsonObj,
- Concat(Seq(Literal.create("$["), jsonPath, Literal.create("]"))))
+ Concat(Seq(Literal.create("$['"), jsonPath, Literal.create("']"))))
}.toIndexedSeq
}
val preGenerateExprs =
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/JsonTuplePathRewriteSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonTuplePathRewriteSuite.scala
new file mode 100644
index 0000000000..fa9b4ff889
--- /dev/null
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/JsonTuplePathRewriteSuite.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.execution
+
+import org.apache.spark.sql.Row
+
+class JsonTuplePathRewriteSuite extends VeloxWholeStageTransformerSuite {
+ protected val rootPath: String = getClass.getResource("/").getPath
+ override protected val resourcePath: String = "/tpch-data-parquet"
+ override protected val fileFormat: String = "parquet"
+
+ import testImplicits._
+
+ test("Test json_tuple with get_json_object fallback") {
+ withTempView("t") {
+ Seq[(String)](
+ "{\"k\":\"v\",\"a.b\":\"dot_value\",\"x\":\"1\",\"y\":\"2\"}",
+ "{\"k\":\"v2\",\"a.b\":\"dot_value2\",\"x\":\"3\",\"y\":\"4\"}",
+ null
+ ).toDF("json_field")
+ .createOrReplaceTempView("t")
+ withSQLConf("spark.gluten.expression.blacklist" -> "get_json_object") {
+ // Basic single key extraction
+ checkAnswer(
+ sql("SELECT fk from t lateral view json_tuple(json_field, 'k') as fk"),
+ Seq(Row("v"), Row("v2"), Row(null))
+ )
+
+ // Key containing dot (core scenario for bracket notation)
+ checkAnswer(
+ sql("SELECT fk from t lateral view json_tuple(json_field, 'a.b') as fk"),
+ Seq(Row("dot_value"), Row("dot_value2"), Row(null))
+ )
+
+ // Multiple keys extraction
+ checkAnswer(
+ sql(
+ "SELECT fx, fy from t lateral view json_tuple(json_field, 'x', 'y') as fx, fy"),
+ Seq(Row("1", "2"), Row("3", "4"), Row(null, null))
+ )
+
+ // Non-existent key returns null
+ checkAnswer(
+ sql(
+ "SELECT fk from t lateral view json_tuple(json_field, 'nonexistent') as fk"),
+ Seq(Row(null), Row(null), Row(null))
+ )
+
+ // Mix of existing and non-existing keys
+ checkAnswer(
+ sql(
+ """SELECT fk, fm from t
+ |lateral view json_tuple(json_field, 'k', 'missing') as fk, fm""".stripMargin),
+ Seq(Row("v", null), Row("v2", null), Row(null, null))
+ )
+ }
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]