This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c7d8c2b8695a [SPARK-53470][SQL] ExtractValue expressions should always do type checking
c7d8c2b8695a is described below

commit c7d8c2b8695abecca4a990dc2e3952d6d9a0fd2d
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Thu Sep 4 10:06:04 2025 +0800

    [SPARK-53470][SQL] ExtractValue expressions should always do type checking

    ### What changes were proposed in this pull request?

    We skip type checking for these `ExtractValue` expressions because we
    assume they are always created safely by `ExtractValue.apply`. However,
    plan transformations may change the attribute data type and break these
    expressions. This PR adds type checking for these expressions, so that
    plan change validation can catch the problematic rule earlier, instead
    of triggering a Java cast exception at a later point.

    ### Why are the changes needed?

    Better error reporting.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    New test.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #52216 from cloud-fan/check.

    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../expressions/collectionOperations.scala         | 26 ++++++----
 .../expressions/complexTypeExtractors.scala        | 58 ++++++++--------------
 .../OptimizerStructuralIntegrityCheckerSuite.scala | 50 ++++++++++++++++++-
 .../sql-tests/analyzer-results/array.sql.out       | 34 ++++++-------
 .../analyzer-results/nonansi/array.sql.out         | 34 ++++++-------
 .../test/resources/sql-tests/results/array.sql.out | 32 ++++++------
 .../sql-tests/results/nonansi/array.sql.out        | 32 ++++++------
 7 files changed, 152 insertions(+), 114 deletions(-)
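To make the failure mode concrete: a minimal sketch of how a plan
transformation can invalidate an ExtractValue expression that was created
safely (illustrative only, not part of the commit; it assumes spark-catalyst
on the classpath, and the object name is made up).

    import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetStructField}
    import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

    object ExtractValueBreakageSketch {
      def main(args: Array[String]): Unit = {
        // A struct-typed attribute; extracting field 0 is type-correct here,
        // which is what ExtractValue.apply guarantees at creation time.
        val structAttr = AttributeReference("c1", new StructType().add("f1", IntegerType))()
        val extract = GetStructField(structAttr, 0, Some("f1"))

        // A buggy rule swaps in the "same" attribute with a different type.
        val broken = extract.copy(child = structAttr.withDataType(StringType))

        // Before this commit, `broken` still claimed to be resolved and only
        // failed later (ClassCastException when dataType casts to StructType).
        // With checkInputDataTypes in place, it is simply unresolved, so plan
        // change validation can reject the offending rule immediately.
        println(broken.resolved)
      }
    }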
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 59196a959f75..5fe3b10bd04c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2561,20 +2561,26 @@ case class ArrayPosition(left: Expression, right: Expression)
   """,
   since = "3.4.0",
   group = "array_funcs")
-case class Get(
-    left: Expression,
-    right: Expression,
-    replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules {
+case class Get(left: Expression, right: Expression)
+  extends BinaryExpression with RuntimeReplaceable with ImplicitCastInputTypes {

-  def this(left: Expression, right: Expression) =
-    this(left, right, GetArrayItem(left, right, failOnError = false))
+  override def inputTypes: Seq[AbstractDataType] = left.dataType match {
+    case _: ArrayType => Seq(ArrayType, IntegerType)
+    // Do not apply implicit cast if the first argument is not array type.
+    case _ => Nil
+  }

-  override def prettyName: String = "get"
+  override def checkInputDataTypes(): TypeCheckResult = {
+    ExpectsInputTypes.checkInputDataTypes(Seq(left, right), Seq(ArrayType, IntegerType))
+  }

-  override def parameters: Seq[Expression] = Seq(left, right)
+  override lazy val replacement: Expression = GetArrayItem(left, right, failOnError = false)

-  override protected def withNewChildInternal(newChild: Expression): Expression =
-    this.copy(replacement = newChild)
+  override def prettyName: String = "get"
+
+  override def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression = {
+    copy(left = newLeft, right = newRight)
+  }
 }

 /**
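The user-visible effect of the reworked `get` shows up in the golden files
below: the index is implicitly cast only when the first argument is really an
array, and type errors now cite get(...) itself rather than its GetArrayItem
replacement. Roughly, assuming a SparkSession named `spark` (outputs
abbreviated):

    // Array first argument: the index gets an implicit cast.
    spark.sql("SELECT get(array(1, 2, 3), null)")
    // analyzed as: get(array(1, 2, 3), cast(null as int))

    // Non-array first argument: fails Get's own checkInputDataTypes, so the
    // error reports sqlExpr "get(1, 0)" instead of the old "1[0]".
    spark.sql("SELECT get(1, 0)")
    // DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE, requiredType "ARRAY"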
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index 5cdbdf3f0e7c..8e612fe95c99 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.QueryContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
 import org.apache.spark.sql.catalyst.trees.TreePattern.{EXTRACT_VALUE, TreePattern}
 import org.apache.spark.sql.catalyst.util.{quoteIdentifier, ArrayData, GenericArrayData, MapData, TypeUtils}
@@ -146,7 +145,9 @@ trait ExtractValue extends Expression with QueryErrorsBase {
  * For example, when get field `yEAr` from `<year: int, month: int>`, we should pass in `yEAr`.
  */
 case class GetStructField(child: Expression, ordinal: Int, name: Option[String] = None)
-  extends UnaryExpression with ExtractValue {
+  extends UnaryExpression with ExtractValue with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(StructType, IntegralType)

   lazy val childSchema = child.dataType.asInstanceOf[StructType]
@@ -207,6 +208,13 @@ case class GetArrayStructFields(
     numFields: Int,
     containsNull: Boolean) extends UnaryExpression with ExtractValue {

+  override def checkInputDataTypes(): TypeCheckResult = child.dataType match {
+    case ArrayType(_: StructType, _) => TypeCheckResult.TypeCheckSuccess
+    // This should never happen, unless we hit a bug.
+    case other => TypeCheckResult.TypeCheckFailure(
+      "GetArrayStructFields.child must be array of struct type, but got " + other)
+  }
+
   override def dataType: DataType = ArrayType(field.dataType, containsNull)
   override def toString: String = s"$child.${field.name}"
   override def sql: String = s"${child.sql}.${quoteIdentifier(field.name)}"
@@ -285,8 +293,7 @@ case class GetArrayItem(
   with ExtractValue
   with SupportQueryContext {

-  // We have done type checking for child in `ExtractValue`, so only need to check the `ordinal`.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegralType)
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, IntegralType)

   override def toString: String = s"$child[$ordinal]"
   override def sql: String = s"${child.sql}[${ordinal.sql}]"
@@ -355,30 +362,6 @@ case class GetArrayItem(
     })
   }

-  override def checkInputDataTypes(): TypeCheckResult = {
-    (left.dataType, right.dataType) match {
-      case (_: ArrayType, e2) if !e2.isInstanceOf[IntegralType] =>
-        DataTypeMismatch(
-          errorSubClass = "UNEXPECTED_INPUT_TYPE",
-          messageParameters = Map(
-            "paramIndex" -> ordinalNumber(1),
-            "requiredType" -> toSQLType(IntegralType),
-            "inputSql" -> toSQLExpr(right),
-            "inputType" -> toSQLType(right.dataType))
-        )
-      case (e1, _) if !e1.isInstanceOf[ArrayType] =>
-        DataTypeMismatch(
-          errorSubClass = "UNEXPECTED_INPUT_TYPE",
-          messageParameters = Map(
-            "paramIndex" -> ordinalNumber(0),
-            "requiredType" -> toSQLType(TypeCollection(ArrayType)),
-            "inputSql" -> toSQLExpr(left),
-            "inputType" -> toSQLType(left.dataType))
-        )
-      case _ => TypeCheckResult.TypeCheckSuccess
-    }
-  }
-
   override protected def withNewChildrenInternal(
       newLeft: Expression, newRight: Expression): GetArrayItem =
     copy(child = newLeft, ordinal = newRight)
@@ -507,16 +490,19 @@ case class GetMapValue(child: Expression, key: Expression)

   private[catalyst] def keyType = child.dataType.asInstanceOf[MapType].keyType

-  override def checkInputDataTypes(): TypeCheckResult = {
-    super.checkInputDataTypes() match {
-      case f if f.isFailure => f
-      case TypeCheckResult.TypeCheckSuccess =>
-        TypeUtils.checkForOrderingExpr(keyType, prettyName)
-    }
+  override def checkInputDataTypes(): TypeCheckResult = child.dataType match {
+    case _: MapType =>
+      super.checkInputDataTypes() match {
+        case f if f.isFailure => f
+        case TypeCheckResult.TypeCheckSuccess =>
+          TypeUtils.checkForOrderingExpr(keyType, prettyName)
+      }
+    // This should never happen, unless we hit a bug.
+    case other => TypeCheckResult.TypeCheckFailure(
+      "GetMapValue.child must be map type, but got " + other)
   }

-  // We have done type checking for child in `ExtractValue`, so only need to check the `key`.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, keyType)
+  override def inputTypes: Seq[AbstractDataType] = Seq(MapType, keyType)

   override def toString: String = s"$child[$key]"
   override def sql: String = s"${child.sql}[${key.sql}]"
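The checks added to GetArrayStructFields and GetMapValue are defensive: they
should only fire when a rule has corrupted the plan. A quick sketch of the
GetMapValue case (illustrative, e.g. in a REPL with spark-catalyst on the
classpath):

    import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetMapValue, Literal}
    import org.apache.spark.sql.types.StringType

    // A child that is not a map, as a broken rule might produce.
    val notAMap = AttributeReference("c4", StringType)()
    val extract = GetMapValue(notAMap, Literal("key"))

    // With this commit: TypeCheckFailure("GetMapValue.child must be map type,
    // but got StringType"), instead of a ClassCastException from keyType.
    println(extract.checkInputDataTypes())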
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
index 94ed80916eed..09eb1f586421 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala
@@ -22,11 +22,13 @@ import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2Sess
 import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalRelation, LogicalPlan, OneRowRelation, Project}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LocalRelation, LogicalPlan, OneRowRelation, Project}
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{BooleanType, StringType, StructType}

 class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {

@@ -39,6 +41,18 @@ class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {
       case agg @ Aggregate(Nil, aggregateExpressions, child, _) =>
         // Project cannot host AggregateExpression
         Project(aggregateExpressions, child)
+      case Filter(cond, child) =>
+        val newCond = cond.transform {
+          case g @ GetStructField(a: AttributeReference, _, _) =>
+            g.copy(child = a.withDataType(StringType))
+          case g @ GetArrayStructFields(a: AttributeReference, _, _, _, _) =>
+            g.copy(child = a.withDataType(StringType))
+          case g @ GetArrayItem(a: AttributeReference, _, _) =>
+            g.copy(child = a.withDataType(StringType))
+          case g @ GetMapValue(a: AttributeReference, _) =>
+            g.copy(child = a.withDataType(StringType))
+        }
+        Filter(newCond, child)
     }
   }

@@ -79,6 +93,38 @@ class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {
     SimpleTestOptimizer.execute(analyzed)
   }

+  test("check for invalid plan after execution of rule - bad ExtractValue") {
+    val input = LocalRelation(
+      $"c1".struct(new StructType().add("f1", "boolean")),
+      $"c2".array(new StructType().add("f1", "boolean")),
+      $"c3".array(BooleanType),
+      new DslAttr($"c4").map(StringType, BooleanType)
+    )
+
+    def assertCheckFailed(expr: Expression): Unit = {
+      val analyzed = Filter(expr, input).analyze
+      assert(analyzed.resolved)
+      // Should fail verification with the OptimizeRuleBreakSI rule
+      val message = intercept[SparkException] {
+        Optimize.execute(analyzed)
+      }.getMessage
+      val ruleName = OptimizeRuleBreakSI.ruleName
+      assert(message.contains(s"Rule $ruleName in batch OptimizeRuleBreakSI"))
+      assert(message.contains("generated an invalid plan"))
+    }
+
+    // This resolution validation should be included in the lightweight
+    // validator so that it's validated in production.
+    withSQLConf(
+      SQLConf.PLAN_CHANGE_VALIDATION.key -> "false",
+      SQLConf.LIGHTWEIGHT_PLAN_CHANGE_VALIDATION.key -> "true") {
+      assertCheckFailed($"c1.f1")
+      assertCheckFailed($"c2.f1".getItem(0))
+      assertCheckFailed($"c3".getItem(0))
+      assertCheckFailed($"c4".getItem("key"))
+    }
+  }
+
   test("check for invalid plan before execution of any rule") {
     val analyzed =
       Aggregate(Nil, Seq[NamedExpression](max($"id") as "m",
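For reference, the two flags toggled above: the new test disables the full
plan change validation and enables only the lightweight variant, to make sure
this resolution check is caught by the validator that (per the comment in the
test) runs in production. A sketch of the same toggle in any SQLHelper-based
test:

    withSQLConf(
      SQLConf.PLAN_CHANGE_VALIDATION.key -> "false",
      SQLConf.LIGHTWEIGHT_PLAN_CHANGE_VALIDATION.key -> "true") {
      // exercise the optimizer; an invalid plan now fails fast here
    }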
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out
index 6d6868f700e8..af5b4f9b129e 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out
@@ -416,7 +416,7 @@ Project [get(array(1, 2, 3), 3) AS get(array(1, 2, 3), 3)#x]
 -- !query
 select get(array(1, 2, 3), null)
 -- !query analysis
-Project [get(array(1, 2, 3), null) AS get(array(1, 2, 3), NULL)#x]
+Project [get(array(1, 2, 3), cast(null as int)) AS get(array(1, 2, 3), NULL)#x]
 +- OneRowRelation
@@ -438,8 +438,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"INT\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -462,8 +462,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"INT\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -486,8 +486,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -510,8 +510,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -534,8 +534,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -558,8 +558,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -582,8 +582,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[NULL]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, NULL)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -606,8 +606,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"CAST(NULL AS STRING)\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"CAST(NULL AS STRING)[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(CAST(NULL AS STRING), 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
"requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(CAST(NULL AS STRING), 0)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/array.sql.out index 09b4e93d86ec..c60e2c3737b4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/array.sql.out @@ -416,7 +416,7 @@ Project [get(array(1, 2, 3), 3) AS get(array(1, 2, 3), 3)#x] -- !query select get(array(1, 2, 3), null) -- !query analysis -Project [get(array(1, 2, 3), null) AS get(array(1, 2, 3), NULL)#x] +Project [get(array(1, 2, 3), cast(null as int)) AS get(array(1, 2, 3), NULL)#x] +- OneRowRelation @@ -438,8 +438,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"INT\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, 0)\"" }, "queryContext" : [ { "objectType" : "", @@ -462,8 +462,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"INT\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -486,8 +486,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, 0)\"" }, "queryContext" : [ { "objectType" : "", @@ -510,8 +510,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -534,8 +534,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, 0)\"" }, "queryContext" : [ { "objectType" : "", @@ -558,8 +558,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -582,8 +582,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[NULL]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, NULL)\"" }, "queryContext" : [ { "objectType" : "", @@ -606,8 +606,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"CAST(NULL AS STRING)\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"CAST(NULL AS STRING)[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(CAST(NULL AS STRING), 0)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 
diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out
index 94994ed29800..90a605734c1a 100644
--- a/sql/core/src/test/resources/sql-tests/results/array.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out
@@ -538,8 +538,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"INT\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -564,8 +564,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"INT\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -590,8 +590,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -616,8 +616,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"1\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"1[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(1, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -642,8 +642,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, 0)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -668,8 +668,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[-1]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, -1)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -694,8 +694,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"NULL\"",
     "inputType" : "\"VOID\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"NULL[NULL]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(NULL, NULL)\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -720,8 +720,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
     "inputSql" : "\"CAST(NULL AS STRING)\"",
     "inputType" : "\"STRING\"",
     "paramIndex" : "first",
-    "requiredType" : "(\"ARRAY\")",
-    "sqlExpr" : "\"CAST(NULL AS STRING)[0]\""
+    "requiredType" : "\"ARRAY\"",
+    "sqlExpr" : "\"get(CAST(NULL AS STRING), 0)\""
  },
   "queryContext" : [ {
     "objectType" : "",
"first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -478,8 +478,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, 0)\"" }, "queryContext" : [ { "objectType" : "", @@ -504,8 +504,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"1\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"1[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(1, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -530,8 +530,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, 0)\"" }, "queryContext" : [ { "objectType" : "", @@ -556,8 +556,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[-1]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, -1)\"" }, "queryContext" : [ { "objectType" : "", @@ -582,8 +582,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"NULL\"", "inputType" : "\"VOID\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"NULL[NULL]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(NULL, NULL)\"" }, "queryContext" : [ { "objectType" : "", @@ -608,8 +608,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputSql" : "\"CAST(NULL AS STRING)\"", "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "(\"ARRAY\")", - "sqlExpr" : "\"CAST(NULL AS STRING)[0]\"" + "requiredType" : "\"ARRAY\"", + "sqlExpr" : "\"get(CAST(NULL AS STRING), 0)\"" }, "queryContext" : [ { "objectType" : "", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org