[ https://issues.apache.org/jira/browse/SPARK-42655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-42655: ------------------------------------ Assignee: Apache Spark > Incorrect ambiguous column reference error > ------------------------------------------ > > Key: SPARK-42655 > URL: https://issues.apache.org/jira/browse/SPARK-42655 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 3.2.0 > Reporter: Shrikant Prasad > Assignee: Apache Spark > Priority: Major > > val df1 = > sc.parallelize(List((1,2,3,4,5),(1,2,3,4,5))).toDF("id","col2","col3","col4", > "col5") > val op_cols_same_case = List("id","col2","col3","col4", "col5", "id") > val df2 = df1.select(op_cols_same_case.head, op_cols_same_case.tail: _*) > df2.select("id").show() > > This query runs fine. > > But when we change the casing of the op_cols to have a mix of upper & lower > case ("id" & "ID"), it throws an ambiguous column reference error: > > val df1 = > sc.parallelize(List((1,2,3,4,5),(1,2,3,4,5))).toDF("id","col2","col3","col4", > "col5") > val op_cols_mixed_case = List("id","col2","col3","col4", "col5", "ID") > val df3 = df1.select(op_cols_mixed_case.head, op_cols_mixed_case.tail: _*) > df3.select("id").show() > org.apache.spark.sql.AnalysisException: Reference 'id' is ambiguous, could > be: id, id. 
> at > org.apache.spark.sql.catalyst.expressions.package$AttributeSeq.resolve(package.scala:363) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveChildren(LogicalPlan.scala:112) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$resolveExpressionByPlanChildren$1(Analyzer.scala:1857) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$resolveExpression$2(Analyzer.scala:1787) > at > org.apache.spark.sql.catalyst.analysis.package$.withPosition(package.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.innerResolve$1(Analyzer.scala:1794) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.resolveExpression(Analyzer.scala:1812) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.resolveExpressionByPlanChildren(Analyzer.scala:1863) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$17.$anonfun$applyOrElse$94(Analyzer.scala:1577) > at > org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$1(QueryPlan.scala:193) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82) > at > org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:193) > at > org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:204) > at > org.apache.spark.sql.catalyst.plans.QueryPlan.$anonfun$mapExpressions$3(QueryPlan.scala:209) > at > scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) > at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) > at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) > at scala.collection.TraversableLike.map(TraversableLike.scala:286) > at scala.collection.TraversableLike.map$(TraversableLike.scala:279) > at scala.collection.AbstractTraversable.map(Traversable.scala:108) > at > 
org.apache.spark.sql.catalyst.plans.QueryPlan.recursiveTransform$1(QueryPlan.scala:209) > > Since Spark is case-insensitive by default, it should also work in the second case, where the > column list contains both upper- and lower-case column names. > It also works fine in Spark 2.3. > -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org