[ https://issues.apache.org/jira/browse/SPARK-21281?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16072623#comment-16072623 ]
Apache Spark commented on SPARK-21281:
--------------------------------------

User 'maropu' has created a pull request for this issue:
https://github.com/apache/spark/pull/18516

cannot create empty typed array column
--------------------------------------

                 Key: SPARK-21281
                 URL: https://issues.apache.org/jira/browse/SPARK-21281
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.1.1
            Reporter: Saif Addin
            Priority: Minor

Hi all,
I am running this piece of code:

{code:java}
val data = spark.read.parquet("somedata.parquet")
data.withColumn("my_new_column", array().cast("array<string>")).show
{code}

and it works fine:

{code:java}
+------+---------+--------------------+-------------+
|itemid|sentiment|                text|my_new_column|
+------+---------+--------------------+-------------+
|     1|        0|                 ...|           []|
|     2|        0|                 ...|           []|
|     3|        1|              omg...|           []|
|     4|        0|          .. Omga...|           []|
{code}
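For context on why this first case succeeds: `array()` with no arguments is typed as `array<null>` (element type `NullType`), and a `NullType` element can be cast to `StringType`. Below is a minimal, self-contained sketch of the working case; the inline DataFrame is a hypothetical stand-in for somedata.parquet:

{code:java}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

val spark = SparkSession.builder().master("local[*]").appName("SPARK-21281").getOrCreate()
import spark.implicits._

// Hypothetical stand-in for somedata.parquet.
val data = Seq((1, 0, "..."), (2, 0, "..."), (3, 1, "omg..."), (4, 0, ".. Omga..."))
  .toDF("itemid", "sentiment", "text")

// array() with no arguments has type array<null>; casting its NullType
// element to string is supported, so this yields an array<string> column.
val ok = data.withColumn("my_new_column", array().cast("array<string>"))
ok.printSchema()   // my_new_column: array (element: string)
ok.show()
{code}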
but when I do:

{code:java}
val data = spark.read.parquet("somedata.parquet")
import org.apache.spark.sql.types._
data.withColumn("my_new_column", array().cast("array<int>")).show
{code}

I get:

{code:java}
scala.MatchError: NullType (of class org.apache.spark.sql.types.NullType$)
  at org.apache.spark.sql.catalyst.expressions.Cast.castToInt(Cast.scala:264)
  at org.apache.spark.sql.catalyst.expressions.Cast.org$apache$spark$sql$catalyst$expressions$Cast$$cast(Cast.scala:433)
  at org.apache.spark.sql.catalyst.expressions.Cast.castArray(Cast.scala:380)
  at org.apache.spark.sql.catalyst.expressions.Cast.org$apache$spark$sql$catalyst$expressions$Cast$$cast(Cast.scala:437)
  at org.apache.spark.sql.catalyst.expressions.Cast.cast$lzycompute(Cast.scala:447)
  at org.apache.spark.sql.catalyst.expressions.Cast.cast(Cast.scala:447)
  at org.apache.spark.sql.catalyst.expressions.Cast.nullSafeEval(Cast.scala:449)
  at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:325)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:50)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:43)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:288)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:288)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:287)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionDown$1(QueryPlan.scala:248)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:258)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:262)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.AbstractTraversable.map(Traversable.scala:104)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:262)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:267)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsDown(QueryPlan.scala:267)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:43)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1.applyOrElse(expressions.scala:42)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:288)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:288)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:287)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:331)
  at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:329)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:293)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:277)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:42)
  at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$.apply(expressions.scala:41)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
  at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
  at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
  at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:73)
  at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:73)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:79)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:84)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:84)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2791)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2112)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2327)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:248)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:636)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:595)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:604)
  ... 50 elided
{code}
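Two workaround sketches that avoid casting from `NullType` altogether, assuming the same `data` DataFrame as above (note that `typedLit` only exists in Spark 2.2 and later):

{code:java}
import org.apache.spark.sql.functions._

// Workaround 1 (Spark 2.2+): typedLit carries the element type up front,
// so no cast from NullType is involved.
val viaTypedLit = data.withColumn("my_new_column", typedLit(Seq.empty[Int]))

// Workaround 2 (also works on 2.1.x): a zero-argument UDF whose return
// type is Seq[Int] produces an array<int> column directly.
val emptyIntArray = udf(() => Seq.empty[Int])
val viaUdf = data.withColumn("my_new_column", emptyIntArray())

viaTypedLit.printSchema()   // my_new_column: array (element: integer)
{code}

The pull request linked above addresses the no-argument `array()` case itself.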