Hi guys,
I want transform Row using NamedExpression.
Below is the code snipped that I am using :
def apply(dataFrame: DataFrame, selectExpressions:
java.util.List[String]): RDD[UnsafeRow] = {
val exprArray = selectExpressions.map(s =>
Column(SqlParser.parseExpression(s)).named
)
val inputSchema = dataFrame.logicalPlan.output
val transformedRDD = dataFrame.mapPartitions(
iter => {
val project = UnsafeProjection.create(exprArray,inputSchema)
iter.map{
row =>
project(InternalRow.fromSeq(row.toSeq))
}
})
transformedRDD
}
The problem is that expression becomes unevaluable :
Caused by: java.lang.UnsupportedOperationException: Cannot evaluate
expression: 'a
at org.apache.spark.sql.catalyst.expressions.Unevaluable$class.
genCode(Expression.scala:233)
at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.g
enCode(unresolved.scala:53)
at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
n$gen$2.apply(Expression.scala:106)
at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
n$gen$2.apply(Expression.scala:102)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.sql.catalyst.expressions.Expression.gen(Exp
ression.scala:102)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
at scala.collection.TraversableLike$$anonfun$map$1.apply(
TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(
TraversableLike.scala:244)
at scala.collection.mutable.ResizableArray$class.foreach(Resiza
bleArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.
scala:47)
at scala.collection.TraversableLike$class.map(TraversableLike.
scala:244)
at scala.collection.AbstractTraversable.map(Traversable.scala:105)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text.generateExpressions(CodeGenerator.scala:464)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.createCode(GenerateUnsafeProjection.scala:281)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:324)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:317)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:32)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenera
tor.generate(CodeGenerator.scala:635)
at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
create(Projection.scala:125)
at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
create(Projection.scala:135)
at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
ScalaTransform.scala:31)
at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
ScalaTransform.scala:30)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsR
DD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.sca
la:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.
scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPool
Executor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoo
lExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
This might be because the Expression is unresolved.
Any help would be appreciated.
Thanks and Regards,
Aviral Agarwal