[ https://issues.apache.org/jira/browse/SPARK-3855?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Patrick Wendell resolved SPARK-3855.
------------------------------------
       Resolution: Fixed
    Fix Version/s: 1.2.0

> Binding Exception when running PythonUDFs
> -----------------------------------------
>
>                 Key: SPARK-3855
>                 URL: https://issues.apache.org/jira/browse/SPARK-3855
>             Project: Spark
>          Issue Type: Bug
>          Components: PySpark, SQL
>    Affects Versions: 1.1.0
>            Reporter: Michael Armbrust
>            Assignee: Michael Armbrust
>             Fix For: 1.2.0
>
>
> {code}
> from pyspark import SparkContext
> from pyspark.sql import SQLContext, Row
> sc = SparkContext()
> sqlContext = SQLContext(sc)
> sqlContext.registerFunction("strlen", lambda string: len(string))
> sqlContext.inferSchema(sc.parallelize([Row(a="test")])).registerTempTable("test")
> srdd = sqlContext.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1")
> # Print the query plans, then trigger execution.
> print(srdd._jschema_rdd.baseSchemaRDD().queryExecution().toString())
> print(srdd.collect())
> {code}
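>
> For reference, the equivalent scenario on the modern API; a minimal sketch, assuming Spark 2.x+ where SparkSession supersedes SQLContext (the app name is illustrative):
> {code}
> from pyspark.sql import Row, SparkSession
> from pyspark.sql.types import IntegerType
>
> # Assumed modern equivalent of the repro above (Spark 2.x+).
> spark = SparkSession.builder.appName("spark-3855-repro").getOrCreate()
> spark.udf.register("strlen", lambda s: len(s), IntegerType())
> spark.createDataFrame([Row(a="test")]).createOrReplaceTempView("test")
> df = spark.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1")
> df.explain(True)     # prints the parsed/analyzed/optimized/physical plans
> print(df.collect())
> {code}
> On 1.2.0 and later, where this issue is fixed, the query runs without the binding error.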
> output:
> {code}
> == Parsed Logical Plan ==
> Project ['strlen('a) AS c0#1]
>  Filter ('strlen('a) > 1)
>   UnresolvedRelation None, test, None
> == Analyzed Logical Plan ==
> Project [c0#1]
>  Project [pythonUDF#2 AS c0#1]
>   EvaluatePython PythonUDF#strlen(a#0)
>    Project [a#0]
>     Filter (CAST(pythonUDF#3, DoubleType) > CAST(1, DoubleType))
>      EvaluatePython PythonUDF#strlen(a#0)
>       SparkLogicalPlan (ExistingRdd [a#0], MapPartitionsRDD[7] at mapPartitions at SQLContext.scala:525)
> == Optimized Logical Plan ==
> Project [pythonUDF#2 AS c0#1]
>  EvaluatePython PythonUDF#strlen(a#0)
>   Project [a#0]
>    Filter (CAST(pythonUDF#3, DoubleType) > 1.0)
>     EvaluatePython PythonUDF#strlen(a#0)
>      SparkLogicalPlan (ExistingRdd [a#0], MapPartitionsRDD[7] at mapPartitions at SQLContext.scala:525)
> == Physical Plan ==
> Project [pythonUDF#2 AS c0#1]
>  BatchPythonEvaluation PythonUDF#strlen(a#0), [a#0,pythonUDF#5]
>   Project [a#0]
>    Filter (CAST(pythonUDF#3, DoubleType) > 1.0)
>     BatchPythonEvaluation PythonUDF#strlen(a#0), [a#0,pythonUDF#3]
>      ExistingRdd [a#0], MapPartitionsRDD[7] at mapPartitions at SQLContext.scala:525
> Code Generation: false
> == RDD ==
> 14/10/08 15:03:00 ERROR Executor: Exception in task 1.0 in stage 4.0 (TID 9)
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: pythonUDF#2
>       at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
>       at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:47)
>       at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:46)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144)
>       at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162)
>       at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>       at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>       at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>       at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
>       at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
>       at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
>       at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
>       at scala.collection.AbstractIterator.to(Iterator.scala:1157)
>       at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
>       at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
>       at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
>       at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135)
>       at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:46)
>       at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection$$anonfun$$init$$2.apply(Projection.scala:52)
>       at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection$$anonfun$$init$$2.apply(Projection.scala:52)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at scala.collection.immutable.List.foreach(List.scala:318)
>       at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>       at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>       at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.<init>(Projection.scala:52)
>       at org.apache.spark.sql.execution.SparkPlan$$anonfun$newMutableProjection$1.apply(SparkPlan.scala:106)
>       at org.apache.spark.sql.execution.SparkPlan$$anonfun$newMutableProjection$1.apply(SparkPlan.scala:106)
>       at org.apache.spark.sql.execution.Project$$anonfun$1.apply(basicOperators.scala:43)
>       at org.apache.spark.sql.execution.Project$$anonfun$1.apply(basicOperators.scala:42)
>       at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
>       at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:599)
>       at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>       at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>       at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
>       at org.apache.spark.scheduler.Task.run(Task.scala:56)
>       at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:182)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>       at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.RuntimeException: Couldn't find pythonUDF#2 in [a#0,pythonUDF#5]
>       at scala.sys.package$.error(package.scala:27)
>       at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:53)
>       at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:47)
>       at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
>       ... 46 more
> 14/10/08 15:03:00 ERROR Executor: Exception in task 0.0 in stage 4.0 (TID 8)
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: pythonUDF#2
>       ... (stack trace identical to the one for TID 9 above)
> {code}
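>
> The plans above expose the mismatch: the analyzed plan extracts the UDF twice (pythonUDF#2 for the projection, pythonUDF#3 for the filter), but in the physical plan the projection-side BatchPythonEvaluation emits pythonUDF#5 while the enclosing Project still references pythonUDF#2, so attribute binding fails at execution with "Couldn't find pythonUDF#2 in [a#0,pythonUDF#5]". A toy sketch of that binding step (hypothetical names, not the actual Catalyst code):
> {code}
> # Toy model of attribute binding: look up a referenced attribute in the
> # child operator's output and return its ordinal. Hypothetical helper,
> # for illustration only; not Spark's BindReferences implementation.
> def bind_reference(attr, child_output):
>     for ordinal, out in enumerate(child_output):
>         if out == attr:
>             return ordinal
>     raise RuntimeError("Couldn't find %s in %s" % (attr, child_output))
>
> # The Project references pythonUDF#2, but the child BatchPythonEvaluation
> # emits [a#0, pythonUDF#5], reproducing the failure in the log above:
> bind_reference("pythonUDF#2", ["a#0", "pythonUDF#5"])
> {code}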


