[ https://issues.apache.org/jira/browse/SPARK-16086 ]
Davies Liu resolved SPARK-16086.
--------------------------------
    Resolution: Fixed
 Fix Version/s: 1.6.2
                1.5.3
                2.0.0

Issue resolved by pull request 13793
[https://github.com/apache/spark/pull/13793]

> Python UDF fails when there are no arguments
> ---------------------------------------------
>
>                 Key: SPARK-16086
>                 URL: https://issues.apache.org/jira/browse/SPARK-16086
>             Project: Spark
>          Issue Type: Bug
>          Components: PySpark, SQL
>    Affects Versions: 1.5.2, 1.6.1
>            Reporter: Davies Liu
>            Assignee: Davies Liu
>             Fix For: 2.0.0, 1.5.3, 1.6.2
>
> Registering a Python UDF that takes no arguments and calling it from SQL
> fails on the affected versions:
> {code}
> >>> sqlContext.registerFunction("f", lambda: "a")
> >>> sqlContext.sql("select f()").show()
> {code}
> {code}
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in
> stage 171.0 failed 4 times, most recent failure: Lost task 0.3 in stage 171.0
> (TID 6226, ip-10-0-243-36.us-west-2.compute.internal):
> org.apache.spark.api.python.PythonException: Traceback (most recent call last):
>   File "/databricks/spark/python/pyspark/worker.py", line 111, in main
>     process()
>   File "/databricks/spark/python/pyspark/worker.py", line 106, in process
>     serializer.dump_stream(func(split_index, iterator), outfile)
>   File "/databricks/spark/python/pyspark/serializers.py", line 263, in dump_stream
>     vs = list(itertools.islice(iterator, batch))
>   File "/databricks/spark/python/pyspark/serializers.py", line 139, in load_stream
>     yield self._read_with_length(stream)
>   File "/databricks/spark/python/pyspark/serializers.py", line 164, in _read_with_length
>     return self.loads(obj)
>   File "/databricks/spark/python/pyspark/serializers.py", line 422, in loads
>     return pickle.loads(obj)
>   File "/databricks/spark/python/pyspark/sql/types.py", line 1159, in <lambda>
>     return lambda *a: dataType.fromInternal(a)
>   File "/databricks/spark/python/pyspark/sql/types.py", line 568, in fromInternal
>     return _create_row(self.names, values)
>   File "/databricks/spark/python/pyspark/sql/types.py", line 1163, in _create_row
>     row = Row(*values)
>   File "/databricks/spark/python/pyspark/sql/types.py", line 1210, in __new__
>     raise ValueError("No args or kwargs")
> ValueError: (ValueError('No args or kwargs',), <function <lambda> at 0x7f3bbc463320>, ())
>
>   at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
>   at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:207)
>   at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:125)
>   at org.apache.spark.sql.execution.BatchPythonEvaluation$$anonfun$doExecute$1.apply(python.scala:405)
>   at org.apache.spark.sql.execution.BatchPythonEvaluation$$anonfun$doExecute$1.apply(python.scala:370)
>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>   at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:72)
>   at org.apache.spark.scheduler.Task.run(Task.scala:96)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:222)
>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>   at java.lang.Thread.run(Thread.java:745)
> {code}
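
The Python-side traceback pins the failure to {{row = Row(*values)}} in _create_row: a zero-argument UDF leaves the worker with an empty value tuple for each input row, and Row.__new__ rejects the empty call. A minimal sketch of that failure mode, assuming an affected PySpark version (e.g. 1.5.2 or 1.6.1) is on the path; no running cluster is needed:

{code}
# Sketch of the failure path from the traceback above, on an affected
# PySpark version. With a zero-argument UDF, _create_row ends up calling
# Row() with no fields, which Row.__new__ rejects.
from pyspark.sql import Row

try:
    Row()  # mirrors row = Row(*values) with values == ()
except ValueError as e:
    print(e)  # prints "No args or kwargs" on the affected versions
{code}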
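Until one of the fixed releases (2.0.0, 1.6.2, 1.5.3) is available, one possible workaround is to give the UDF a throwaway parameter so the worker never has to build an empty row; the dummy argument below is illustrative only:

{code}
>>> # Hypothetical workaround on affected versions: add a dummy parameter
>>> # so the UDF is never invoked with zero columns.
>>> sqlContext.registerFunction("f", lambda _: "a")
>>> sqlContext.sql("select f(1)").show()
{code}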