[ https://issues.apache.org/jira/browse/SPARK-15679?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
nalin garg updated SPARK-15679: ------------------------------- Description: {code} from pyspark import SparkContext from pyspark.sql import SQLContext, HiveContext import csv, io, StringIO from pyspark.sql.functions import * from pyspark.sql import Row from pyspark.sql import * from pyspark.sql import functions as F from pyspark.sql.functions import asc, desc sc = SparkContext("local", "Summary Report") sqlContext = SQLContext(sc) class ParallelBuild(object): def myFunc(self, s): l = s.split(',') print l[0], l[1] return l[0] def list_to_csv_str(x): output = StringIO.StringIO("") csv.writer(output).writerow(x) return output.getvalue().strip() def run(self): data = [(1,'a'), (1,'b'), (1,'c'), (2,'d'), (3,'r'), (4,'a'), (2,'t'), (3,'y'), (1,'f')] df = sqlContext.createDataFrame(data, schema= ['uid', 'address_uid']) return df if __name__ == "__main__": df_rdd = ParallelBuild().run().map(lambda line: line).persist() r = df_rdd.map(ParallelBuild().myFunc) r.count() {code} The above code raises a "'JavaPackage' object is not callable" exception. 
was: {code} from pyspark import SparkContext from pyspark.sql import SQLContext, HiveContext import csv, io, StringIO from pyspark.sql.functions import * from pyspark.sql import Row from pyspark.sql import * from pyspark.sql import functions as F from pyspark.sql.functions import asc, desc sc = SparkContext("local", "Summary Report") sqlContext = SQLContext(sc) class ParallelBuild(object): # def myFunc(self, s, i, k): # # words = s.split(" ") # # print words # # return len(words) # k = k + 1 # print s, i, k # return s, i, k def myFunc(self, s): l = s.split(',') print l[0], l[1] return l[0] def list_to_csv_str(x): output = StringIO.StringIO("") csv.writer(output).writerow(x) return output.getvalue().strip() def run(self): data = [(1,'a'), (1,'b'), (1,'c'), (2,'d'), (3,'r'), (4,'a'), (2,'t'), (3,'y'), (1,'f')] df = sqlContext.createDataFrame(data, schema= ['uid', 'address_uid']) return df if __name__ == "__main__": df_rdd = ParallelBuild().run().map(lambda line: line).persist() r = df_rdd.map(ParallelBuild().myFunc) r.count() {code} Above code returns JavaPackage not callable exception. 
> Passing functions do not work in classes > ---------------------------------------- > > Key: SPARK-15679 > URL: https://issues.apache.org/jira/browse/SPARK-15679 > Project: Spark > Issue Type: Bug > Reporter: nalin garg > > {code} > from pyspark import SparkContext > from pyspark.sql import SQLContext, HiveContext > import csv, io, StringIO > from pyspark.sql.functions import * > from pyspark.sql import Row > from pyspark.sql import * > from pyspark.sql import functions as F > from pyspark.sql.functions import asc, desc > sc = SparkContext("local", "Summary Report") > sqlContext = SQLContext(sc) > class ParallelBuild(object): > def myFunc(self, s): > l = s.split(',') > print l[0], l[1] > return l[0] > def list_to_csv_str(x): > output = StringIO.StringIO("") > csv.writer(output).writerow(x) > return output.getvalue().strip() > def run(self): > data = [(1,'a'), (1,'b'), (1,'c'), (2,'d'), (3,'r'), (4,'a'), > (2,'t'), (3,'y'), (1,'f')] > df = sqlContext.createDataFrame(data, schema= ['uid', 'address_uid']) > return df > if __name__ == "__main__": > df_rdd = ParallelBuild().run().map(lambda line: line).persist() > r = df_rdd.map(ParallelBuild().myFunc) > r.count() > {code} > The above code raises a "'JavaPackage' object is not callable" exception. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org