Re: Pyspark UDF/map function throws pickling exception

2018-02-15 Thread Selvam Raman
pyspark - 2.2.1
spacy - 2.0.7
python - 3.6


Placing full logs here

Traceback (most recent call last):
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 148, in dump
return Pickler.dump(self, obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 409, in dump
self.save(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 751, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method 

Pyspark UDF/map function throws pickling exception

2018-02-15 Thread Selvam Raman
import spacy

# Load spaCy's 'en' model once, at module level; getPhrases() reads this global.
nlp = spacy.load('en')



def getPhrases(content):
    """Return the text of each noun chunk found in *content*.

    *content* is converted with ``str()`` and passed to the module-level
    ``nlp`` callable; the ``text`` of every item in the resulting
    ``doc.noun_chunks`` is collected, in iteration order.

    Args:
        content: Any value; it is stringified before parsing.

    Returns:
        A list with one ``text`` value per noun chunk (empty if there are none).
    """
    # NOTE(review): `nlp` is looked up as a module-level global, so shipping
    # this function to Spark executors presumably requires pickling that
    # object as well -- confirm against the traceback in this thread.
    doc = nlp(str(content))
    return [chunk.text for chunk in doc.noun_chunks]

The above function retrieves the noun phrases from the content and
returns them as a list of phrases.


def f(x):
    """Print *x* to standard output."""
    print(x)


# Keep only rows whose "dcterms:description" is not null and expose that
# column under the shorter alias "desc".
description = (
    xmlData
    .filter(col("dcterms:description").isNotNull())
    .select(col("dcterms:description").alias("desc"))
)

# Expand every row into its noun phrases, then hand each phrase to f.
description.rdd.flatMap(lambda row: getPhrases(row.desc)).foreach(f)

When I try to call getPhrases, I get the exception below:

"""if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule is None:
AttributeError: 'builtin_function_or_method' object has no attribute
'__code__' """

Full stack trace is below

Traceback (most recent call last):
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 148, in dump
return Pickler.dump(self, obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 409, in dump
self.save(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 751, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",