Re: Pyspark UDF/map function throws pickling exception
pyspark - 2.2.1 spacy - 2.0.7 python - 3.6 Placing full logs here Traceback (most recent call last): File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 148, in dump return Pickler.dump(self, obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 409, in dump self.save(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 751, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 805, in _batch_appends save(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File 
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 805, in _batch_appends save(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File 
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 805, in _batch_appends save(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method
Pyspark UDF/map function throws pickling exception
import spacy nlp = spacy.load('en') def getPhrases(content): phrases = [] doc = nlp(str(content)) for chunks in doc.noun_chunks: phrases.append(chunks.text) return phrases The above function retrieves the noun phrases from the content and returns them as a list of phrases. def f(x) : print(x) description = xmlData.filter(col("dcterms:description").isNotNull()).select(col("dcterms:description").alias("desc")) description.rdd.flatMap(lambda row: getPhrases(row.desc)).foreach(f) When I try to call getPhrases from the flatMap, I get the exception below: """if islambda(obj) or obj.__code__.co_filename == '' or themodule is None: AttributeError: 'builtin_function_or_method' object has no attribute '__code__' """ The full stack trace is below: Traceback (most recent call last): File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 148, in dump return Pickler.dump(self, obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 409, in dump self.save(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 751, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File 
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 805, in _batch_appends save(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 805, in _batch_appends save(x) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 255, in save_function 
self.save_function_tuple(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py", line 292, in save_function_tuple save((code, closure, base_globals)) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 736, in save_tuple save(element) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 476, in save f(self, obj) # Call unbound method with explicit self File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py", line 781, in save_list self._batch_appends(obj) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",