Hi, on Windows, in local mode, using PySpark, I got an error about "excessively deep recursion". I'm using a module for lemmatizing/stemming which uses a DLL and some binary files (the module is a Python wrapper around C code). The Spark version is 1.4.0. Any idea what is going on?
--------------------------------------------------------------------------- PicklingError Traceback (most recent call last) <ipython-input-10-f699414a7f1a> in <module>() 1 df1 = df.map(lambda p: lemmatizer.lemmatize('working')) ----> 2 df1.take(1) C:\spark/python\pyspark\rdd.pyc in take(self, num) 1263 1264 p = range(partsScanned, min(partsScanned + numPartsToTry, totalParts)) -> 1265 res = self.context.runJob(self, takeUpToNumLeft, p, True) 1266 1267 items += res C:\spark/python\pyspark\context.pyc in runJob(self, rdd, partitionFunc, partitions, allowLocal) 878 # SparkContext#runJob. 879 mappedRDD = rdd.mapPartitions(partitionFunc) --> 880 port = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, partitions, 881 allowLocal) 882 return list(_load_from_socket(port, mappedRDD._jrdd_deserializer)) C:\spark/python\pyspark\rdd.pyc in _jrdd(self) 2349 command = (self.func, profiler, self._prev_jrdd_deserializer, 2350 self._jrdd_deserializer) -> 2351 pickled_cmd, bvars, env, includes = _prepare_for_python_RDD(self.ctx, command, self) 2352 python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), 2353 bytearray(pickled_cmd), C:\spark/python\pyspark\rdd.pyc in _prepare_for_python_RDD(sc, command, obj) 2269 # the serialized command will be compressed by broadcast 2270 ser = CloudPickleSerializer() -> 2271 pickled_command = ser.dumps(command) 2272 if len(pickled_command) > (1 << 20): # 1M 2273 # The broadcast will have same life cycle as created PythonRDD C:\spark/python\pyspark\serializers.pyc in dumps(self, obj) 425 426 def dumps(self, obj): --> 427 return cloudpickle.dumps(obj, 2) 428 429 C:\spark/python\pyspark\cloudpickle.pyc in dumps(obj, protocol) 620 621 cp = CloudPickler(file,protocol) --> 622 cp.dump(obj) 623 624 return file.getvalue() C:\spark/python\pyspark\cloudpickle.pyc in dump(self, obj) 109 if 'recursion' in e.args[0]: 110 msg = """Could not pickle object as excessively deep recursion required.""" --> 111 raise pickle.PicklingError(msg) 112 113 def 
save_memoryview(self, obj): PicklingError: Could not pickle object as excessively deep recursion required.