Are you running this in local mode or in cluster mode? If you are running in cluster mode, have you made sure that numpy is installed on every node?
On Tue 5 Jun, 2018, 2:43 AM @Nandan@, <nandanpriyadarshi...@gmail.com> wrote:

> Hi,
> I am getting this error:
>
> ---------------------------------------------------------------------------
> Py4JError                                 Traceback (most recent call last)
> <ipython-input-2-a208cdba2c46> in <module>()
>       3 TOTAL = 1000000
>       4 dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(TOTAL)]).cache()
> ----> 5 print("Number of random points:", dots.count())
>       6
>       7 stats = dots.stats()
>
> C:\opt\spark\python\pyspark\rdd.py in count(self)
>    1039         3
>    1040         """
> -> 1041         return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
>    1042
>    1043     def stats(self):
>
> C:\opt\spark\python\pyspark\rdd.py in sum(self)
>    1030         6.0
>    1031         """
> -> 1032         return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
>    1033
>    1034     def count(self):
>
> C:\opt\spark\python\pyspark\rdd.py in fold(self, zeroValue, op)
>     904         # zeroValue provided to each partition is unique from the one provided
>     905         # to the final reduce call
> --> 906         vals = self.mapPartitions(func).collect()
>     907         return reduce(op, vals, zeroValue)
>     908
>
> C:\opt\spark\python\pyspark\rdd.py in collect(self)
>     807         """
>     808         with SCCallSiteSync(self.context) as css:
> --> 809             port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
>     810         return list(_load_from_socket(port, self._jrdd_deserializer))
>     811
>
> C:\opt\spark\python\pyspark\rdd.py in _jrdd(self)
>    2453
>    2454         wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
> -> 2455                                       self._jrdd_deserializer, profiler)
>    2456         python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
>    2457                                              self.preservesPartitioning)
>
> C:\opt\spark\python\pyspark\rdd.py in _wrap_function(sc, func, deserializer, serializer, profiler)
>    2388     pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
>    2389     return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
> -> 2390                                   sc.pythonVer, broadcast_vars, sc._javaAccumulator)
>    2391
>    2392
>
> C:\ProgramData\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
>    1426         answer = self._gateway_client.send_command(command)
>    1427         return_value = get_return_value(
> -> 1428             answer, self._gateway_client, None, self._fqn)
>    1429
>    1430         for temp_arg in temp_args:
>
> C:\opt\spark\python\pyspark\sql\utils.py in deco(*a, **kw)
>      61     def deco(*a, **kw):
>      62         try:
> ---> 63             return f(*a, **kw)
>      64         except py4j.protocol.Py4JJavaError as e:
>      65             s = e.java_exception.toString()
>
> C:\ProgramData\Anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
>     322                 raise Py4JError(
>     323                     "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
> --> 324                     format(target_id, ".", name, value))
>     325             else:
>     326                 raise Py4JError(
>
> Py4JError: An error occurred while calling None.org.apache.spark.api.python.PythonFunction.
> Trace:
> py4j.Py4JException: Constructor org.apache.spark.api.python.PythonFunction([class [B, class java.util.HashMap, class java.util.ArrayList, class java.lang.String, class java.lang.String, class java.util.ArrayList, class org.apache.spark.api.python.PythonAccumulatorV2]) does not exist
>         at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
>         at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
>         at py4j.Gateway.invoke(Gateway.java:235)
>         at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
>         at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
>         at py4j.GatewayConnection.run(GatewayConnection.java:214)
>         at java.lang.Thread.run(Thread.java:745)
>
> I installed Spark 2.0.2 on Windows 10, and the code is as below:
>
>> sc = SparkContext.getOrCreate()
>> sc
>> import numpy as np
>> TOTAL = 1000000
>> dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in
>> range(TOTAL)]).cache()
>> print("Number of random points:", dots.count())
>> stats = dots.stats()
>> print('Mean:', stats.mean())
>> print('stdev:', stats.stdev())
>
> I get this error when I run the NumPy code.
> Please tell me what's wrong here.