Thanks a lot, Arvind, for your quick reply! We were using the following approach to
execute SystemML through Spark in an IPython notebook. It ran perfectly
with Spark 2.1.0 and Anaconda 3.5, but with Spark 2.1.1 and Anaconda 3.6 I'm
getting this error.
import os
import sys
import numpy

# Use a raw string so the backslash in the Windows path is not treated as an escape
spark_path = r"C:\spark"
os.environ['SPARK_HOME'] = spark_path
os.environ['HADOOP_HOME'] = spark_path

# Make the Spark and py4j Python packages importable
sys.path.append(spark_path + "/bin")
sys.path.append(spark_path + "/python")
sys.path.append(spark_path + "/python/pyspark/")
sys.path.append(spark_path + "/python/lib")
sys.path.append(spark_path + "/python/lib/pyspark.zip")
sys.path.append(spark_path + "/python/lib/py4j-0.10.4-src.zip")

from pyspark import SparkContext
from pyspark import SparkConf

# Executor memory must be set before the context is created
SparkContext.setSystemProperty('spark.executor.memory', '15g')
sc = SparkContext("local[*]", "test")

from pyspark.sql import SQLContext
import systemml as sml

sqlCtx = SQLContext(sc)
ml = sml.MLContext(sc).setStatistics(True)
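
For reference, the call that fails later in the traceback looks roughly like this (reconstructed from the traceback; the DataFrame and the DML body are placeholders, since the real script is not shown in this thread, and only the output names are taken from the traceback):

from pyspark.sql import Row
from systemml import dml

# Toy DataFrame standing in for the real input (placeholder)
df = sqlCtx.createDataFrame([Row(c1=float(i), c2=float(2 * i)) for i in range(10)])

# Placeholder DML body
dml_text = """
dml_run = X %*% t(X)
n_trt = nrow(X)
max_level = 2
"""

script = dml(dml_text).input(X=df).output("dml_run", "n_trt", "max_level")
# The Py4JJavaError below is raised here: binding the DataFrame input runs a
# Spark job (dataFrameToBinaryBlock) whose Python workers fail to start
beta, n_trt, max_level = ml.execute(script).get("dml_run", "n_trt", "max_level")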
Regards,
Arijit
________________________________
From: Arvind Surve <[email protected]>
Sent: Monday, May 29, 2017 12:10:10 AM
To: [email protected]; Dev
Subject: Re: Execution Error
It seems like you have a Spark path issue, either because there is a space in the
directory path or because the Spark path has not been set correctly.
Cannot run program "C:\spark": CreateProcess error=5, Access is denied
at java.lang.ProcessBuilder.start(Unknown Source)
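
For instance, a minimal sanity check along those lines (the interpreter path below is only an example; PYSPARK_PYTHON is the standard variable Spark consults when launching Python workers):

import os

spark_path = r"C:\spark"

# Verify the Spark directory exists and contains no spaces
assert os.path.isdir(spark_path), "spark_path does not exist"
assert " " not in spark_path, "avoid spaces in the Spark path"

# Point the Python workers at an explicit interpreter rather than letting them
# fall back to a misconfigured value (example path; adjust to your setup)
os.environ['PYSPARK_PYTHON'] = r"C:\ProgramData\Anaconda3\python.exe"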
Arvind Surve | Spark Technology Center | http://www.spark.tc/
On Sunday, May 28, 2017, 11:36:16 AM PDT, arijit chakraborty
<[email protected]> wrote:

Hi,
I was running our SystemML code on Spark 2.1.1 and Anaconda 3.6, and we started
getting this error. I could not understand why we are getting it. Can anyone
please help?
Thank you!
Arijit
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-8-b016c9dabf34> in <module>()
     14 ).output("dml_run","n_trt","max_level")
     15
---> 16 beta, n_trt, max_level = ml.execute(script).get("dml_run","n_trt","max_level")

C:\ProgramData\Anaconda3\lib\site-packages\systemml\mlcontext.py in execute(self, script)
    335                 py4j.java_gateway.get_method(script_java, "in")(key, val)
    336             else:
--> 337                 py4j.java_gateway.get_method(script_java, "in")(key, _py2java(self._sc, val))
    338         for val in script._output:
    339             script_java.out(val)

C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134
   1135         for temp_arg in temp_args:

C:\spark/python\pyspark\sql\utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    317                 raise Py4JJavaError(
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:
    321                 raise Py4JError(
Py4JJavaError: An error occurred while calling o52.in.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 9 in stage 0.0 failed 1 times, most recent failure: Lost task 9.0 in stage 0.0 (TID 9, localhost, executor driver): java.io.IOException: Cannot run program "C:\spark": CreateProcess error=5, Access is denied
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:120)
at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:67)
at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:116)
at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:128)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.io.IOException: CreateProcess error=5, Access is denied
at java.lang.ProcessImpl.create(Native Method)
at java.lang.ProcessImpl.<init>(Unknown Source)
at java.lang.ProcessImpl.start(Unknown Source)
... 35 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
at org.apache.spark.rdd.RDD.count(RDD.scala:1158)
at org.apache.spark.api.java.JavaRDDLike$class.count(JavaRDDLike.scala:455)
at org.apache.spark.api.java.AbstractJavaRDDLike.count(JavaRDDLike.scala:45)
at org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils.dataFrameToBinaryBlock(RDDConverterUtils.java:236)
at org.apache.sysml.api.mlcontext.MLContextConversionUtil.dataFrameToMatrixBinaryBlocks(MLContextConversionUtil.java:430)
at org.apache.sysml.api.mlcontext.MLContextConversionUtil.dataFrameToMatrixObject(MLContextConversionUtil.java:330)
at org.apache.sysml.api.mlcontext.MLContextConversionUtil.dataFrameToMatrixObject(MLContextConversionUtil.java:311)
at org.apache.sysml.api.mlcontext.MLContextUtil.convertInputType(MLContextUtil.java:516)
at org.apache.sysml.api.mlcontext.Script.in(Script.java:347)
at org.apache.sysml.api.mlcontext.Script.in(Script.java:306)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Unknown Source)
Caused by: java.io.IOException: Cannot run program "C:\spark": CreateProcess error=5, Access is denied
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:120)
at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:67)
at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:116)
at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:128)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
... 1 more
Caused by: java.io.IOException: CreateProcess error=5, Access is denied
at java.lang.ProcessImpl.create(Native Method)
at java.lang.ProcessImpl.<init>(Unknown Source)
at java.lang.ProcessImpl.start(Unknown Source)
... 35 more