Hi,
I was testing my code with 10,000 observations, but it is failing with an
out-of-memory error. Please find the log below. The code works perfectly with
smaller datasets. In R, this model takes around 2 hours to run.
I'm using a 4-core PC and running Spark through a Jupyter notebook.
In Python:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-30-9e495a76f74c> in <module>()
----> 1 get_ipython().run_cell_magic(u'time', u'', u'scriptUrl =
"D:/DEV1/DMLREPO/DEV_v2.dml"
#findBestSplitSC,tester,findBestSplitSC_v1\nscript =
sml.dml(scriptUrl).input(dframe = X_df,status = status_df,input_val =
inputs_df,ntree = 300, mtry = 9).output("check_func") # , status =
status_df, input_val = inputs_df\nbeta =
ml.execute(script).get("check_func")\n#beta')
C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.pyc in
run_cell_magic(self, magic_name, line, cell)
2118 magic_arg_s = self.var_expand(line, stack_depth)
2119 with self.builtin_trap:
-> 2120 result = fn(magic_arg_s, cell)
2121 return result
2122
<decorator-gen-61> in time(self, line, cell, local_ns)
C:\Anaconda2\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
C:\Anaconda2\lib\site-packages\IPython\core\magics\execution.pyc in time(self,
line, cell, local_ns)
1175 else:
1176 st = clock2()
-> 1177 exec(code, glob, local_ns)
1178 end = clock2()
1179 out = None
<timed exec> in <module>()
C:\Anaconda2\lib\site-packages\systemml\mlcontext.pyc in execute(self, script)
338 for val in script._output:
339 script_java.out(val)
--> 340 return MLResults(self._ml.execute(script_java), self._sc)
341
342 def setStatistics(self, statistics):
C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py in __call__(self,
*args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
C:\spark/python\pyspark\sql\utils.pyc in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\protocol.py in
get_return_value(answer, gateway_client, target_id, name)
317 raise Py4JJavaError(
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
321 raise Py4JError(
Py4JJavaError: An error occurred while calling o33.execute.
: java.lang.OutOfMemoryError: Java heap space
The error in the CMD (command prompt) log:
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java
server (127.0.0.1:57055)
Traceback (most recent call last):
File "C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py", line
963, in start
self.socket.connect((self.address, self.port))
File "C:\Anaconda2\lib\socket.py", line 228, in meth
return getattr(self._sock,name)(*args)
error: [Errno 10061] No connection could be made because the target machine
actively refused it
Thanks a lot!
Arijit