Hello, 

I am using the Python API to perform a grid search and train models using
LogisticRegressionWithSGD. 
I am using r3.xlarge machines in EC2, running on top of YARN in cluster mode. 

The training RDD is persisted in memory and on disk. Some of the models
train successfully, but then at some point during the grid search I get an
error. It looks like the Python broadcast is looking for a part of the RDD
which is no longer there. I scanned the logs for further errors but could
not find anything. 

Any ideas about what could be causing this, and what I should be looking for? 

Many thanks. 
Cat

  model = LogisticRegressionWithSGD.train(the_training, iterations=i,
regParam=c, miniBatchFraction=0.8)
  File "/home/hadoop/spark/python/pyspark/mllib/classification.py", line
164, in train
    return _regression_train_wrapper(train, LogisticRegressionModel, data,
initialWeights)
  File "/home/hadoop/spark/python/pyspark/mllib/regression.py", line 140, in
_regression_train_wrapper
    weights, intercept = train_func(data,
_convert_to_vector(initial_weights))
  File "/home/hadoop/spark/python/pyspark/mllib/classification.py", line
162, in train
    bool(intercept))
  File "/home/hadoop/spark/python/pyspark/mllib/common.py", line 120, in
callMLlibFunc
    return callJavaFunc(sc, api, *args)
  File "/home/hadoop/spark/python/pyspark/mllib/common.py", line 113, in
callJavaFunc
    return _java2py(sc, func(*args))
  File
"/home/hadoop/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py",
line 538, in __call__
    self.target_id, self.name)
  File
"/home/hadoop/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line
300, in get_return_value
    format(target_id, '.', name), value)
Py4JJavaError: An error occurred while calling
o271.trainLogisticRegressionModelWithSGD.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task
serialization failed: java.io.FileNotFoundException:
/mnt/spark/spark-b07b34f8-66c3-43ae-a3ed-0c291724409b/pyspark-4196e8e5-8024-4ec5-a7bb-a60b216e6e74/tmpbCjiSR
(No such file or directory)
java.io.FileInputStream.open(Native Method)
java.io.FileInputStream.<init>(FileInputStream.java:146)
org.apache.spark.api.python.PythonBroadcast$$anonfun$writeObject$1.apply$mcJ$sp(PythonRDD.scala:848)
org.apache.spark.api.python.PythonBroadcast$$anonfun$writeObject$1.apply(PythonRDD.scala:847)
org.apache.spark.api.python.PythonBroadcast$$anonfun$writeObject$1.apply(PythonRDD.scala:847)
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1153)
org.apache.spark.api.python.PythonBroadcast.writeObject(PythonRDD.scala:847)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:606)
java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:988)
java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1495)
java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431)
java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177)
java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347)
org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:44)
org.apache.spark.serializer.SerializationStream.writeAll(Serializer.scala:110)
org.apache.spark.storage.BlockManager.dataSerializeStream(BlockManager.scala:1176)
org.apache.spark.storage.DiskStore.putIterator(DiskStore.scala:79)
org.apache.spark.storage.DiskStore.putArray(DiskStore.scala:64)
org.apache.spark.storage.BlockManager.dropFromMemory(BlockManager.scala:1028)
org.apache.spark.storage.MemoryStore$$anonfun$ensureFreeSpace$4.apply(MemoryStore.scala:419)
org.apache.spark.storage.MemoryStore$$anonfun$ensureFreeSpace$4.apply(MemoryStore.scala:408)
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
org.apache.spark.storage.MemoryStore.ensureFreeSpace(MemoryStore.scala:408)
org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:263)
org.apache.spark.storage.MemoryStore.putIterator(MemoryStore.scala:136)
org.apache.spark.storage.MemoryStore.putIterator(MemoryStore.scala:114)
org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:786)
org.apache.spark.storage.BlockManager.putIterator(BlockManager.scala:637)
org.apache.spark.storage.BlockManager.putSingle(BlockManager.scala:991)
org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:98)
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:84)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:29)
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:62)
org.apache.spark.SparkContext.broadcast(SparkContext.scala:1051)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:839)
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)




--
View this message in context: 
http://apache-spark-user-list.1001560.n3.nabble.com/Temp-file-missing-when-training-logistic-regression-tp24153.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
For additional commands, e-mail: user-help@spark.apache.org

Reply via email to