I am trying to run Logistic Regression on the url dataset (from
libsvm) using the exact same code
as the example on a 5 node Yarn-Cluster.
I get a pretty cryptic error that says
"Killed"
Nothing more
Settings:
--master yarn-client
--verbose
--driver-memory 24G
--executor-memory 24G
--executor-cores 8
--num-executors 5
I set spark.akka.frameSize to 200 (MB).
Script:
/** Trains a logistic-regression model with SGD on a LibSVM-format dataset
 *  and prints a CSV line: `<dataset>,spark-sgd,<iterations>,<seconds>,<accuracy>`.
 *
 *  @param args args(0) must be the path to the LibSVM input file.
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
    .setMaster("yarn-client")
    .setAppName("Logistic regression SGD fixed")
    // Large weight vectors / task results need a bigger Akka frame (MB).
    .set("spark.akka.frameSize", "200")
  val sc = new SparkContext(conf)

  // Load and parse the data.
  val dataset = args(0)
  val maxIterations = 100
  val startTime = System.nanoTime()
  val data = MLUtils.loadLibSVMFile(sc, dataset)

  // Build the model.
  val solver = new LogisticRegressionWithSGD()
  solver.optimizer.setNumIterations(maxIterations)
  solver.optimizer.setRegParam(0.01)
  val model = solver.run(data)

  // Stop the clock HERE, before the accuracy pass: the counts below trigger
  // RDD actions, so computing elapsed time after them (as before) would
  // wrongly include evaluation time in the reported training time.
  val elapsedTime = (System.nanoTime() - startTime) / 1e9

  // Measure training accuracy (fraction of points whose predicted label
  // equals the true label).
  val predictionsAndLabels = data.map { point =>
    val prediction = model.predict(point.features)
    (prediction, point.label)
  }
  val accuracy =
    predictionsAndLabels.filter(r => r._1 == r._2).count.toDouble / data.count

  println(s"$dataset,spark-sgd,$maxIterations,$elapsedTime,$accuracy")

  // Release cluster resources cleanly before exiting.
  sc.stop()
  System.exit(0)
}