I am trying to run Logistic Regression on the URL dataset (from LibSVM), using the exact same code as the example, on a 5-node YARN cluster.
I get a pretty cryptic error that says "Killed" — nothing more. Settings: --master yarn-client --verbose --driver-memory 24G --executor-memory 24G --executor-cores 8 --num-executors 5. I set the akka.frame_size to 200MB. Script:

/** Entry point: trains LogisticRegressionWithSGD on a LibSVM file (path in args(0)),
  * prints a CSV line "dataset,solver,iterations,seconds,accuracy" and exits.
  */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
    .setMaster("yarn-client")
    .setAppName("Logistic regression SGD fixed")
    .set("spark.akka.frameSize", "200")
  val sc = new SparkContext(conf)

  // Load and parse the data.
  val dataset = args(0)
  val maxIterations = 100
  val start_time = System.nanoTime()
  // cache(): this RDD is reused for training, prediction, and two count actions
  // below; without caching each action re-reads and re-parses the LibSVM file.
  val data = MLUtils.loadLibSVMFile(sc, dataset).cache()

  // Build the model.
  val solver = new LogisticRegressionWithSGD()
  solver.optimizer.setNumIterations(maxIterations)
  solver.optimizer.setRegParam(0.01)
  val model = solver.run(data)

  // Stop the clock here: the accuracy pass below is evaluation only and must
  // not count toward the training time (RDD actions after this point would
  // otherwise be folded into elapsed_time because of Spark's lazy evaluation).
  val elapsed_time = (System.nanoTime() - start_time) / 1e9

  // Measure the accuracy. This is deliberately excluded from the timing above.
  val predictionsAndLabels = data.map { point =>
    (model.predict(point.features), point.label)
  }
  val accuracy = predictionsAndLabels.filter(r => r._1 == r._2).count.toDouble / data.count

  // Report the last known accuracy as CSV: dataset, solver, iterations, seconds, accuracy.
  println(s"$dataset,spark-sgd,$maxIterations,$elapsed_time,$accuracy")
  System.exit(0)
}