Hi there,

Are you sure that the cluster nodes where the executors run have network connectivity to the Elasticsearch cluster? One quick way to check from the executors themselves is sketched below.
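This is only a rough probe, not a definitive test; "es-host" and the transport port 9300 are placeholders for your actual Elasticsearch address:

    import java.net.InetSocketAddress;
    import java.net.Socket;
    import java.util.Arrays;
    import org.apache.spark.api.java.JavaSparkContext;

    public class EsConnectivityProbe {
        public static void main(String[] args) {
            // Assumes master and app name are supplied via spark-submit.
            JavaSparkContext sc = new JavaSparkContext();
            // One element per partition so the probe runs on several executors.
            sc.parallelize(Arrays.asList(1, 2, 3, 4), 4).foreachPartition(it -> {
                try (Socket socket = new Socket()) {
                    // Throws an IOException on any executor that cannot reach ES,
                    // which fails the task and shows up in the executor logs.
                    socket.connect(new InetSocketAddress("es-host", 9300), 5000);
                }
            });
            sc.stop();
        }
    }

If this job fails with a connect timeout on some executors, you are looking at a firewall or routing issue rather than an Elasticsearch one.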
Speaking of which, why don't you use https://github.com/elastic/elasticsearch-hadoop#apache-spark? (A quick sketch of the connector route is in the P.S. below.)

Cheers,
Anastasios

On Fri, Feb 3, 2017 at 7:10 PM, Dmitry Goldenberg <dgoldenberg...@gmail.com> wrote:
> Hi,
>
> Any reason why we might be getting this error? The code seems to work
> fine in the non-distributed mode, but the same code, when run from a Spark
> job, is not able to get to Elastic.
>
> Spark version: 2.0.1 built for Hadoop 2.4, Scala 2.11
> Elastic version: 2.3.1
>
> I've verified the Elastic hosts and the cluster name.
>
> The spot in the code where this happens is:
>
> ClusterHealthResponse clusterHealthResponse = client.admin().cluster()
>         .prepareHealth()
>         .setWaitForGreenStatus()
>         .setTimeout(TimeValue.timeValueSeconds(10))
>         .get();
>
> Stack trace:
>
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1930)
> at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:902)
> at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:900)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
> at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:900)
> at org.apache.spark.api.java.JavaRDDLike$class.foreachPartition(JavaRDDLike.scala:218)
> at org.apache.spark.api.java.AbstractJavaRDDLike.foreachPartition(JavaRDDLike.scala:45)
> at com.myco.MyDriver$3.call(com.myco.MyDriver.java:214)
> at com.myco.MyDriver$3.call(KafkaSparkStreamingDriver.java:201)
> at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
> at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272)
> at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
> at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:627)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
> at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:415)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
> at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
> at scala.util.Try$.apply(Try.scala:192)
> at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
> at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:247)
> at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:247)
> at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:247)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
> at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:246)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: NoNodeAvailableException[None of the configured nodes are available: [{#transport#-1}{XX.XXX.XXX.XX}{XX.XXX.XXX.XX:9300}]]
> at org.elasticsearch.client.transport.TransportClientNodesService.ensureNodesAreAvailable(TransportClientNodesService.java:290)
> at org.elasticsearch.client.transport.TransportClientNodesService.execute(TransportClientNodesService.java:207)
> at org.elasticsearch.client.transport.support.TransportProxyClient.execute(TransportProxyClient.java:55)
> at org.elasticsearch.client.transport.TransportClient.doExecute(TransportClient.java:288)
> at org.elasticsearch.client.support.AbstractClient.execute(AbstractClient.java:359)
> at org.elasticsearch.client.support.AbstractClient$ClusterAdmin.execute(AbstractClient.java:853)
> at org.elasticsearch.action.ActionRequestBuilder.execute(ActionRequestBuilder.java:86)
> at org.elasticsearch.action.ActionRequestBuilder.execute(ActionRequestBuilder.java:56)
> at org.elasticsearch.action.ActionRequestBuilder.get(ActionRequestBuilder.java:64)
> at com.myco.MyDriver.work()
> at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
> at org.apache.spark.api.java.JavaRDDLike$$anonfun$foreachPartition$1.apply(JavaRDDLike.scala:218)
> at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
> at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:902)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
> at org.apache.spark.scheduler.Task.run(Task.scala:86)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)

-- 
Anastasios Zouzias <a...@zurich.ibm.com>
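P.S. In case it helps, here is a minimal sketch of the elasticsearch-hadoop route from Java. It is an illustration rather than your exact setup: "es-host", "myindex/mytype", and the app name are placeholders, and I am assuming the es-hadoop 2.x artifact that matches your ES 2.3.1. Note the connector talks to the REST port (9200 by default), not the transport port 9300 that your TransportClient uses:

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;

    public class EsHadoopSketch {
        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setAppName("es-hadoop-sketch");
            conf.set("es.nodes", "es-host"); // placeholder host list
            conf.set("es.port", "9200");     // REST port, not transport 9300
            JavaSparkContext sc = new JavaSparkContext(conf);

            Map<String, String> doc = new HashMap<>();
            doc.put("message", "hello from spark");

            JavaRDD<Map<String, String>> rdd =
                    sc.parallelize(Collections.singletonList(doc));

            // Each Map in the RDD is indexed as one document in index/type.
            JavaEsSpark.saveToEs(rdd, "myindex/mytype");
            sc.stop();
        }
    }

The connector handles node discovery, retries, and serialization for you, so you avoid managing a TransportClient inside each task.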