Hey guys, I've got this issue (see bottom) with Spark, deployed in standalone mode in a local Docker environment. I know that I need to raise the ulimit (only 1024 now), but in the meantime I was just wondering how this could happen. My gut feeling is that it happens because I'm loading a lot into memory, so Spark tries to dump some RDDs to the FS, and then boom.
Also, I was wondering whether this could be a clue that my job is perhaps too eager with memory? Or is it something quite normal with such a low ulimit on the workers? Thanks a lot (in advance ^^) Cheers, andy 14/02/21 08:32:15 ERROR Executor: Exception in task ID 472 org.jboss.netty.channel.ChannelException: Failed to create a selector. at org.jboss.netty.channel.socket.nio.AbstractNioSelector.openSelector(AbstractNioSelector.java:337) at org.jboss.netty.channel.socket.nio.AbstractNioSelector.<init>(AbstractNioSelector.java:95) at org.jboss.netty.channel.socket.nio.AbstractNioWorker.<init>(AbstractNioWorker.java:53) at org.jboss.netty.channel.socket.nio.NioWorker.<init>(NioWorker.java:45) at org.jboss.netty.channel.socket.nio.NioWorkerPool.createWorker(NioWorkerPool.java:45) at org.jboss.netty.channel.socket.nio.NioWorkerPool.createWorker(NioWorkerPool.java:28) at org.jboss.netty.channel.socket.nio.AbstractNioWorkerPool.newWorker(AbstractNioWorkerPool.java:99) at org.jboss.netty.channel.socket.nio.AbstractNioWorkerPool.init(AbstractNioWorkerPool.java:69) at org.jboss.netty.channel.socket.nio.NioWorkerPool.<init>(NioWorkerPool.java:39) at org.jboss.netty.channel.socket.nio.NioWorkerPool.<init>(NioWorkerPool.java:33) at org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory.<init>(NioClientSocketChannelFactory.java:151) at org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory.<init>(NioClientSocketChannelFactory.java:116) at com.datastax.driver.core.Connection$Factory.<init>(Connection.java:349) at com.datastax.driver.core.Connection$Factory.<init>(Connection.java:360) at com.datastax.driver.core.Cluster$Manager.<init>(Cluster.java:857) at com.datastax.driver.core.Cluster$Manager.<init>(Cluster.java:806) at com.datastax.driver.core.Cluster.<init>(Cluster.java:76) at com.datastax.driver.core.Cluster.buildFrom(Cluster.java:132) at com.datastax.driver.core.Cluster$Builder.build(Cluster.java:771) at 
com.virdata.core.batch.sample.Timeseries$$anonfun$storeInCassandra$1$1$$anonfun$apply$1$$anonfun$apply$2.apply(Timeseries.scala:45) at com.virdata.core.batch.sample.Timeseries$$anonfun$storeInCassandra$1$1$$anonfun$apply$1$$anonfun$apply$2.apply(Timeseries.scala:38) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:595) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:595) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:884) at org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:884) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:109) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:46) at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:45) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:45) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:722) Caused by: java.io.IOException: Too many open files at sun.nio.ch.IOUtil.makePipe(Native Method) at sun.nio.ch.EPollSelectorImpl.<init>(EPollSelectorImpl.java:65) at sun.nio.ch.EPollSelectorProvider.openSelector(EPollSelectorProvider.java:36) at java.nio.channels.Selector.open(Selector.java:227) at org.jboss.netty.channel.socket.nio.AbstractNioSelector.openSelector(AbstractNioSelector.java:335) ... 
37 more 14/02/21 08:32:53 WARN BlockManagerMaster: Error sending message to BlockManagerMaster in 1 attempts java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) at scala.concurrent.Await$.result(package.scala:107) at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:162) at org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:52) at org.apache.spark.storage.BlockManager.org $apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:97) at org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:135) at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) at akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:722) 14/02/21 08:33:26 WARN BlockManagerMaster: Error sending message to BlockManagerMaster in 2 attempts java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) at scala.concurrent.Await$.result(package.scala:107) at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:162) at 
org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:52) at org.apache.spark.storage.BlockManager.org $apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:97) at org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:135) at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) at akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:722) 14/02/21 08:33:59 WARN BlockManagerMaster: Error sending message to BlockManagerMaster in 3 attempts java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) at scala.concurrent.Await$.result(package.scala:107) at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:162) at org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:52) at org.apache.spark.storage.BlockManager.org $apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:97) at org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:135) at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) at akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:722) 14/02/21 08:34:03 ERROR Executor: Uncaught exception in thread 
Thread[Connection manager future execution context-0,5,main] java.lang.Error: org.apache.spark.SparkException: Error sending message to BlockManagerMaster [message = HeartBeat(BlockManagerId(1, 172.17.0.4, 52780, 0))] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1116) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:722) Caused by: org.apache.spark.SparkException: Error sending message to BlockManagerMaster [message = HeartBeat(BlockManagerId(1, 172.17.0.4, 52780, 0))] at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:176) at org.apache.spark.storage.BlockManagerMaster.sendHeartBeat(BlockManagerMaster.scala:52) at org.apache.spark.storage.BlockManager.org $apache$spark$storage$BlockManager$$heartBeat(BlockManager.scala:97) at org.apache.spark.storage.BlockManager$$anonfun$initialize$1.apply$mcV$sp(BlockManager.scala:135) at akka.actor.Scheduler$$anon$9.run(Scheduler.scala:80) at akka.actor.LightArrayRevolverScheduler$$anon$3$$anon$2.run(Scheduler.scala:241) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) ... 2 more Caused by: java.util.concurrent.TimeoutException: Futures timed out after [30 seconds] at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219) at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223) at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107) at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53) at scala.concurrent.Await$.result(package.scala:107) at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:162) ... 8 more
