This looks to me like a kube-dns failure that's preventing the driver's DNS address from resolving. It would be worth double-checking that kube-dns is indeed running (in the kube-system namespace). Often, with environments like minikube, kube-dns may exit or crash-loop due to lack of resources.
On Tue, May 29, 2018 at 3:18 PM, purna pradeep <purna2prad...@gmail.com> wrote: > Hello, > > I’m getting below error when I spark-submit a Spark 2.3 app on Kubernetes > *v1.8.3* , some of the executor pods were killed with below error as > soon as they come up > > Exception in thread "main" java.lang.reflect.UndeclaredThrowableException > > at org.apache.hadoop.security.UserGroupInformation.doAs( > UserGroupInformation.java:1713) > > at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser( > SparkHadoopUtil.scala:64) > > at org.apache.spark.executor. > CoarseGrainedExecutorBackend$.run(CoarseGrainedExecutorBackend.scala:188) > > at org.apache.spark.executor. > CoarseGrainedExecutorBackend$.main(CoarseGrainedExecutorBackend.scala:293) > > at org.apache.spark.executor.CoarseGrainedExecutorBackend. > main(CoarseGrainedExecutorBackend.scala) > > Caused by: org.apache.spark.SparkException: Exception thrown in > awaitResult: > > at org.apache.spark.util.ThreadUtils$.awaitResult( > ThreadUtils.scala:205) > > at org.apache.spark.rpc.RpcTimeout.awaitResult( > RpcTimeout.scala:75) > > at org.apache.spark.rpc.RpcEnv. > setupEndpointRefByURI(RpcEnv.scala:101) > > at org.apache.spark.executor. > CoarseGrainedExecutorBackend$$anonfun$run$1.apply$mcV$sp( > CoarseGrainedExecutorBackend.scala:201) > > at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run( > SparkHadoopUtil.scala:65) > > at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run( > SparkHadoopUtil.scala:64) > > at java.security.AccessController.doPrivileged(Native > Method) > > at javax.security.auth.Subject.doAs(Subject.java:422) > > at org.apache.hadoop.security.UserGroupInformation.doAs( > UserGroupInformation.java:1698) > > ... 4 more > > Caused by: java.io.IOException: Failed to connect to > spark-1527629824987-driver-svc.spark.svc:7078 > > at org.apache.spark.network.client.TransportClientFactory. > createClient(TransportClientFactory.java:245) > > at org.apache.spark.network.client.TransportClientFactory. 
> createClient(TransportClientFactory.java:187) > > at org.apache.spark.rpc.netty.NettyRpcEnv.createClient( > NettyRpcEnv.scala:198) > > at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox. > scala:194) > > at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox. > scala:190) > > at java.util.concurrent.FutureTask.run(FutureTask. > java:266) > > at java.util.concurrent.ThreadPoolExecutor.runWorker( > ThreadPoolExecutor.java:1149) > > at java.util.concurrent.ThreadPoolExecutor$Worker.run( > ThreadPoolExecutor.java:624) > > at java.lang.Thread.run(Thread.java:748) > > Caused by: java.net.UnknownHostException: spark-1527629824987-driver- > svc.spark.svc > > at java.net.InetAddress.getAllByName0(InetAddress. > java:1280) > > at java.net.InetAddress.getAllByName(InetAddress.java: > 1192) > > at java.net.InetAddress.getAllByName(InetAddress.java: > 1126) > > at java.net.InetAddress.getByName(InetAddress.java:1076) > > at io.netty.util.internal.SocketUtils$8.run(SocketUtils. > java:146) > > at io.netty.util.internal.SocketUtils$8.run(SocketUtils. > java:143) > > at java.security.AccessController.doPrivileged(Native > Method) > > at io.netty.util.internal.SocketUtils.addressByName( > SocketUtils.java:143) > > at io.netty.resolver.DefaultNameResolver.doResolve( > DefaultNameResolver.java:43) > > at io.netty.resolver.SimpleNameResolver.resolve( > SimpleNameResolver.java:63) > > at io.netty.resolver.SimpleNameResolver.resolve( > SimpleNameResolver.java:55) > > at io.netty.resolver.InetSocketAddressResolver.doResolve( > InetSocketAddressResolver.java:57) > > at io.netty.resolver.InetSocketAddressResolver.doResolve( > InetSocketAddressResolver.java:32) > > at io.netty.resolver.AbstractAddressResolver.resolve( > AbstractAddressResolver.java:108) > > at io.netty.bootstrap.Bootstrap.doResolveAndConnect0( > Bootstrap.java:208) > > at io.netty.bootstrap.Bootstrap. 
> access$000(Bootstrap.java:49) > > at io.netty.bootstrap.Bootstrap$ > 1.operationComplete(Bootstrap.java:188) > > at io.netty.bootstrap.Bootstrap$ > 1.operationComplete(Bootstrap.java:174) > > at io.netty.util.concurrent.DefaultPromise. > notifyListener0(DefaultPromise.java:507) > > at io.netty.util.concurrent.DefaultPromise. > notifyListenersNow(DefaultPromise.java:481) > > at io.netty.util.concurrent.DefaultPromise. > notifyListeners(DefaultPromise.java:420) > > at io.netty.util.concurrent.DefaultPromise.trySuccess( > DefaultPromise.java:104) > > at io.netty.channel.DefaultChannelPromise.trySuccess( > DefaultChannelPromise.java:82) > > at io.netty.channel.AbstractChannel$ > AbstractUnsafe.safeSetSuccess(AbstractChannel.java:978) > > at io.netty.channel.AbstractChannel$ > AbstractUnsafe.register0(AbstractChannel.java:512) > > at io.netty.channel.AbstractChannel$ > AbstractUnsafe.access$200(AbstractChannel.java:423) > > at io.netty.channel.AbstractChannel$AbstractUnsafe$1.run( > AbstractChannel.java:482) > > at io.netty.util.concurrent.AbstractEventExecutor. > safeExecute(AbstractEventExecutor.java:163) > > at io.netty.util.concurrent.SingleThreadEventExecutor. > runAllTasks(SingleThreadEventExecutor.java:403) > > at io.netty.channel.nio.NioEventLoop.run(NioEventLoop. > java:463) > > at io.netty.util.concurrent.SingleThreadEventExecutor$5. > run(SingleThreadEventExecutor.java:858) > > at io.netty.util.concurrent.DefaultThreadFactory$ > DefaultRunnableDecorator.run(DefaultThreadFactory.java:138) > > ... 1 more >