It's only happening for the Hadoop config. The exception traces are different each time it dies, and jobs run for a couple of hours before the worker dies.
Another Reason: *20/05/02 02:26:34 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[ExecutorRunner for app-20200501213234-9846/3,5,main]* *java.lang.OutOfMemoryError: Java heap space* * at org.apache.xerces.xni.XMLString.toString(Unknown Source)* at org.apache.xerces.parsers.AbstractDOMParser.characters(Unknown Source) at org.apache.xerces.xinclude.XIncludeHandler.characters(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanContent(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at org.apache.xerces.parsers.DOMParser.parse(Unknown Source) at org.apache.xerces.jaxp.DocumentBuilderImpl.parse(Unknown Source) at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:150) at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2480) at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2468) at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2539) at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2492) at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2405) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1143) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1115) at org.apache.spark.deploy.SparkHadoopUtil$.org$apache$spark$deploy$SparkHadoopUtil$$appendS3AndSparkHadoopConfigurations(SparkHadoopUtil.scala:464) at org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:436) at org.apache.spark.deploy.SparkHadoopUtil.newConfiguration(SparkHadoopUtil.scala:114) at 
org.apache.spark.SecurityManager.<init>(SecurityManager.scala:114) at org.apache.spark.deploy.worker.ExecutorRunner.org $apache$spark$deploy$worker$ExecutorRunner$$fetchAndRunExecutor(ExecutorRunner.scala:149) at org.apache.spark.deploy.worker.ExecutorRunner$$anon$1.run(ExecutorRunner.scala:73) *20/05/02 02:26:37 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[dispatcher-event-loop-3,5,main]* *java.lang.OutOfMemoryError: Java heap space* * at java.lang.Class.newInstance(Class.java:411)* at sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:403) at sun.reflect.MethodAccessorGenerator$1.run(MethodAccessorGenerator.java:394) at java.security.AccessController.doPrivileged(Native Method) at sun.reflect.MethodAccessorGenerator.generate(MethodAccessorGenerator.java:393) at sun.reflect.MethodAccessorGenerator.generateSerializationConstructor(MethodAccessorGenerator.java:112) at sun.reflect.ReflectionFactory.generateConstructor(ReflectionFactory.java:398) at sun.reflect.ReflectionFactory.newConstructorForSerialization(ReflectionFactory.java:360) at java.io.ObjectStreamClass.getSerializableConstructor(ObjectStreamClass.java:1520) at java.io.ObjectStreamClass.access$1500(ObjectStreamClass.java:79) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:507) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:482) at java.security.AccessController.doPrivileged(Native Method) at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:482) at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:379) at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:478) at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:379) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1134) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509) at 
java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:43) at org.apache.spark.rpc.netty.RequestMessage.serialize(NettyRpcEnv.scala:565) at org.apache.spark.rpc.netty.NettyRpcEnv.send(NettyRpcEnv.scala:193) at org.apache.spark.rpc.netty.NettyRpcEndpointRef.send(NettyRpcEnv.scala:528) at org.apache.spark.deploy.worker.Worker.org $apache$spark$deploy$worker$Worker$$sendToMaster(Worker.scala:658) *20/05/02 02:26:34 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[spark-shuffle-directory-cleaner-4-1,5,main]* *java.lang.OutOfMemoryError: Java heap space* * at java.io.UnixFileSystem.resolve(UnixFileSystem.java:108)* * at java.io.File.<init>(File.java:262)* * at java.io.File.listFiles(File.java:1253)* at org.apache.spark.network.util.JavaUtils.listFilesSafely(JavaUtils.java:177) at org.apache.spark.network.util.JavaUtils.deleteRecursivelyUsingJavaIO(JavaUtils.java:140) at org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:118) at org.apache.spark.network.util.JavaUtils.deleteRecursivelyUsingJavaIO(JavaUtils.java:128) at org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:118) at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.deleteNonShuffleFiles(ExternalShuffleBlockResolver.java:269) at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.lambda$executorRemoved$1(ExternalShuffleBlockResolver.java:235) at 
org.apache.spark.network.shuffle.ExternalShuffleBlockResolver$$Lambda$19/1657523067.run(Unknown Source) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138) at java.lang.Thread.run(Thread.java:748) 20/05/02 02:27:03 INFO ExecutorRunner: Killing pro Another Reason 20/05/02 22:15:21 INFO DriverRunner: Copying user jar http://XX.XX.XXX.19:90/jar/hc-job-1.0-SNAPSHOT.jar to /grid/1/spark/work/driver-20200502221520-1101/hc-job-1.0-SNAPSHOT.jar *20/05/02 22:15:50 WARN TransportChannelHandler: Exception in connection from /XX.XX.XXX.19:7077* *java.lang.OutOfMemoryError: Java heap space* * at java.util.Arrays.copyOf(Arrays.java:3332)* * at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)* * at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)* at java.lang.StringBuilder.append(StringBuilder.java:136) at java.io.ObjectStreamField.getClassSignature(ObjectStreamField.java:322) at java.io.ObjectStreamField.<init>(ObjectStreamField.java:140) at java.io.ObjectStreamClass.getDefaultSerialFields(ObjectStreamClass.java:1789) at java.io.ObjectStreamClass.getSerialFields(ObjectStreamClass.java:1705) at java.io.ObjectStreamClass.access$800(ObjectStreamClass.java:79) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:496) at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:482) at java.security.AccessController.doPrivileged(Native Method) at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:482) at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:379) at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:669) at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1883) at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1749) at 
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2040) at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1571) at java.io.ObjectInputStream.readObject(ObjectInputStream.java:431) at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75) at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:108) at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1$$anonfun$apply$1.apply(NettyRpcEnv.scala:271) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:320) at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1.apply(NettyRpcEnv.scala:270) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:269) at org.apache.spark.rpc.netty.RequestMessage$.apply(NettyRpcEnv.scala:611) at org.apache.spark.rpc.netty.NettyRpcHandler.internalReceive(NettyRpcEnv.scala:662) at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:654) at org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:275) *20/05/02 22:15:50 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[DriverRunner for driver-20200502221520-1100,5,main]* *java.lang.OutOfMemoryError: Java heap space* * at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2627)* at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2492) at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2405) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1143) at org.apache.hadoop.conf.Configuration.set(Configuration.java:1115) at org.apache.spark.deploy.SparkHadoopUtil$.org$apache$spark$deploy$SparkHadoopUtil$$appendS3AndSparkHadoopConfigurations(SparkHadoopUtil.scala:464) at 
org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:436) at org.apache.spark.deploy.SparkHadoopUtil.newConfiguration(SparkHadoopUtil.scala:114) at org.apache.spark.deploy.worker.DriverRunner.downloadUserJar(DriverRunner.scala:160) at org.apache.spark.deploy.worker.DriverRunner.prepareAndRunDriver(DriverRunner.scala:173) at org.apache.spark.deploy.worker.DriverRunner$$anon$1.run(DriverRunner.scala:92) *20/05/02 22:15:51 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[dispatcher-event-loop-7,5,main]* *java.lang.OutOfMemoryError: Java heap space* * at org.apache.spark.deploy.worker.Worker.receive(Worker.scala:443)* * at org.apache.spark.rpc.netty.Inbox$$anonfun$process$1.apply$mcV$sp(Inbox.scala:117)* * at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:205)* at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:101) at org.apache.spark.rpc.netty.Dispatcher$MessageLoop.run(Dispatcher.scala:221) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 20/05/02 22:16:05 INFO ExecutorRunner: Killing process! On Thu, May 7, 2020 at 7:48 PM Jeff Evans <jeffrey.wayne.ev...@gmail.com> wrote: > You might want to double check your Hadoop config files. From the stack > trace it looks like this is happening when simply trying to load > configuration (XML files). Make sure they're well formed. > > On Thu, May 7, 2020 at 6:12 AM Hrishikesh Mishra <sd.hri...@gmail.com> > wrote: > >> Hi >> >> I am getting an out of memory error in the worker log in streaming jobs every >> couple of hours. After this, the worker dies. There is no shuffle, no >> aggregation, no caching in the job; it's just a transformation. >> I'm not able to identify where the problem is, driver or executor. And >> why does the worker die after the OOM? The streaming job should die instead. Am I >> missing something. 
>> >> Driver Memory: 2g >> Executor memory: 4g >> >> Spark Version: 2.4 >> Kafka Direct Stream >> Spark Standalone Cluster. >> >> >> 20/05/06 12:52:20 INFO SecurityManager: SecurityManager: authentication >> disabled; ui acls disabled; users with view permissions: Set(root); groups >> with view permissions: Set(); users with modify permissions: Set(root); >> groups with modify permissions: Set() >> >> 20/05/06 12:53:03 ERROR SparkUncaughtExceptionHandler: Uncaught exception >> in thread Thread[ExecutorRunner for app-20200506124717-10226/0,5,main] >> >> java.lang.OutOfMemoryError: Java heap space >> >> at org.apache.xerces.util.XMLStringBuffer.append(Unknown Source) >> >> at org.apache.xerces.impl.XMLEntityScanner.scanData(Unknown Source) >> >> at org.apache.xerces.impl.XMLScanner.scanComment(Unknown Source) >> >> at >> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanComment(Unknown >> Source) >> >> at >> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown >> Source) >> >> at >> org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown >> Source) >> >> at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) >> >> at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) >> >> at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) >> >> at org.apache.xerces.parsers.DOMParser.parse(Unknown Source) >> >> at org.apache.xerces.jaxp.DocumentBuilderImpl.parse(Unknown Source) >> >> at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:150) >> >> at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2480) >> >> at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2468) >> >> at >> org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2539) >> >> at >> org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2492) >> >> at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2405) >> >> at 
org.apache.hadoop.conf.Configuration.set(Configuration.java:1143) >> >> at org.apache.hadoop.conf.Configuration.set(Configuration.java:1115) >> >> at >> org.apache.spark.deploy.SparkHadoopUtil$.org$apache$spark$deploy$SparkHadoopUtil$$appendS3AndSparkHadoopConfigurations(SparkHadoopUtil.scala:464) >> >> at >> org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:436) >> >> at >> org.apache.spark.deploy.SparkHadoopUtil.newConfiguration(SparkHadoopUtil.scala:114) >> >> at org.apache.spark.SecurityManager.<init>(SecurityManager.scala:114) >> >> at org.apache.spark.deploy.worker.ExecutorRunner.org >> $apache$spark$deploy$worker$ExecutorRunner$$fetchAndRunExecutor(ExecutorRunner.scala:149) >> >> at >> org.apache.spark.deploy.worker.ExecutorRunner$$anon$1.run(ExecutorRunner.scala:73) >> >> 20/05/06 12:53:38 INFO DriverRunner: Worker shutting down, killing driver >> driver-20200505181719-1187 >> >> 20/05/06 12:53:38 INFO DriverRunner: Killing driver process! >> >> >> >> >> Regards >> Hrishi >> >