[ https://issues.apache.org/jira/browse/SPARK-44582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon reassigned SPARK-44582: ------------------------------------ Assignee: Wan Kun > JVM crash caused by SMJ and WindowExec > -------------------------------------- > > Key: SPARK-44582 > URL: https://issues.apache.org/jira/browse/SPARK-44582 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 3.5.0 > Reporter: Wan Kun > Assignee: Wan Kun > Priority: Major > Attachments: screenshot-1.png, screenshot-2.png > > > After the inner SMJ cleans up off-heap memory early, when the SMJ calls the left > Window's next method, the JVM may crash due to accessing unallocated memory. > !screenshot-1.png! > !screenshot-2.png! > {code} > # > # A fatal error has been detected by the Java Runtime Environment: > # > # SIGSEGV (0xb) at pc=0x00007fb5e0052954, pid=21756, tid=0x00007fa9e8f65640 > # > # JRE version: OpenJDK Runtime Environment (Zulu 8.68.0.20-SA-linux64) > (8.0_362-b08) (build 1.8.0_362-b08) > # Java VM: OpenJDK 64-Bit Server VM (25.362-b08 mixed mode linux-amd64 ) > # Problematic frame: > # v ~StubRoutines::jlong_disjoint_arraycopy > # > # Core dump written. 
Default location: > /hadoop/7/yarn/local/usercache/b_carmel/appcache/application_1684894519955_24406/container_e2311_1684894519955_24406_01_005660/core > or core.21756 > # > # If you would like to submit a bug report, please visit: > # http://www.azul.com/support/ > # > --------------- T H R E A D --------------- > Current thread (0x00007fb5d8f0c800): JavaThread "Executor 2802 task launch > worker for task 128116463, task 101.3 in stage 452404.0 of app > application_1684894519955_24406" daemon [_thread_in_Java, id=22042, > stack(0x00007fa9e8766000,0x00007fa9e8f66000)] > siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: > 0x00007fa9750deac0 > Registers: > RAX=0x00007fa9750deae8, RBX=0x0000000000000018, RCX=0x00007fb581f0f0a8, > RDX=0xffffffffffffffff > RSP=0x00007fa9e8f63a80, RBP=0x00007fa9e8f63a80, RSI=0x00007fb581f0f088, > RDI=0x00007fa9750deae0 > R8 =0x00007fb581f0f070, R9 =0x0000000097446ed2, R10=0x00007fb5e0053500, > R11=0x00007fb581f0f0b0 > R12=0x00007fb585ff17b0, R13=0x00007fa9750deac0, R14=0x0000000000000000, > R15=0x00007fb5d8f0c800 > RIP=0x00007fb5e0052954, EFLAGS=0x0000000000010297, CSGSFS=0x002b000000000033, > ERR=0x0000000000000004 > TRAPNO=0x000000000000000e > Top of Stack: (sp=0x00007fa9e8f63a80) > 0x00007fa9e8f63a80: 0000000000000028 00007fb5e127ee97 > 0x00007fa9e8f63a90: 0000000000000000 00007fa9e8f63b20 > 0x00007fa9e8f63aa0: 00007fa9e8f63b00 00007fb60882cf70 > 0x00007fa9e8f63ab0: 00007faab2dda9e0 00007fb581f0f070 > 0x00007fa9e8f63ac0: 00007fb551801188 00007fb54f8c0ef8 > Stack: [0x00007f8a03800000,0x00007f8a04000000], sp=0x00007f8a03ffd620, free > space=8181k > Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native > code) > v ~StubRoutines::jint_disjoint_arraycopy > J 36127 C2 > org.apache.spark.sql.execution.ExternalAppendOnlyUnsafeRowArray.add(Lorg/apache/spark/sql/catalyst/expressions/UnsafeRow;)V > (188 bytes) @ 0x00007f966187ac9f [0x00007f966187a820+0x47f] > J 36146 C2 > 
org.apache.spark.sql.execution.window.WindowExec$$anon$1.next()Ljava/lang/Object; > (5 bytes) @ 0x00007f9661a8eefc [0x00007f9661a8dd60+0x119c] > J 36153 C2 > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext()V > (381 bytes) @ 0x00007f966180185c [0x00007f9661801760+0xfc] > J 36246 C2 > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage7.smj_findNextJoinRows_0$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage7;Lscala/collection/Iterator;Lscala/collection/Iterator;)Z > (392 bytes) @ 0x00007f96607388f0 [0x00007f96607381e0+0x710] > J 36249 C1 > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage7.processNext()V > (109 bytes) @ 0x00007f965fa8ee64 [0x00007f965fa8e560+0x904] > J 35645 C2 > org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext()Z (31 > bytes) @ 0x00007f965fbc58e4 [0x00007f965fbc58a0+0x44] > j > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(Lscala/collection/Iterator;Lorg/apache/spark/sql/execution/datasources/FileFormatDataWriter;)Lorg/apache/spark/sql/execution/datasources/WriteTaskResult;+1 > j > org.apache.spark.sql.execution.datasources.FileFormatWriter$$$Lambda$4398.apply()Ljava/lang/Object;+8 > j > org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Lscala/Function0;Lscala/Function0;Lscala/Function0;)Ljava/lang/Object;+4 > j > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(Lorg/apache/spark/sql/execution/datasources/WriteJobDescription;JIIILorg/apache/spark/internal/io/FileCommitProtocol;ILscala/collection/Iterator;)Lorg/apache/spark/sql/execution/datasources/WriteTaskResult;+258 > J 30523 C1 > 
org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$23(Lorg/apache/spark/sql/execution/datasources/WriteJobDescription;JLorg/apache/spark/internal/io/FileCommitProtocol;Lscala/runtime/IntRef;Lscala/collection/immutable/Map;Lorg/apache/spark/TaskContext;Lscala/collection/Iterator;)Lorg/apache/spark/sql/execution/datasources/WriteTaskResult; > (61 bytes) @ 0x00007f966066b004 [0x00007f966066a7a0+0x864] > J 30529 C1 > org.apache.spark.sql.execution.datasources.FileFormatWriter$$$Lambda$3569.apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object; > (32 bytes) @ 0x00007f965f79bd1c [0x00007f965f79baa0+0x27c] > J 29322 C1 > org.apache.spark.scheduler.ResultTask.runTask(Lorg/apache/spark/TaskContext;)Ljava/lang/Object; > (210 bytes) @ 0x00007f966094bd0c [0x00007f96609497a0+0x256c] > J 24071 C1 > org.apache.spark.scheduler.Task.run(JILorg/apache/spark/metrics/MetricsSystem;Lscala/collection/immutable/Map;)Ljava/lang/Object; > (536 bytes) @ 0x00007f965fca493c [0x00007f965fca1000+0x393c] > J 23198 C1 > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Lorg/apache/spark/executor/Executor$TaskRunner;Lscala/runtime/BooleanRef;)Ljava/lang/Object; > (43 bytes) @ 0x00007f965f86373c [0x00007f965f8634e0+0x25c] > J 23196 C1 > org.apache.spark.executor.Executor$TaskRunner$$Lambda$984.apply()Ljava/lang/Object; > (12 bytes) @ 0x00007f965f860e44 [0x00007f965f860dc0+0x84] > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org