[ 
https://issues.apache.org/jira/browse/HBASE-25929?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17352284#comment-17352284
 ] 

Yi Mei commented on HBASE-25929:
--------------------------------

I have added a unit test (UT) to reproduce the problem, and I will upload a patch later.

> RegionServer JVM crash when compaction
> --------------------------------------
>
>                 Key: HBASE-25929
>                 URL: https://issues.apache.org/jira/browse/HBASE-25929
>             Project: HBase
>          Issue Type: Bug
>          Components: Compaction
>    Affects Versions: 3.0.0-alpha-1, 2.5.0, 2.3.5, 2.4.3
>            Reporter: Yi Mei
>            Assignee: Yi Mei
>            Priority: Major
>         Attachments: hs_err_pid27712.log, hs_err_pid28814.log
>
>
> In our cluster, we found that region servers may crash in several cases.
> In hs_err_pid27712.log:
> {code:java}
> Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)
> J 2687  sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V 
> (0 bytes) @ 0x00007f85c987eda7 [0x00007f85c987ed40+0x67]
> J 5884 C1 
> org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V
>  (62 bytes) @ 0x00007f85c93fd904 [0x00007f85c93fd780+0x184]
> J 4274 C1 
> org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V 
> (73 bytes) @ 0x00007f85c9d57a94 [0x00007f85c9d574a0+0x5f4]
> J 5211 C2 
> org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V
>  (69 bytes) @ 0x00007f85ca039a34 [0x00007f85ca0399a0+0x94]
> J 5985 C1 
> org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I
>  (59 bytes) @ 0x00007f85c9296a34 [0x00007f85c92964c0+0x574]
> J 6011 C1 org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B (5 
> bytes) @ 0x00007f85c913e094 [0x00007f85c913d4c0+0xbd4]
> J 6004 C1 
> org.apache.hadoop.hbase.CellUtil.getCellKeyAsString(Lorg/apache/hadoop/hbase/Cell;Ljava/util/function/Function;)Ljava/lang/String;
>  (211 bytes) @ 0x00007f85c93737b4 [0x00007f85c93722e0+0x14d4]
> J 6000 C1 
> org.apache.hadoop.hbase.CellUtil.getCellKeyAsString(Lorg/apache/hadoop/hbase/Cell;)Ljava/lang/String;
>  (10 bytes) @ 0x00007f85c9854d14 [0x00007f85c9854ba0+0x174]
> j  
> org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.getMidpoint(Lorg/apache/hadoop/hbase/CellComparator;Lorg/apache/hadoop/hbase/Cell;Lorg/apache/hadoop/hbase/Cell;Lorg/apache/hadoop/hbase/io/hfile/HFileContext;)Lorg/apache/hadoop/hbase/Cell;+132
> j  org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.finishBlock()V+102
> j  org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.checkBlockBoundary()V+32
> j  
> org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.append(Lorg/apache/hadoop/hbase/Cell;)V+77
> j  
> org.apache.hadoop.hbase.regionserver.StoreFileWriter.append(Lorg/apache/hadoop/hbase/Cell;)V+20
> j  
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.performCompaction(Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$FileDetails;Lorg/apache/hadoop/hbase/regionserver/InternalScanner;Lorg/apache/hadoop/hbase/regionserver/CellSink;JZLorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;ZI)Z+318
> j  
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionRequestImpl;Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$InternalScannerFactory;Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$CellSinkFactory;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+221
> j  
> org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionRequestImpl;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+12
> j  
> org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+16
> j  
> org.apache.hadoop.hbase.regionserver.HStore.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionContext;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+194
> {code}
> In hs_err_pid28814.log:
> {code:java}
> Stack: [0x00007f6d8e69b000,0x00007f6d8e6dc000],  sp=0x00007f6d8e6d9e88,  free 
> space=251k
> Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native 
> code)
> V  [libjvm.so+0x747fa0]
> J 2989  sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V 
> (0 bytes) @ 0x00007f751db756e1 [0x00007f751db75600+0xe1]
> j  
> org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V+36
> j  
> org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V+69
> j  
> org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V+39
> j  
> org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I+31
> J 12082 C2 org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B 
> (5 bytes) @ 0x00007f751ef15fbc [0x00007f751ef15dc0+0x1fc]
> J 16584 C2 
> org.apache.hadoop.hbase.CellUtil.getCellKeyAsString(Lorg/apache/hadoop/hbase/Cell;Ljava/util/function/Function;)Ljava/lang/String;
>  (211 bytes) @ 0x00007f751fe320b8 [0x00007f751fe31b80+0x538]
> J 17007 C2 
> org.apache.hadoop.hbase.regionserver.StoreFileWriter.append(Lorg/apache/hadoop/hbase/Cell;)V
>  (31 bytes) @ 0x00007f751fc2c0f4 [0x00007f751fc2aac0+0x1634]
> J 17178 C2 
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.performCompaction(Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$FileDetails;Lorg/apache/hadoop/hbase/regionserver/InternalScanner;Lorg/apache/hadoop/hbase/regionserver/CellSink;JZLorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;ZI)Z
>  (767 bytes) @ 0x00007f751f8e330c [0x00007f751f8e2960+0x9ac]
> j  
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionRequestImpl;Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$InternalScannerFactory;Lorg/apache/hadoop/hbase/regionserver/compactions/Compactor$CellSinkFactory;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+221
> j  
> org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionRequestImpl;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+12
> j  
> org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+16
> j  
> org.apache.hadoop.hbase.regionserver.HStore.compact(Lorg/apache/hadoop/hbase/regionserver/compactions/CompactionContext;Lorg/apache/hadoop/hbase/regionserver/throttle/ThroughputController;Lorg/apache/hadoop/hbase/security/User;)Ljava/util/List;+194
> {code}
> Sometimes the RS does not crash, but we can see the following logs:
> {code:java}
> 2021-05-27T12:53:54,465 ERROR 
> [RpcServer.default.FPBQ.Fifo.handler=2,queue=0,port=40769-shortCompactions-2] 
> regionserver.CompactSplit$CompactionRunner(640): Compaction failed 
> Request=regionName=t1,user00000000000000000080,1622091224198.cf38ea5f2ea0d90163b53c2b2fd329d2.,
>  storeName=A, fileCount=2, fileSize=2.0 M (1009.7 K, 1009.7 K), priority=1, 
> time=16220912334172021-05-27T12:53:54,465 ERROR 
> [RpcServer.default.FPBQ.Fifo.handler=2,queue=0,port=40769-shortCompactions-2] 
> regionserver.CompactSplit$CompactionRunner(640): Compaction failed 
> Request=regionName=t1,user00000000000000000080,1622091224198.cf38ea5f2ea0d90163b53c2b2fd329d2.,
>  storeName=A, fileCount=2, fileSize=2.0 M (1009.7 K, 1009.7 K), priority=1, 
> time=1622091233417java.lang.IllegalArgumentException: Left byte array sorts 
> after right row; left=user00000000000000000080, 
> right=user00000000000000000001 at 
> org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.getMinimumMidpointArray(HFileWriterImpl.java:445)
>  ~[classes/:?] at 
> org.apache.hadoop.hbase.io.hfile.HFileWriterImpl.getMidpoint(HFileWriterImpl.java:390)
>  ~[classes/:?]
> {code}
> This happens because the byte buffers of the cells have already been released, 
> but the RS still needs to use these cells.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to