[ 
https://issues.apache.org/jira/browse/HDDS-11220?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sammi Chen reassigned HDDS-11220:
---------------------------------

    Assignee: Sammi Chen

> [HBase Replication] RS down due to "ManagedChannelOrphanWrapper: Previous 
> channel was not shutdown properly"
> ------------------------------------------------------------------------------------------------------------
>
>                 Key: HDDS-11220
>                 URL: https://issues.apache.org/jira/browse/HDDS-11220
>             Project: Apache Ozone
>          Issue Type: Bug
>            Reporter: Pratyush Bhatt
>            Assignee: Sammi Chen
>            Priority: Major
>
> *Scenario:* Bidirectional HBase replication, with HBase on Ozone on both the 
> clusters.
> Affected RS went down with 
> {_}org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: 
> *~*~*~ Previous channel ManagedChannelImpl\{logId=167987, 
> target=10.140.176.6:9859} was not shutdown properly!!!{_}: 
> {code:java}
> 2024-07-19 19:08:26,365 ERROR 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: 
> *~*~*~ Previous channel ManagedChannelImpl{logId=167987, 
> target=10.140.176.6:9859} was not shutdown properly!!! ~*~*~*
>     Make sure to call shutdown()/shutdownNow() and wait until 
> awaitTermination() returns true.
> java.lang.RuntimeException: ManagedChannel allocation site
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.connect(XceiverClientGrpc.java:161)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:256)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:237)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4938)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3576)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2318)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2191)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2081)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache.get(LocalCache.java:4019)
>         at 
> org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4933)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientManager.getClient(XceiverClientManager.java:237)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClient(XceiverClientManager.java:184)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClientForReadData(XceiverClientManager.java:161)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.acquireClient(BlockInputStream.java:342)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:258)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370)
>         at 
> org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
>         at 
> org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56)
>         at 
> org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96)
>         at 
> org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126)
>         at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
>         at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125)
>         at 
> org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152)
> at 
> org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1322)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1242)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.readAndUpdateNewBlock(HFileReaderImpl.java:946)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:935)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:311)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:214)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreScanner.seekScanners(StoreScanner.java:412)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:323)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:289)
>         at 
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.createScanner(Compactor.java:530)
>         at 
> org.apache.hadoop.hbase.regionserver.compactions.Compactor$1.createScanner(Compactor.java:259)
>         at 
> org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Compactor.java:346)
>         at 
> org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(DefaultCompactor.java:66)
>         at 
> org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(DefaultStoreEngine.java:122)
>         at 
> org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1189)
>         at 
> org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261)
>         at 
> org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625)
>         at 
> org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> 2024-07-19 19:08:26,365 ERROR 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: 
> *~*~*~ Previous channel ManagedChannelImpl{logId=167995, 
> target=10.140.139.7:9859} was not shutdown properly!!! ~*~*~*
>     Make sure to call shutdown()/shutdownNow() and wait until 
> awaitTermination() returns true.
> java.lang.RuntimeException: ManagedChannel allocation site
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.reconnect(XceiverClientGrpc.java:618)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.checkOpen(XceiverClientGrpc.java:609)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandAsync(XceiverClientGrpc.java:539)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithRetry(XceiverClientGrpc.java:415)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.lambda$sendCommandWithTraceIDAndRetry$0(XceiverClientGrpc.java:352)
>         at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
>  
> at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithTraceIDAndRetry(XceiverClientGrpc.java:344)
>         at 
> org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommand(XceiverClientGrpc.java:325)
>         at 
> org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:235)
>         at 
> org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.lambda$getBlock$0(ContainerProtocolCalls.java:209)
>         at 
> org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.tryEachDatanode(ContainerProtocolCalls.java:158)
>         at 
> org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:208)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockDataUsingClient(BlockInputStream.java:288)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:260)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164)
>         at 
> org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370)
>         at 
> org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
>         at 
> org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56)
>         at 
> org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96)
>         at 
> org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126)
>         at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
>         at 
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125)
>         at 
> org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152)
>         at 
> org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1423)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1436)
>         at 
> org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:368)
>         at 
> org.apache.hadoop.hbase.regionserver.HStoreFile.open(HStoreFile.java:368)
>         at 
> org.apache.hadoop.hbase.regionserver.HStoreFile.initReader(HStoreFile.java:485)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:224)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:217)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreEngine.validateStoreFile(StoreEngine.java:236)
>         at 
> org.apache.hadoop.hbase.regionserver.StoreEngine.commitStoreFiles(StoreEngine.java:422)
>         at 
> org.apache.hadoop.hbase.regionserver.HStore.doCompaction(HStore.java:1200)
>         at 
> org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1188)
>         at 
> org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261)
>         at 
> org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625)
>         at 
> org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> 2024-07-19 19:08:30,744 WARN 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: 
> Failed to read stream of replication entries{code}
> And then Error logs with java.io.EOFException followed:
> {code:java}
> java.io.EOFException: EOF encountered at pos: 3439495 for key: 
> hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.root.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990
>         at 
> org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99)
>         at 
> org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148)
> 2024-07-19 19:08:32,912 WARN 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: 
> Failed to read stream of replication entries
> java.io.EOFException: EOF encountered at pos: 3439495 for key: 
> hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990
>         at 
> org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139)
>         at 
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99)
>         at 
> org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95)
>         at 
> org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212)
>         at 
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148)
> 2024-07-19 19:08:35,180 WARN 
> org.apache.hadoop.hbase.client.AsyncRequestFutureImpl: id=2, 
> table=newtableloadtest2, attempt=6/5, failureCount=192ops, last 
> exception=org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: 
> org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: Server 
> ccycloud-1.ozn-hbaserepl2.xyz,22101,1721293283469 aborting
>         at 
> org.apache.hadoop.hbase.regionserver.RSRpcServices.checkOpen(RSRpcServices.java:1524)
>         at 
> org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:2691)
>         at 
> org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:45961)
>         at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:387)
>         at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:139) 
> {code}
> cc: [~sammichen] [~weichiu] [~ashishk] 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org
For additional commands, e-mail: issues-h...@ozone.apache.org

Reply via email to