[ https://issues.apache.org/jira/browse/HDDS-11220?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sammi Chen reassigned HDDS-11220: --------------------------------- Assignee: Sammi Chen > [HBase Replication] RS down due to "ManagedChannelOrphanWrapper: Previous > channel was not shutdown properly" > ------------------------------------------------------------------------------------------------------------ > > Key: HDDS-11220 > URL: https://issues.apache.org/jira/browse/HDDS-11220 > Project: Apache Ozone > Issue Type: Bug > Reporter: Pratyush Bhatt > Assignee: Sammi Chen > Priority: Major > > *Scenario:* Bidirectional HBase replication, with HBase on Ozone on both > clusters. > The affected RegionServer (RS) went down with the error > {_}org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: > *~*~*~ Previous channel ManagedChannelImpl\{logId=167987, > target=10.140.176.6:9859} was not shutdown properly!!!{_}: > {code:java} > 2024-07-19 19:08:26,365 ERROR > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: > *~*~*~ Previous channel ManagedChannelImpl{logId=167987, > target=10.140.176.6:9859} was not shutdown properly!!! ~*~*~* > Make sure to call shutdown()/shutdownNow() and wait until > awaitTermination() returns true. 
> java.lang.RuntimeException: ManagedChannel allocation site > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631) > at > org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.connect(XceiverClientGrpc.java:161) > at > org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:256) > at > org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:237) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4938) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3576) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2318) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2191) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2081) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache.get(LocalCache.java:4019) > at > org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4933) > at > org.apache.hadoop.hdds.scm.XceiverClientManager.getClient(XceiverClientManager.java:237) > at > 
org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClient(XceiverClientManager.java:184) > at > org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClientForReadData(XceiverClientManager.java:161) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.acquireClient(BlockInputStream.java:342) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:258) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370) > at > org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66) > at > org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56) > at > org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96) > at > org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126) > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169) > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125) > at > org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152) > at > org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528) > at > 
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1322) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1242) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.readAndUpdateNewBlock(HFileReaderImpl.java:946) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:935) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:311) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:214) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.seekScanners(StoreScanner.java:412) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:323) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:289) > at > org.apache.hadoop.hbase.regionserver.compactions.Compactor.createScanner(Compactor.java:530) > at > org.apache.hadoop.hbase.regionserver.compactions.Compactor$1.createScanner(Compactor.java:259) > at > org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Compactor.java:346) > at > org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(DefaultCompactor.java:66) > at > org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(DefaultStoreEngine.java:122) > at > org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1189) > at > org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261) > at > org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625) > at > org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > 2024-07-19 
19:08:26,365 ERROR > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: > *~*~*~ Previous channel ManagedChannelImpl{logId=167995, > target=10.140.139.7:9859} was not shutdown properly!!! ~*~*~* > Make sure to call shutdown()/shutdownNow() and wait until > awaitTermination() returns true. > java.lang.RuntimeException: ManagedChannel allocation site > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51) > at > org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631) > at > org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.reconnect(XceiverClientGrpc.java:618) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.checkOpen(XceiverClientGrpc.java:609) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandAsync(XceiverClientGrpc.java:539) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithRetry(XceiverClientGrpc.java:415) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.lambda$sendCommandWithTraceIDAndRetry$0(XceiverClientGrpc.java:352) > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169) > > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159) > at > org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithTraceIDAndRetry(XceiverClientGrpc.java:344) > at > 
org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommand(XceiverClientGrpc.java:325) > at > org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:235) > at > org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.lambda$getBlock$0(ContainerProtocolCalls.java:209) > at > org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.tryEachDatanode(ContainerProtocolCalls.java:158) > at > org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:208) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockDataUsingClient(BlockInputStream.java:288) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:260) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164) > at > org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370) > at > org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66) > at > org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56) > at > org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96) > at > org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126) > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169) > at > org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125) > at > org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152) > at > org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78) > at > 
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1423) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1436) > at > org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:368) > at > org.apache.hadoop.hbase.regionserver.HStoreFile.open(HStoreFile.java:368) > at > org.apache.hadoop.hbase.regionserver.HStoreFile.initReader(HStoreFile.java:485) > at > org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:224) > at > org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:217) > at > org.apache.hadoop.hbase.regionserver.StoreEngine.validateStoreFile(StoreEngine.java:236) > at > org.apache.hadoop.hbase.regionserver.StoreEngine.commitStoreFiles(StoreEngine.java:422) > at > org.apache.hadoop.hbase.regionserver.HStore.doCompaction(HStore.java:1200) > at > org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1188) > at > org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261) > at > org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625) > at > org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > 2024-07-19 19:08:30,744 WARN > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: > Failed to read stream of replication entries{code} > And 
then Error logs with java.io.EOFException followed: > {code:java} > java.io.EOFException: EOF encountered at pos: 3439495 for key: > hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.root.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990 > at > org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99) > at > org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70) > at > org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505) > at > org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427) > at > org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95) > at > org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101) > at > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212) > at > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148) > 2024-07-19 19:08:32,912 WARN > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: > Failed to read stream of replication entries > java.io.EOFException: EOF encountered at pos: 3439495 for key: > 
hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990 > at > org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139) > at > org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99) > at > org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70) > at > org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505) > at > org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427) > at > org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95) > at > org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172) > at > org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101) > at > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212) > at > org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148) > 2024-07-19 19:08:35,180 WARN > org.apache.hadoop.hbase.client.AsyncRequestFutureImpl: id=2, > table=newtableloadtest2, attempt=6/5, failureCount=192ops, last > exception=org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: > org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: Server > ccycloud-1.ozn-hbaserepl2.xyz,22101,1721293283469 aborting > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.checkOpen(RSRpcServices.java:1524) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:2691) 
> at > org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:45961) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:387) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:139) > {code} > cc: [~sammichen] [~weichiu] [~ashishk] -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For additional commands, e-mail: issues-h...@ozone.apache.org