刘珍 created IOTDB-4966: ------------------------- Summary: Support graceful process exit when removing a DataNode Key: IOTDB-4966 URL: https://issues.apache.org/jira/browse/IOTDB-4966 Project: Apache IoTDB Issue Type: Improvement Components: mpp-cluster Reporter: 刘珍 Assignee: Gaofei Cao
缩容datanode,进程退出不够优雅,有一些报错(可加入处理计划,优先级可低)(English: when the cluster is scaled in by removing a DataNode, the process does not exit gracefully and several errors are logged; this can be added to the work plan at low priority): 2022-11-16 15:54:13,378 [pool-24-IoTDB-DataNodeInternalRPC-Processor-51] ERROR o.a.i.c.s.ThriftService:168 - IoTDB: close MPP Data exchange manager failed because: java.lang.InterruptedException: null at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1302) at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:231) at org.apache.iotdb.commons.service.ThriftService.stopService(ThriftService.java:163) at org.apache.iotdb.commons.service.ThriftService.stop(ThriftService.java:74) at org.apache.iotdb.db.mpp.execution.exchange.MPPDataExchangeService.stop(MPPDataExchangeService.java:128) at org.apache.iotdb.commons.service.IService.waitAndStop(IService.java:36) at org.apache.iotdb.commons.service.RegisterManager.deregisterAll(RegisterManager.java:60) at org.apache.iotdb.db.service.DataNode.deactivate(DataNode.java:683) at org.apache.iotdb.db.service.DataNode.stop(DataNode.java:651) at org.apache.iotdb.db.service.thrift.impl.DataNodeInternalRPCServiceImpl.stopDataNode(DataNodeInternalRPCServiceImpl.java:1493) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4082) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4062) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:248) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 2022-11-16 15:54:13,388 [pool-24-IoTDB-DataNodeInternalRPC-Processor-51] ERROR o.a.i.c.s.RegisterManager:63 - Failed to stop Storage Engine ServerService because: 
org.apache.iotdb.db.exception.runtime.StorageEngineFailureException: StorageEngine failed to stop because of Timed-Flush-Unseq-Memtable. at org.apache.iotdb.db.utils.ThreadUtils.stopThreadPool(ThreadUtils.java:47) at org.apache.iotdb.db.engine.StorageEngineV2.stop(StorageEngineV2.java:389) at org.apache.iotdb.commons.service.IService.waitAndStop(IService.java:36) at org.apache.iotdb.commons.service.RegisterManager.deregisterAll(RegisterManager.java:60) at org.apache.iotdb.db.service.DataNode.deactivate(DataNode.java:683) at org.apache.iotdb.db.service.DataNode.stop(DataNode.java:651) at org.apache.iotdb.db.service.thrift.impl.DataNodeInternalRPCServiceImpl.stopDataNode(DataNodeInternalRPCServiceImpl.java:1493) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4082) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4062) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:248) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.InterruptedException: null at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2067) at java.util.concurrent.ThreadPoolExecutor.awaitTermination(ThreadPoolExecutor.java:1475) at java.util.concurrent.Executors$DelegatedExecutorService.awaitTermination(Executors.java:675) at org.apache.iotdb.commons.concurrent.threadpool.WrappedSingleThreadScheduledExecutor.awaitTermination(WrappedSingleThreadScheduledExecutor.java:99) at org.apache.iotdb.db.utils.ThreadUtils.stopThreadPool(ThreadUtils.java:40) ... 
14 common frames omitted 2022-11-16 15:54:13,389 [pool-24-IoTDB-DataNodeInternalRPC-Processor-51] ERROR o.a.i.d.w.b.WALBuffer:521 - Fail to put CLOSE_SIGNAL to walEntries. java.lang.InterruptedException: null at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1220) at java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:335) at java.util.concurrent.ArrayBlockingQueue.put(ArrayBlockingQueue.java:350) at org.apache.iotdb.db.wal.buffer.WALBuffer.close(WALBuffer.java:519) at org.apache.iotdb.db.wal.node.WALNode.close(WALNode.java:753) at org.apache.iotdb.db.wal.allocation.FirstCreateStrategy.clear(FirstCreateStrategy.java:117) at org.apache.iotdb.db.wal.WALManager.clear(WALManager.java:245) at org.apache.iotdb.db.wal.WALManager.stop(WALManager.java:220) at org.apache.iotdb.commons.service.IService.waitAndStop(IService.java:36) at org.apache.iotdb.commons.service.RegisterManager.deregisterAll(RegisterManager.java:60) at org.apache.iotdb.db.service.DataNode.deactivate(DataNode.java:683) at org.apache.iotdb.db.service.DataNode.stop(DataNode.java:651) at org.apache.iotdb.db.service.thrift.impl.DataNodeInternalRPCServiceImpl.stopDataNode(DataNodeInternalRPCServiceImpl.java:1493) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4082) at org.apache.iotdb.mpp.rpc.thrift.IDataNodeRPCService$Processor$stopDataNode.getResult(IDataNodeRPCService.java:4062) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:248) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 2022-11-16 15:54:33,345 [Thread-527] INFO 
o.a.i.d.s.t.i.DataNodeInternalRPCServiceImpl:1486 - Executing system.exit(0) in stopDataNode RPC method after 20 seconds -- This message was sent by Atlassian Jira (v8.20.10#820010)