This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 23e0ce72d2 HDDS-8223. SCM delete block service should wait for
safemode to exit. (#4432)
23e0ce72d2 is described below
commit 23e0ce72d24e02f41b93d91b28bebc9bf061a3a3
Author: hao guo <[email protected]>
AuthorDate: Fri Mar 24 17:24:55 2023 +0800
HDDS-8223. SCM delete block service should wait for safemode to exit.
(#4432)
---
.../hadoop/hdds/scm/block/BlockManagerImpl.java | 17 ++-------
.../hdds/scm/block/SCMBlockDeletingService.java | 42 ++++++++++++++++++----
.../hdds/scm/server/StorageContainerManager.java | 8 ++++-
3 files changed, 45 insertions(+), 22 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
index c4e5d1a0d1..70c98b5ac7 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
@@ -18,13 +18,11 @@ package org.apache.hadoop.hdds.scm.block;
import javax.management.ObjectName;
import java.io.IOException;
-import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.hdds.client.BlockID;
@@ -33,7 +31,6 @@ import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageUnit;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
-import org.apache.hadoop.hdds.scm.ScmConfig;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock;
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
@@ -50,8 +47,6 @@ import org.apache.hadoop.util.StringUtils;
import static
org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.INVALID_BLOCK_SIZE;
import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.LOCAL_ID;
-import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT;
-import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT;
import org.apache.ratis.protocol.exceptions.NotLeaderException;
import org.slf4j.Logger;
@@ -110,18 +105,12 @@ public class BlockManagerImpl implements BlockManager,
BlockmanagerMXBean {
scm.getScmContext(),
scm.getSequenceIdGen(),
metrics);
- Duration svcInterval = conf.getObject(
- ScmConfig.class).getBlockDeletionInterval();
- long serviceTimeout =
- conf.getTimeDuration(
- OZONE_BLOCK_DELETING_SERVICE_TIMEOUT,
- OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT,
- TimeUnit.MILLISECONDS);
+
blockDeletingService =
new SCMBlockDeletingService(deletedBlockLog,
scm.getScmNodeManager(), scm.getEventQueue(), scm.getScmContext(),
- scm.getSCMServiceManager(), svcInterval, serviceTimeout, conf,
- metrics);
+ scm.getSCMServiceManager(), conf,
+ metrics, scm.getSystemClock());
}
/**
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
index abb77abefd..0a1222e380 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
@@ -17,7 +17,7 @@
package org.apache.hadoop.hdds.scm.block;
import java.io.IOException;
-import java.time.Duration;
+import java.time.Clock;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@@ -29,6 +29,7 @@ import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
+import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
@@ -56,6 +57,9 @@ import
org.apache.ratis.protocol.exceptions.NotLeaderException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT;
+import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT;
+
/**
* A background service running in SCM to delete blocks. This service scans
* block deletion log in certain interval and caches block deletion commands
@@ -84,15 +88,29 @@ public class SCMBlockDeletingService extends
BackgroundService
private final Lock serviceLock = new ReentrantLock();
private ServiceStatus serviceStatus = ServiceStatus.PAUSING;
+ private long safemodeExitMillis = 0;
+ private final long safemodeExitRunDelayMillis;
+ private final Clock clock;
+
@SuppressWarnings("parameternumber")
public SCMBlockDeletingService(DeletedBlockLog deletedBlockLog,
NodeManager nodeManager, EventPublisher eventPublisher,
SCMContext scmContext, SCMServiceManager serviceManager,
- Duration interval, long serviceTimeout,
ConfigurationSource conf,
- ScmBlockDeletingServiceMetrics metrics) {
- super("SCMBlockDeletingService", interval.toMillis(),
TimeUnit.MILLISECONDS,
- BLOCK_DELETING_SERVICE_CORE_POOL_SIZE, serviceTimeout);
+ ScmBlockDeletingServiceMetrics metrics,
+ Clock clock) {
+ super("SCMBlockDeletingService",
+ conf.getObject(ScmConfig.class).getBlockDeletionInterval().toMillis(),
+ TimeUnit.MILLISECONDS, BLOCK_DELETING_SERVICE_CORE_POOL_SIZE,
+ conf.getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_TIMEOUT,
+ OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT,
+ TimeUnit.MILLISECONDS));
+
+ this.safemodeExitRunDelayMillis = conf.getTimeDuration(
+ HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
+ HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT_DEFAULT,
+ TimeUnit.MILLISECONDS);
+ this.clock = clock;
this.deletedBlockLog = deletedBlockLog;
this.nodeManager = nodeManager;
this.eventPublisher = eventPublisher;
@@ -211,7 +229,9 @@ public class SCMBlockDeletingService extends
BackgroundService
public void notifyStatusChanged() {
serviceLock.lock();
try {
- if (scmContext.isLeaderReady()) {
+ if (scmContext.isLeaderReady() && !scmContext.isInSafeMode() &&
+ serviceStatus != ServiceStatus.RUNNING) {
+ safemodeExitMillis = clock.millis();
serviceStatus = ServiceStatus.RUNNING;
} else {
serviceStatus = ServiceStatus.PAUSING;
@@ -225,7 +245,15 @@ public class SCMBlockDeletingService extends
BackgroundService
public boolean shouldRun() {
serviceLock.lock();
try {
- return serviceStatus == ServiceStatus.RUNNING;
+ long alreadyWaitTimeInMillis = clock.millis() - safemodeExitMillis;
+ boolean run = serviceStatus == ServiceStatus.RUNNING &&
+ (alreadyWaitTimeInMillis >= safemodeExitRunDelayMillis);
+ LOG.debug(
+ "Check scm block delete run: {} serviceStatus: {} " +
+ "safemodeExitRunDelayMillis: {} alreadyWaitTimeInMillis: {}",
+ run, serviceStatus, safemodeExitRunDelayMillis,
+ alreadyWaitTimeInMillis);
+ return run;
} finally {
serviceLock.unlock();
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 8cc0018252..ad29a1679e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -303,6 +303,8 @@ public final class StorageContainerManager extends
ServiceRuntimeInfoImpl
OZONE_ADMINISTRATORS
);
+ private Clock systemClock;
+
/**
* Creates a new StorageContainerManager. Configuration will be
* updated with information on the actual listening addresses used
@@ -597,7 +599,7 @@ public final class StorageContainerManager extends
ServiceRuntimeInfoImpl
SCMConfigurator configurator) throws IOException {
// Use SystemClock when data is persisted
// and used again after system restarts.
- Clock systemClock = Clock.system(ZoneOffset.UTC);
+ systemClock = Clock.system(ZoneOffset.UTC);
if (configurator.getNetworkTopology() != null) {
clusterMap = configurator.getNetworkTopology();
@@ -906,6 +908,10 @@ public final class StorageContainerManager extends
ServiceRuntimeInfoImpl
scmCertificateClient = client;
}
+ public Clock getSystemClock() {
+ return systemClock;
+ }
+
private ContainerTokenSecretManager createContainerTokenSecretManager(
OzoneConfiguration conf) throws IOException {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]