This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch HDDS-14496-zdu
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-14496-zdu by this push:
new c9fe669c994 HDDS-14672. Remove SCM finalization checkpoints (#9955)
c9fe669c994 is described below
commit c9fe669c99438423ade47409e96dcb2c393c837b
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Tue Mar 24 11:59:40 2026 +0000
HDDS-14672. Remove SCM finalization checkpoints (#9955)
---
.../org/apache/hadoop/hdds/scm/ha/SCMContext.java | 32 ---
.../apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 1 -
.../hadoop/hdds/scm/node/SCMNodeManager.java | 15 +-
.../hdds/scm/server/StorageContainerManager.java | 1 -
.../scm/server/upgrade/FinalizationCheckpoint.java | 87 ------
.../scm/server/upgrade/FinalizationManager.java | 9 +-
.../server/upgrade/FinalizationManagerImpl.java | 44 ---
.../server/upgrade/FinalizationStateManager.java | 15 --
.../upgrade/FinalizationStateManagerImpl.java | 138 ++--------
.../scm/server/upgrade/SCMUpgradeFinalizer.java | 37 +--
.../hadoop/hdds/scm/node/TestNodeStateManager.java | 3 -
.../hadoop/hdds/scm/node/TestSCMNodeManager.java | 31 +--
.../hdds/scm/upgrade/TestScmFinalization.java | 299 ---------------------
.../hadoop/hdds/upgrade/TestHDDSUpgrade.java | 11 +-
.../hadoop/hdds/upgrade/TestHddsUpgradeUtils.java | 16 +-
.../TestScmDataDistributionFinalization.java | 19 +-
.../hadoop/hdds/upgrade/TestScmHAFinalization.java | 176 +-----------
17 files changed, 66 insertions(+), 868 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
index 1b371b7fa04..c603fda491e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
@@ -24,7 +24,6 @@
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus;
import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.ratis.protocol.exceptions.NotLeaderException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -64,16 +63,10 @@ public final class SCMContext {
private final OzoneStorageContainerManager scm;
private final ReadWriteLock lock = new ReentrantReadWriteLock();
- /**
- * Tracks the last crossed SCM upgrade finalization checkpoint.
- */
- private volatile FinalizationCheckpoint finalizationCheckpoint;
-
private SCMContext(Builder b) {
isLeader = b.isLeader;
term = b.term;
safeModeStatus = b.safeModeStatus;
- finalizationCheckpoint = b.finalizationCheckpoint;
scm = b.scm;
threadNamePrefix = b.threadNamePrefix;
}
@@ -128,15 +121,6 @@ public void setLeaderReady() {
}
}
- public void setFinalizationCheckpoint(FinalizationCheckpoint checkpoint) {
- lock.writeLock().lock();
- try {
- this.finalizationCheckpoint = checkpoint;
- } finally {
- lock.writeLock().unlock();
- }
- }
-
/**
* Check whether current SCM is leader or not.
*
@@ -246,15 +230,6 @@ public boolean isPreCheckComplete() {
}
}
- public FinalizationCheckpoint getFinalizationCheckpoint() {
- lock.readLock().lock();
- try {
- return this.finalizationCheckpoint;
- } finally {
- lock.readLock().unlock();
- }
- }
-
/**
* @return StorageContainerManager
*/
@@ -278,7 +253,6 @@ public static class Builder {
private long term = INVALID_TERM;
private SafeModeStatus safeModeStatus = SafeModeStatus.OUT_OF_SAFE_MODE;
private OzoneStorageContainerManager scm = null;
- private FinalizationCheckpoint finalizationCheckpoint =
FinalizationCheckpoint.FINALIZATION_COMPLETE;
private String threadNamePrefix = "";
public Builder setLeader(boolean leader) {
@@ -302,12 +276,6 @@ public Builder setSCM(
return this;
}
- public Builder setFinalizationCheckpoint(
- FinalizationCheckpoint checkpoint) {
- this.finalizationCheckpoint = checkpoint;
- return this;
- }
-
public SCMContext.Builder setThreadNamePrefix(String prefix) {
this.threadNamePrefix = prefix;
return this;
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
index d702bb2a5d4..1e99a49a403 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
@@ -391,7 +391,6 @@ public void notifyLeaderReady() {
// leader ready in SCMContext.
scm.getScmContext().setLeaderReady();
scm.getSCMServiceManager().notifyStatusChanged();
- scm.getFinalizationManager().onLeaderReady();
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index fedcb5fb17d..7dd196f3ec1 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -760,19 +760,14 @@ protected void
sendFinalizeToDatanodeIfNeeded(DatanodeDetails datanodeDetails,
datanodeDetails.getHostName(), dnSlv, scmSlv);
}
- if (FinalizationManager.shouldTellDatanodesToFinalize(
- scmContext.getFinalizationCheckpoint())) {
- // Because we have crossed the MLV_EQUALS_SLV checkpoint, SCM metadata
- // layout version will not change. We can now compare it to the
- // datanodes' metadata layout versions to tell them to finalize.
+ if
(FinalizationManager.shouldTellDatanodesToFinalize(scmLayoutVersionManager)) {
+ // The version manager reports that SCM no longer needs finalization, so any
+ // DN reporting a metadata layout version lower than the SCM's metadata
+ // layout version can be told to finalize.
int scmMlv = scmLayoutVersionManager.getMetadataLayoutVersion();
- // If the datanode mlv < scm mlv, it can not be allowed to be part of
- // any pipeline. However it can be allowed to join the cluster
if (dnMlv < scmMlv) {
- LOG.warn("Data node {} can not be used in any pipeline in the " +
- "cluster. " + "DataNode MetadataLayoutVersion = {}, SCM " +
- "MetadataLayoutVersion = {}",
+ LOG.warn("Data node {} has a MetadataLayoutVersion = {}, SCM
MetadataLayoutVersion = {}. Sending finalize",
datanodeDetails.getHostName(), dnMlv, scmMlv);
FinalizeNewLayoutVersionCommand finalizeCmd =
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 3c4e0c7b171..543190140e9 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -730,7 +730,6 @@ private void initializeSystemManagers(OzoneConfiguration
conf,
.setSafeModeStatus(SCMSafeModeManager.SafeModeStatus.INITIAL)
.setSCM(this)
.setThreadNamePrefix(threadNamePrefix)
- .setFinalizationCheckpoint(finalizationManager.getCheckpoint())
.build();
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationCheckpoint.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationCheckpoint.java
deleted file mode 100644
index d1d422c2e0e..00000000000
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationCheckpoint.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hdds.scm.server.upgrade;
-
-import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
-
-/**
- * A finalization checkpoint is an abstraction over SCM's disk state,
- * indicating where finalization left off so it can be resumed on leader
- * change or restart. Currently the checkpoint is derived from two properties:
- * 1. The presence of a finalizing key in the database to indicate that
- * finalization is in progress.
- * 2. Whether SCM's metadata layout version is less than its software
- * layout version.
- */
-public enum FinalizationCheckpoint {
- FINALIZATION_REQUIRED(false, true,
- UpgradeFinalization.Status.FINALIZATION_REQUIRED),
- FINALIZATION_STARTED(true, true,
- UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS),
- MLV_EQUALS_SLV(true, false,
- UpgradeFinalization.Status.FINALIZATION_IN_PROGRESS),
- FINALIZATION_COMPLETE(false, false,
- UpgradeFinalization.Status.FINALIZATION_DONE);
-
- private final boolean needsFinalizingMark;
- private final boolean needsMlvBehindSlv;
- // The upgrade status that should be reported back to the client when this
- // checkpoint is crossed.
- private final UpgradeFinalization.Status status;
-
- FinalizationCheckpoint(boolean needsFinalizingMark,
- boolean needsMlvBehindSlv,
- UpgradeFinalization.Status status) {
- this.needsFinalizingMark = needsFinalizingMark;
- this.needsMlvBehindSlv = needsMlvBehindSlv;
- this.status = status;
- }
-
- /**
- * Given external state, determines whether that corresponds to this
- * checkpoint.
- *
- * @param hasFinalizationMark true if finalization mark is present in the
- * DB.
- * @param hasMlvBehindSlv true if the metadata layout version is less
- * than the software layout version
- * @return true if the provided state corresponds to this checkpoint.
- * False otherwise.
- */
- public boolean isCurrent(boolean hasFinalizationMark,
- boolean hasMlvBehindSlv) {
- return hasFinalizationMark == needsFinalizingMark &&
- hasMlvBehindSlv == needsMlvBehindSlv;
- }
-
- public boolean needsFinalizingMark() {
- return needsFinalizingMark;
- }
-
- public boolean needsMlvBehindSlv() {
- return needsMlvBehindSlv;
- }
-
- public boolean hasCrossed(FinalizationCheckpoint query) {
- return this.compareTo(query) >= 0;
- }
-
- public UpgradeFinalization.Status getStatus() {
- return status;
- }
-}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManager.java
index 27e26541eca..8bad970a86d 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManager.java
@@ -42,18 +42,13 @@ UpgradeFinalization.StatusAndMessages
queryUpgradeFinalizationProgress(
BasicUpgradeFinalizer<SCMUpgradeFinalizationContext,
HDDSLayoutVersionManager>
getUpgradeFinalizer();
- FinalizationCheckpoint getCheckpoint();
-
void buildUpgradeContext(NodeManager nodeManager,
SCMContext scmContext);
void reinitialize(Table<String, String> finalizationStore) throws
IOException;
- void onLeaderReady();
-
- static boolean shouldTellDatanodesToFinalize(
- FinalizationCheckpoint checkpoint) {
- return checkpoint.hasCrossed(FinalizationCheckpoint.MLV_EQUALS_SLV);
+ static boolean shouldTellDatanodesToFinalize(HDDSLayoutVersionManager
versionManager) {
+ return !versionManager.needsFinalization();
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManagerImpl.java
index 4de0abd7a09..d1870ff49c3 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationManagerImpl.java
@@ -18,12 +18,9 @@
package org.apache.hadoop.hdds.scm.server.upgrade;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.IOException;
import java.util.Collections;
import java.util.Objects;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ThreadFactory;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMHAManager;
import org.apache.hadoop.hdds.scm.node.NodeManager;
@@ -34,22 +31,16 @@
import org.apache.hadoop.ozone.upgrade.DefaultUpgradeFinalizationExecutor;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalizationExecutor;
-import org.apache.ratis.util.ExitUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* Class to initiate SCM finalization and query its progress.
*/
public class FinalizationManagerImpl implements FinalizationManager {
- private static final Logger LOG = LoggerFactory
- .getLogger(FinalizationManagerImpl.class);
private SCMUpgradeFinalizer upgradeFinalizer;
private SCMUpgradeFinalizationContext context;
private SCMStorageConfig storage;
private final FinalizationStateManager finalizationStateManager;
- private ThreadFactory threadFactory;
/**
* For test classes to inject their own state manager.
@@ -88,11 +79,6 @@ public void buildUpgradeContext(NodeManager nodeManager,
.build();
finalizationStateManager.setUpgradeContext(this.context);
-
- String prefix = scmContext != null ? scmContext.threadNamePrefix() : "";
- this.threadFactory = new ThreadFactoryBuilder()
- .setNameFormat(prefix + "FinalizationManager-%d")
- .build();
}
@Override
@@ -121,42 +107,12 @@ HDDSLayoutVersionManager> getUpgradeFinalizer() {
return upgradeFinalizer;
}
- @Override
- public FinalizationCheckpoint getCheckpoint() {
- return finalizationStateManager.getFinalizationCheckpoint();
- }
-
@Override
public void reinitialize(Table<String, String> finalizationStore)
throws IOException {
finalizationStateManager.reinitialize(finalizationStore);
}
- @Override
- public void onLeaderReady() {
- // Launch a background thread to drive finalization.
- Executors.newSingleThreadExecutor(threadFactory).submit(() -> {
- FinalizationCheckpoint currentCheckpoint = getCheckpoint();
- if (currentCheckpoint.hasCrossed(
- FinalizationCheckpoint.FINALIZATION_STARTED) &&
- !currentCheckpoint.hasCrossed(
- FinalizationCheckpoint.FINALIZATION_COMPLETE)) {
- LOG.info("SCM became leader. Resuming upgrade finalization from" +
- " current checkpoint {}.", currentCheckpoint);
- try {
- finalizeUpgrade("resume-finalization-as-leader");
- } catch (IOException ex) {
- ExitUtils.terminate(1,
- "Resuming upgrade finalization failed on SCM leader change.",
- ex, true, LOG);
- }
- } else if (LOG.isDebugEnabled()) {
- LOG.debug("SCM became leader. No upgrade finalization action" +
- " required for current checkpoint {}", currentCheckpoint);
- }
- });
- }
-
/**
* Builds a {@link FinalizationManagerImpl}.
*/
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManager.java
index 84cb6389ea6..18329495c6e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManager.java
@@ -26,25 +26,10 @@
*/
public interface FinalizationStateManager {
- @Replicate
- void addFinalizingMark() throws IOException;
-
- @Replicate
- void removeFinalizingMark() throws IOException;
-
@Replicate
void finalizeLayoutFeatures(Integer toLayoutVersion)
throws IOException;
- /**
- * @param query The checkpoint to check for being crossed.
- * @return true if SCM's disk state indicates this checkpoint has been
- * crossed. False otherwise.
- */
- boolean crossedCheckpoint(FinalizationCheckpoint query);
-
- FinalizationCheckpoint getFinalizationCheckpoint();
-
void setUpgradeContext(SCMUpgradeFinalizationContext context);
/**
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManagerImpl.java
index 5ffa8062a5e..e062c1ab7ff 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/FinalizationStateManagerImpl.java
@@ -29,7 +29,8 @@
import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.ratis.util.ExitUtils;
+import org.apache.hadoop.ozone.upgrade.LayoutFeature;
+import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,9 +48,6 @@ public class FinalizationStateManagerImpl implements
FinalizationStateManager {
// Ensures that we are not in the process of updating checkpoint state as
// we read it to determine the current checkpoint.
private final ReadWriteLock checkpointLock;
- // SCM transaction buffer flushes asynchronously, so we must keep the most
- // up-to-date DB information in memory as well for reads.
- private volatile boolean hasFinalizingMark;
private SCMUpgradeFinalizationContext upgradeContext;
private final SCMUpgradeFinalizer upgradeFinalizer;
@@ -59,50 +57,17 @@ protected FinalizationStateManagerImpl(Builder builder)
throws IOException {
this.upgradeFinalizer = builder.upgradeFinalizer;
this.versionManager = this.upgradeFinalizer.getVersionManager();
this.checkpointLock = new ReentrantReadWriteLock();
- initialize();
- }
-
- private void initialize() throws IOException {
- this.hasFinalizingMark =
- finalizationStore.isExist(OzoneConsts.FINALIZING_KEY);
- }
-
- private void publishCheckpoint(FinalizationCheckpoint checkpoint) {
- // Move the upgrade status according to this checkpoint. This is sent
- // back to the client if they query for the current upgrade status.
- versionManager.setUpgradeState(checkpoint.getStatus());
- // Set the checkpoint in the SCM context so other components can read it.
- upgradeContext.getSCMContext().setFinalizationCheckpoint(checkpoint);
}
@Override
public void setUpgradeContext(SCMUpgradeFinalizationContext context) {
this.upgradeContext = context;
- FinalizationCheckpoint checkpoint = getFinalizationCheckpoint();
- upgradeContext.getSCMContext().setFinalizationCheckpoint(checkpoint);
- // Set the version manager's upgrade status (sent back to the client to
- // identify upgrade progress) based on the current checkpoint.
- versionManager.setUpgradeState(checkpoint.getStatus());
- }
-
- @Override
- public void addFinalizingMark() throws IOException {
- checkpointLock.writeLock().lock();
- try {
- hasFinalizingMark = true;
- } finally {
- checkpointLock.writeLock().unlock();
- }
- transactionBuffer.addToBuffer(finalizationStore,
- OzoneConsts.FINALIZING_KEY, "");
- publishCheckpoint(FinalizationCheckpoint.FINALIZATION_STARTED);
}
@Override
public void finalizeLayoutFeatures(Integer toVersion) throws IOException {
- int startLayoutVersion = versionManager.getMetadataLayoutVersion() + 1;
- for (int version = startLayoutVersion; version <= toVersion; version++) {
- finalizeLayoutFeatureLocal(version);
+ for (LayoutFeature feature : versionManager.unfinalizedFeatures()) {
+ finalizeLayoutFeatureLocal((HDDSLayoutFeature) feature);
}
}
@@ -110,7 +75,7 @@ public void finalizeLayoutFeatures(Integer toVersion) throws
IOException {
* A version of finalizeLayoutFeature without the {@link Replicate}
* annotation that can be called by followers to finalize from a snapshot.
*/
- private void finalizeLayoutFeatureLocal(Integer layoutVersion)
+ private void finalizeLayoutFeatureLocal(HDDSLayoutFeature layoutFeature)
throws IOException {
checkpointLock.writeLock().lock();
try {
@@ -118,93 +83,25 @@ private void finalizeLayoutFeatureLocal(Integer
layoutVersion)
// version. This is updated in the replicated finalization steps.
// Layout version will be written to the DB as well so followers can
// finalize from a snapshot.
- if (versionManager.getMetadataLayoutVersion() >= layoutVersion) {
+ if (versionManager.getMetadataLayoutVersion() >=
layoutFeature.layoutVersion()) {
LOG.warn("Attempting to finalize layout feature for layout version {},
but " +
"current metadata layout version is {}. Skipping finalization for
this layout version.",
- layoutVersion, versionManager.getMetadataLayoutVersion());
+ layoutFeature.layoutVersion(),
versionManager.getMetadataLayoutVersion());
} else {
- HDDSLayoutFeature feature =
- (HDDSLayoutFeature) versionManager.getFeature(layoutVersion);
- upgradeFinalizer.replicatedFinalizationSteps(feature, upgradeContext);
+ upgradeFinalizer.replicatedFinalizationSteps(layoutFeature,
upgradeContext);
}
} finally {
checkpointLock.writeLock().unlock();
}
- if (!versionManager.needsFinalization()) {
- publishCheckpoint(FinalizationCheckpoint.MLV_EQUALS_SLV);
+ if (!versionManager.needsFinalization() &&
!upgradeContext.getSCMContext().isLeader()) {
+ // Only followers mark finalization as done here. The leader must wait until
+ // the DNs have finalized before marking finalization done; otherwise a
+ // polling client could be told it is complete too early.
+
versionManager.setUpgradeState(UpgradeFinalization.Status.FINALIZATION_DONE);
}
transactionBuffer.addToBuffer(finalizationStore,
- OzoneConsts.LAYOUT_VERSION_KEY, String.valueOf(layoutVersion));
- }
-
- @Override
- public void removeFinalizingMark() throws IOException {
- checkpointLock.writeLock().lock();
- try {
- hasFinalizingMark = false;
- } finally {
- checkpointLock.writeLock().unlock();
- }
- transactionBuffer.removeFromBuffer(finalizationStore,
- OzoneConsts.FINALIZING_KEY);
-
- // All prior checkpoints should have been crossed when this method is
- // called, leaving us at the finalization complete checkpoint.
- // If this is not the case, this SCM (leader or follower) has encountered
- // a bug leaving it in an inconsistent upgrade finalization state.
- // It should terminate to avoid further damage.
- FinalizationCheckpoint checkpoint = getFinalizationCheckpoint();
- if (checkpoint != FinalizationCheckpoint.FINALIZATION_COMPLETE) {
- String errorMessage = String.format("SCM upgrade finalization " +
- "is in an unknown state. Expected %s but was %s",
- FinalizationCheckpoint.FINALIZATION_COMPLETE, checkpoint);
- ExitUtils.terminate(1, errorMessage, LOG);
- }
-
- publishCheckpoint(FinalizationCheckpoint.FINALIZATION_COMPLETE);
- }
-
- @Override
- public boolean crossedCheckpoint(FinalizationCheckpoint query) {
- return getFinalizationCheckpoint().hasCrossed(query);
- }
-
- @Override
- public FinalizationCheckpoint getFinalizationCheckpoint() {
- // Get a point-in-time snapshot of the finalization state under the lock,
- // then use this to determine which checkpoint we were on at that time.
- boolean mlvBehindSlvSnapshot;
- boolean hasFinalizingMarkSnapshot;
- checkpointLock.readLock().lock();
- try {
- mlvBehindSlvSnapshot = versionManager.needsFinalization();
- hasFinalizingMarkSnapshot = hasFinalizingMark;
- } finally {
- checkpointLock.readLock().unlock();
- }
-
- FinalizationCheckpoint currentCheckpoint = null;
- for (FinalizationCheckpoint checkpoint: FinalizationCheckpoint.values()) {
- if (checkpoint.isCurrent(hasFinalizingMarkSnapshot,
- mlvBehindSlvSnapshot)) {
- currentCheckpoint = checkpoint;
- break;
- }
- }
-
- // SCM cannot function if it does not know which finalization checkpoint
- // it is on, so it must terminate. This should only happen in the case of
- // a serious bug.
- if (currentCheckpoint == null) {
- String errorMessage = String.format("SCM upgrade finalization " +
- "is in an unknown state.%nFinalizing mark present? %b%n" +
- "Metadata layout version behind software layout version? %b",
- hasFinalizingMarkSnapshot, mlvBehindSlvSnapshot);
- ExitUtils.terminate(1, errorMessage, LOG);
- }
-
- return currentCheckpoint;
+ OzoneConsts.LAYOUT_VERSION_KEY,
String.valueOf(layoutFeature.layoutVersion()));
}
/**
@@ -216,7 +113,6 @@ public void reinitialize(Table<String, String>
newFinalizationStore)
checkpointLock.writeLock().lock();
try {
this.finalizationStore = newFinalizationStore;
- initialize();
int dbLayoutVersion = getDBLayoutVersion();
int currentLayoutVersion = versionManager.getMetadataLayoutVersion();
@@ -232,12 +128,8 @@ public void reinitialize(Table<String, String>
newFinalizationStore)
// that the UpgradeFinalizationExecutor contains. Just run the
// upgrade actions for the layout features, set the finalization
// checkpoint, and increase the version in the VERSION file.
- for (int version = currentLayoutVersion + 1; version <=
dbLayoutVersion;
- version++) {
- finalizeLayoutFeatureLocal(version);
- }
+ finalizeLayoutFeatures(dbLayoutVersion);
}
- publishCheckpoint(getFinalizationCheckpoint());
} catch (Exception ex) {
LOG.error("Failed to reinitialize finalization state", ex);
throw new IOException(ex);
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java
index 773ade68a2f..8c74506ffd2 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/upgrade/SCMUpgradeFinalizer.java
@@ -17,6 +17,8 @@
package org.apache.hadoop.hdds.scm.server.upgrade;
+import static
org.apache.hadoop.ozone.upgrade.UpgradeFinalization.Status.FINALIZATION_DONE;
+
import java.io.IOException;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
@@ -43,31 +45,11 @@ public class SCMUpgradeFinalizer extends
BasicUpgradeFinalizer<SCMUpgradeFinalizationContext,
HDDSLayoutVersionManager> {
- public SCMUpgradeFinalizer(HDDSLayoutVersionManager versionManager) {
- super(versionManager);
- }
-
public SCMUpgradeFinalizer(HDDSLayoutVersionManager versionManager,
UpgradeFinalizationExecutor<SCMUpgradeFinalizationContext> executor) {
super(versionManager, executor);
}
- private void logCheckpointCrossed(FinalizationCheckpoint checkpoint) {
- LOG.info("SCM Finalization has crossed checkpoint {}", checkpoint);
- }
-
- @Override
- public void preFinalizeUpgrade(SCMUpgradeFinalizationContext context)
- throws IOException {
- FinalizationStateManager stateManager =
- context.getFinalizationStateManager();
- if (!stateManager.crossedCheckpoint(
- FinalizationCheckpoint.FINALIZATION_STARTED)) {
- context.getFinalizationStateManager().addFinalizingMark();
- }
- logCheckpointCrossed(FinalizationCheckpoint.FINALIZATION_STARTED);
- }
-
@Override
public void finalizeLayoutFeature(LayoutFeature lf,
SCMUpgradeFinalizationContext context) throws UpgradeException {
@@ -111,18 +93,9 @@ void replicatedFinalizationSteps(HDDSLayoutFeature lf,
}
@Override
- public void postFinalizeUpgrade(SCMUpgradeFinalizationContext context)
- throws IOException {
- // If we reached this phase of finalization, all layout features should
- // be finalized.
- logCheckpointCrossed(FinalizationCheckpoint.MLV_EQUALS_SLV);
- FinalizationStateManager stateManager =
- context.getFinalizationStateManager();
- if (!stateManager.crossedCheckpoint(
- FinalizationCheckpoint.FINALIZATION_COMPLETE)) {
- waitForDatanodesToFinalize(context);
- stateManager.removeFinalizingMark();
- }
+ public void postFinalizeUpgrade(SCMUpgradeFinalizationContext context)
throws IOException {
+ waitForDatanodesToFinalize(context);
+ getVersionManager().setUpgradeState(FINALIZATION_DONE);
}
/**
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
index 65f00730230..db0425fed74 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
@@ -38,7 +38,6 @@
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.node.states.NodeAlreadyExistsException;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.hadoop.hdds.server.events.Event;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.utils.HddsServerUtil;
@@ -78,8 +77,6 @@ public char[] getPassword(String key) throws IOException {
// Make NodeStateManager behave as if SCM has completed finalization,
// unless a test changes the value of this variable.
SCMContext scmContext = SCMContext.emptyContext();
- scmContext.setFinalizationCheckpoint(
- FinalizationCheckpoint.FINALIZATION_COMPLETE);
eventPublisher = new MockEventPublisher();
nsm = new NodeStateManager(conf, eventPublisher, scmContext);
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
index 0e6a1ff4142..a93fbd4aa99 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
@@ -97,7 +97,6 @@
import
org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager;
@@ -120,7 +119,6 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.ArgumentCaptor;
@@ -732,17 +730,15 @@ void testScmHandleJvmPause() throws Exception {
}
}
- @ParameterizedTest
- @EnumSource(FinalizationCheckpoint.class)
- public void testProcessLayoutVersion(FinalizationCheckpoint checkpoint)
throws IOException {
- LOG.info("Testing with SCM finalization checkpoint {}", checkpoint);
- testProcessLayoutVersionLowerMlv(checkpoint);
- testProcessLayoutVersionReportHigherMlv(checkpoint);
+ @Test
+ public void testProcessLayoutVersion() throws IOException {
+ testProcessLayoutVersionLowerMlv(true);
+ testProcessLayoutVersionLowerMlv(false);
+ testProcessLayoutVersionReportHigherMlv();
}
// Currently invoked by testProcessLayoutVersion.
- public void testProcessLayoutVersionReportHigherMlv(
- FinalizationCheckpoint currentCheckpoint)
+ public void testProcessLayoutVersionReportHigherMlv()
throws IOException {
final int healthCheckInterval = 200; // milliseconds
final int heartbeatInterval = 1; // seconds
@@ -759,7 +755,6 @@ public void testProcessLayoutVersionReportHigherMlv(
HDDSLayoutVersionManager lvm =
new HDDSLayoutVersionManager(scmStorageConfig.getLayoutVersion());
SCMContext nodeManagerContext = SCMContext.emptyContext();
- nodeManagerContext.setFinalizationCheckpoint(currentCheckpoint);
SCMNodeManager nodeManager = new SCMNodeManager(conf,
scmStorageConfig, eventPublisher, new NetworkTopologyImpl(conf),
nodeManagerContext, lvm);
@@ -784,16 +779,18 @@ scmStorageConfig, eventPublisher, new
NetworkTopologyImpl(conf),
}
// Currently invoked by testProcessLayoutVersion.
- public void testProcessLayoutVersionLowerMlv(FinalizationCheckpoint
- currentCheckpoint) throws IOException {
+ public void testProcessLayoutVersionLowerMlv(boolean mvlLessThanSlv) throws
IOException {
OzoneConfiguration conf = new OzoneConfiguration();
SCMStorageConfig scmStorageConfig = mock(SCMStorageConfig.class);
when(scmStorageConfig.getClusterID()).thenReturn("xyz111");
EventPublisher eventPublisher = mock(EventPublisher.class);
- HDDSLayoutVersionManager lvm =
- new HDDSLayoutVersionManager(scmStorageConfig.getLayoutVersion());
+ int currentVersion = HDDSLayoutVersionManager.maxLayoutVersion();
+ if (mvlLessThanSlv) {
+ currentVersion -= 1;
+ }
+ HDDSLayoutVersionManager lvm = new
HDDSLayoutVersionManager(currentVersion);
+
SCMContext nodeManagerContext = SCMContext.emptyContext();
- nodeManagerContext.setFinalizationCheckpoint(currentCheckpoint);
SCMNodeManager nodeManager = new SCMNodeManager(conf,
scmStorageConfig, eventPublisher, new NetworkTopologyImpl(conf),
nodeManagerContext, lvm);
@@ -811,7 +808,7 @@ scmStorageConfig, eventPublisher, new
NetworkTopologyImpl(conf),
ArgumentCaptor<CommandForDatanode> captor =
ArgumentCaptor.forClass(CommandForDatanode.class);
- if (currentCheckpoint.hasCrossed(FinalizationCheckpoint.MLV_EQUALS_SLV)) {
+ if (!lvm.needsFinalization()) {
// If the mlv equals slv checkpoint passed, datanodes with older mlvs
// should be instructed to finalize.
verify(eventPublisher, times(1))
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmFinalization.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmFinalization.java
deleted file mode 100644
index 9364b9ff37c..00000000000
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmFinalization.java
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hdds.scm.upgrade;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.Mockito.eq;
-import static org.mockito.Mockito.inOrder;
-import static org.mockito.Mockito.matches;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.when;
-
-import java.util.UUID;
-import org.apache.hadoop.hdds.scm.ha.SCMContext;
-import org.apache.hadoop.hdds.scm.ha.SCMHAManager;
-import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
-import org.apache.hadoop.hdds.scm.ha.SCMRatisServer;
-import org.apache.hadoop.hdds.scm.metadata.DBTransactionBuffer;
-import org.apache.hadoop.hdds.scm.node.NodeManager;
-import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationStateManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizationContext;
-import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizer;
-import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature;
-import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager;
-import org.apache.hadoop.hdds.utils.db.Table;
-import org.apache.hadoop.ozone.OzoneConsts;
-import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
-import org.apache.hadoop.ozone.upgrade.UpgradeFinalization.StatusAndMessages;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.EnumSource;
-import org.mockito.InOrder;
-import org.mockito.verification.VerificationMode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Tests SCM finalization operations on mocked upgrade state.
- */
-public class TestScmFinalization {
- private static final Logger LOG =
- LoggerFactory.getLogger(TestScmFinalization.class);
-
- /**
- * Order of finalization checkpoints within the enum is used to determine
- * which ones have been passed. If ordering within the enum is changed
- * finalization will not behave correctly.
- */
- @Test
- public void testCheckpointOrder() {
- FinalizationCheckpoint[] checkpoints = FinalizationCheckpoint.values();
- assertEquals(4, checkpoints.length);
- assertEquals(checkpoints[0],
- FinalizationCheckpoint.FINALIZATION_REQUIRED);
- assertEquals(checkpoints[1],
- FinalizationCheckpoint.FINALIZATION_STARTED);
- assertEquals(checkpoints[2],
- FinalizationCheckpoint.MLV_EQUALS_SLV);
- assertEquals(checkpoints[3],
- FinalizationCheckpoint.FINALIZATION_COMPLETE);
- }
-
- /**
- * Tests that the correct checkpoint is returned based on the value of
- * SCM's layout version and the presence of the finalizing key.
- */
- @Test
- public void testUpgradeStateToCheckpointMapping() throws Exception {
- HDDSLayoutVersionManager versionManager =
- new HDDSLayoutVersionManager(
- HDDSLayoutFeature.INITIAL_VERSION.layoutVersion());
- // State manager keeps upgrade information in memory as well as writing
- // it to disk, so we can mock the classes that handle disk ops for this
- // test.
- FinalizationStateManager stateManager =
- new FinalizationStateManagerTestImpl.Builder()
- .setFinalizationStore(mock(Table.class))
- .setRatisServer(mock(SCMRatisServer.class))
- .setTransactionBuffer(mock(DBTransactionBuffer.class))
- .setUpgradeFinalizer(new SCMUpgradeFinalizer(versionManager))
- .build();
-
- // In the actual flow, this would be handled by the FinalizationManager.
- SCMContext scmContext = SCMContext.emptyContext();
- scmContext.setFinalizationCheckpoint(
- stateManager.getFinalizationCheckpoint());
- SCMUpgradeFinalizationContext context =
- new SCMUpgradeFinalizationContext.Builder()
- .setFinalizationStateManager(stateManager)
- .setStorage(mock(SCMStorageConfig.class))
- .setSCMContext(scmContext)
- .setNodeManager(mock(NodeManager.class))
- .build();
- stateManager.setUpgradeContext(context);
-
- assertCurrentCheckpoint(scmContext, stateManager,
FinalizationCheckpoint.FINALIZATION_REQUIRED);
- stateManager.addFinalizingMark();
- assertCurrentCheckpoint(scmContext, stateManager,
FinalizationCheckpoint.FINALIZATION_STARTED);
-
- HDDSLayoutFeature[] finalizationFeatures = HDDSLayoutFeature.values();
- HDDSLayoutFeature finalVersion =
finalizationFeatures[finalizationFeatures.length - 1];
- assertCurrentCheckpoint(scmContext, stateManager,
FinalizationCheckpoint.FINALIZATION_STARTED);
- stateManager.finalizeLayoutFeatures(finalVersion.layoutVersion());
- assertCurrentCheckpoint(scmContext, stateManager,
FinalizationCheckpoint.MLV_EQUALS_SLV);
- stateManager.removeFinalizingMark();
- assertCurrentCheckpoint(scmContext, stateManager,
FinalizationCheckpoint.FINALIZATION_COMPLETE);
- }
-
- private void assertCurrentCheckpoint(SCMContext context,
- FinalizationStateManager stateManager,
- FinalizationCheckpoint expectedCheckpoint) {
-
- // SCM context should have been updated with the current checkpoint.
- assertTrue(context.getFinalizationCheckpoint()
- .hasCrossed(expectedCheckpoint));
- for (FinalizationCheckpoint checkpoint: FinalizationCheckpoint.values()) {
- LOG.info("Comparing expected checkpoint {} to {}", expectedCheckpoint,
- checkpoint);
- if (expectedCheckpoint.compareTo(checkpoint) >= 0) {
- // If the expected current checkpoint is >= this checkpoint,
- // then this checkpoint should be crossed according to the state
- // manager.
- assertTrue(stateManager.crossedCheckpoint(checkpoint));
- } else {
- // Else if the expected current checkpoint is < this
- // checkpoint, then this checkpoint should not be crossed according to
- // the state manager.
- assertFalse(stateManager.crossedCheckpoint(checkpoint));
- }
- }
- }
-
- /**
- * Tests resuming finalization after a failure or leader change, where the
- * disk state will indicate which finalization checkpoint (and therefore
- * set of steps) the SCM must resume from.
- */
- @ParameterizedTest
- @EnumSource(FinalizationCheckpoint.class)
- public void testResumeFinalizationFromCheckpoint(
- FinalizationCheckpoint initialCheckpoint) throws Exception {
- LOG.info("Testing finalization beginning at checkpoint {}",
- initialCheckpoint);
-
- // Create the table and version manager to appear as if we left off from in
- // progress finalization.
- Table<String, String> finalizationStore =
- getMockTableFromCheckpoint(initialCheckpoint);
- HDDSLayoutVersionManager versionManager =
- getMockVersionManagerFromCheckpoint(initialCheckpoint);
- SCMHAManager haManager = mock(SCMHAManager.class);
- DBTransactionBuffer buffer = mock(DBTransactionBuffer.class);
- when(haManager.getDBTransactionBuffer()).thenReturn(buffer);
- NodeManager nodeManager = mock(NodeManager.class);
- SCMStorageConfig storage = mock(SCMStorageConfig.class);
- SCMContext scmContext = SCMContext.emptyContext();
- scmContext.setFinalizationCheckpoint(initialCheckpoint);
-
- FinalizationStateManager stateManager =
- new FinalizationStateManagerTestImpl.Builder()
- .setFinalizationStore(finalizationStore)
- .setRatisServer(mock(SCMRatisServer.class))
- .setTransactionBuffer(buffer)
- .setUpgradeFinalizer(new SCMUpgradeFinalizer(versionManager))
- .build();
-
- FinalizationManager manager = new FinalizationManagerTestImpl.Builder()
- .setFinalizationStateManager(stateManager)
- .setLayoutVersionManager(versionManager)
- .setStorage(storage)
- .setHAManager(SCMHAManagerStub.getInstance(true))
- .setFinalizationStore(finalizationStore)
- .build();
-
- manager.buildUpgradeContext(nodeManager, scmContext);
-
- // Execute upgrade finalization, then check that events happened in the
- // correct order.
- StatusAndMessages status =
- manager.finalizeUpgrade(UUID.randomUUID().toString());
- assertEquals(getStatusFromCheckpoint(initialCheckpoint).status(),
- status.status());
-
- InOrder inOrder = inOrder(buffer, nodeManager, storage);
-
- // Once the initial checkpoint's operations are crossed, this count will
- // be increased to 1 to indicate where finalization should have resumed
- // from.
- VerificationMode count = never();
- if (initialCheckpoint == FinalizationCheckpoint.FINALIZATION_REQUIRED) {
- count = times(1);
- }
-
- // First, SCM should mark that it is beginning finalization.
- inOrder.verify(buffer, count).addToBuffer(
- eq(finalizationStore),
- matches(OzoneConsts.FINALIZING_KEY),
- matches(""));
-
- if (initialCheckpoint == FinalizationCheckpoint.FINALIZATION_STARTED) {
- count = times(1);
- }
-
- // Next, each layout feature should be finalized.
- for (HDDSLayoutFeature feature: HDDSLayoutFeature.values()) {
- // Cannot finalize initial version since we are already there.
- if (!feature.equals(HDDSLayoutFeature.INITIAL_VERSION)) {
- inOrder.verify(storage, count)
- .setLayoutVersion(feature.layoutVersion());
- inOrder.verify(storage, count).persistCurrentState();
- inOrder.verify(buffer, count).addToBuffer(
- eq(finalizationStore),
- matches(OzoneConsts.LAYOUT_VERSION_KEY),
- eq(String.valueOf(feature.layoutVersion())));
- }
- }
-
- if (initialCheckpoint == FinalizationCheckpoint.MLV_EQUALS_SLV) {
- count = times(1);
- }
-
- // Last, the finalizing mark is removed to indicate finalization is
- // complete.
- inOrder.verify(buffer, count).removeFromBuffer(
- eq(finalizationStore),
- matches(OzoneConsts.FINALIZING_KEY));
-
- // If the initial checkpoint was FINALIZATION_COMPLETE, no mocks should
- // have been invoked.
- }
-
- /**
- * On startup, the finalization table will be read to determine the
- * checkpoint we are resuming from. After this, the results will be stored
- * in memory and flushed to the table asynchronously by the buffer, so the
- * mock table can continue to return the initial values since the in memory
- * values will be used after the initial table read on start.
- *
- * Layout version stored in the table is only used for ratis snapshot
- * finalization, which is not covered in this test.
- */
- private Table<String, String> getMockTableFromCheckpoint(
- FinalizationCheckpoint initialCheckpoint) throws Exception {
- Table<String, String> finalizationStore = mock(Table.class);
- when(finalizationStore
- .isExist(eq(OzoneConsts.FINALIZING_KEY)))
- .thenReturn(initialCheckpoint.needsFinalizingMark());
- return finalizationStore;
- }
-
- /**
- * On startup, components will read their version file to get their current
- * layout version and initialize the version manager with that. Simulate
- * that here.
- */
- private HDDSLayoutVersionManager getMockVersionManagerFromCheckpoint(
- FinalizationCheckpoint initialCheckpoint) throws Exception {
- int layoutVersion = HDDSLayoutVersionManager.maxLayoutVersion();
- if (initialCheckpoint.needsMlvBehindSlv()) {
- layoutVersion = HDDSLayoutFeature.INITIAL_VERSION.layoutVersion();
- }
- return new HDDSLayoutVersionManager(layoutVersion);
- }
-
- /**
- * Returns the expected status when finalization is invoked from the
- * provided checkpoint.
- */
- private StatusAndMessages getStatusFromCheckpoint(
- FinalizationCheckpoint initialCheckpoint) {
- if (initialCheckpoint == FinalizationCheckpoint.FINALIZATION_COMPLETE) {
- return UpgradeFinalization.FINALIZED_MSG;
- } else {
- return UpgradeFinalization.STARTING_MSG;
- }
- }
-
-}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHDDSUpgrade.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHDDSUpgrade.java
index fe9eeee386c..2e955a6b87b 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHDDSUpgrade.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHDDSUpgrade.java
@@ -21,7 +21,6 @@
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL;
-import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL;
@@ -308,8 +307,7 @@ public void
testFinalizationFromInitialVersionToLatestVersion()
// Verify Post-Upgrade conditions on the SCM.
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(),
- NUM_CONTAINERS_CREATED, NUM_DATA_NODES);
+ cluster.getStorageContainerManagersList(), NUM_CONTAINERS_CREATED);
TestHddsUpgradeUtils.testDataNodesStateOnSCM(cluster.getStorageContainerManagersList(),
NUM_DATA_NODES, HEALTHY);
@@ -317,7 +315,7 @@ public void
testFinalizationFromInitialVersionToLatestVersion()
// In the happy path case, no containers should have been quasi closed as
// a result of the upgrade.
TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), NUM_CONTAINERS_CREATED, CLOSED);
+ cluster.getHddsDatanodes(), NUM_CONTAINERS_CREATED);
// Test that we can use a pipeline after upgrade.
// Will fail with exception if there are no pipelines.
@@ -861,11 +859,8 @@ public void testFinalizationWithFailureInjectionHelper(
// Verify Post-Upgrade conditions on the SCM.
// With failure injection
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(), NUM_CONTAINERS_CREATED,
- NUM_DATA_NODES);
+ cluster.getStorageContainerManagersList(), NUM_CONTAINERS_CREATED);
- // All datanodes on the SCM should have moved to HEALTHY-READONLY state.
- // Due to timing constraint also allow a "HEALTHY" state.
loadSCMState();
TestHddsUpgradeUtils.testDataNodesStateOnSCM(
cluster.getStorageContainerManagersList(), NUM_DATA_NODES, HEALTHY);
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHddsUpgradeUtils.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHddsUpgradeUtils.java
index 12fde928024..ec319ea07c5 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHddsUpgradeUtils.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestHddsUpgradeUtils.java
@@ -22,20 +22,17 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertSame;
-import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.hadoop.ozone.HddsDatanodeService;
import
org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalization;
@@ -85,20 +82,16 @@ public static void testPreUpgradeConditionsSCM(
* Helper function to test Post-Upgrade conditions on the SCM
*/
public static void testPostUpgradeConditionsSCM(
- List<StorageContainerManager> scms, int numContainers, int numDatanodes)
{
+ List<StorageContainerManager> scms, int numContainers) {
for (StorageContainerManager scm : scms) {
LOG.info("Testing post upgrade conditions on SCM with node ID: {}",
scm.getSCMNodeId());
- testPostUpgradeConditionsSCM(scm, numContainers, numDatanodes);
+ testPostUpgradeConditionsSCM(scm, numContainers);
}
}
public static void testPostUpgradeConditionsSCM(StorageContainerManager scm,
- int numContainers, int
numDatanodes) {
-
- assertTrue(scm.getScmContext().getFinalizationCheckpoint()
- .hasCrossed(FinalizationCheckpoint.FINALIZATION_COMPLETE));
-
+ int numContainers) {
HDDSLayoutVersionManager scmVersionManager = scm.getLayoutVersionManager();
assertEquals(scmVersionManager.getSoftwareLayoutVersion(),
scmVersionManager.getMetadataLayoutVersion());
@@ -132,8 +125,7 @@ public static void testPreUpgradeConditionsDataNodes(
* Helper function to test Post-Upgrade conditions on all the DataNodes.
*/
public static void testPostUpgradeConditionsDataNodes(
- List<HddsDatanodeService> datanodes, int numContainers,
- ContainerProtos.ContainerDataProto.State... validClosedContainerStates) {
+ List<HddsDatanodeService> datanodes, int numContainers) {
try {
GenericTestUtils.waitFor(() -> {
for (HddsDatanodeService dataNode : datanodes) {
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java
index f98685f0ff7..b09217facc2 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmDataDistributionFinalization.java
@@ -26,7 +26,6 @@
import static
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT;
import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE;
import static org.apache.hadoop.hdds.client.ReplicationType.RATIS;
-import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL;
import static
org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.EMPTY_SUMMARY;
import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
@@ -62,7 +61,6 @@
import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizationContext;
import org.apache.hadoop.hdds.utils.db.CodecException;
import org.apache.hadoop.hdds.utils.db.RocksDatabaseException;
@@ -202,9 +200,9 @@ public void testFinalizationEmptyClusterDataDistribution()
throws Exception {
cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion());
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(), 0, NUM_DATANODES);
+ cluster.getStorageContainerManagersList(), 0);
TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), 0, CLOSED);
+ cluster.getHddsDatanodes(), 0);
assertNotNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary());
for (StorageContainerManager scm:
cluster.getStorageContainerManagersList()) {
@@ -316,9 +314,9 @@ public void
testFinalizationNonEmptyClusterDataDistribution() throws Exception {
cluster.getStorageContainerManager().getLayoutVersionManager().getMetadataLayoutVersion());
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(), 0, NUM_DATANODES);
+ cluster.getStorageContainerManagersList(), 0);
TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), 0, CLOSED);
+ cluster.getHddsDatanodes(), 0);
assertNotNull(cluster.getStorageContainerLocationClient().getDeletedBlockSummary());
for (StorageContainerManager scm:
cluster.getStorageContainerManagersList()) {
@@ -450,13 +448,8 @@ private void waitForScmToFinalize(StorageContainerManager
scm)
throws Exception {
GenericTestUtils.waitFor(() -> !scm.isInSafeMode(), 500, 5000);
GenericTestUtils.waitFor(() -> {
- FinalizationCheckpoint checkpoint =
- scm.getScmContext().getFinalizationCheckpoint();
- LOG.info("Waiting for SCM {} (leader? {}) to finalize. Current " +
- "finalization checkpoint is {}",
- scm.getSCMNodeId(), scm.checkLeader(), checkpoint);
- return checkpoint.hasCrossed(
- FinalizationCheckpoint.FINALIZATION_COMPLETE);
+ LOG.info("Waiting for SCM {} (leader? {}) to finalize.",
scm.getSCMNodeId(), scm.checkLeader());
+ return !scm.getLayoutVersionManager().needsFinalization();
}, 2_000, 60_000);
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
index da3cb82e68d..6b74cc4d0cf 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
@@ -17,21 +17,15 @@
package org.apache.hadoop.hdds.upgrade;
-import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED;
import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
-import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
-import java.util.stream.Stream;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -40,24 +34,17 @@
import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationStateManagerImpl;
import org.apache.hadoop.hdds.scm.server.upgrade.SCMUpgradeFinalizationContext;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
import org.apache.hadoop.ozone.UniformDatanodesFactory;
import org.apache.hadoop.ozone.upgrade.DefaultUpgradeFinalizationExecutor;
-import
org.apache.hadoop.ozone.upgrade.InjectedUpgradeFinalizationExecutor.UpgradeTestInjectionPoints;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalizationExecutor;
-import org.apache.hadoop.ozone.upgrade.UpgradeTestUtils;
import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.GenericTestUtils.LogCapturer;
-import org.apache.ozone.test.tag.Flaky;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -69,9 +56,6 @@ public class TestScmHAFinalization {
private static final String CLIENT_ID = UUID.randomUUID().toString();
private static final Logger LOG =
LoggerFactory.getLogger(TestScmHAFinalization.class);
- private static final String METHOD_SOURCE =
- "org.apache.hadoop.hdds.upgrade" +
- ".TestScmHAFinalization#injectionPointsToTest";
private StorageContainerLocationProtocol scmClient;
private MiniOzoneHAClusterImpl cluster;
@@ -127,118 +111,19 @@ public void shutdown() {
}
}
- /**
- * Argument supplier for parameterized tests.
- */
- public static Stream<Arguments> injectionPointsToTest() {
- // Do not test from BEFORE_PRE_FINALIZE_UPGRADE injection point.
- // Finalization will not have started so there will be no persisted state
- // to resume from.
- return Stream.of(
- Arguments.of(UpgradeTestInjectionPoints.AFTER_PRE_FINALIZE_UPGRADE),
- Arguments.of(UpgradeTestInjectionPoints.AFTER_COMPLETE_FINALIZATION),
- Arguments.of(UpgradeTestInjectionPoints.AFTER_POST_FINALIZE_UPGRADE)
- );
- }
-
- @ParameterizedTest
- @MethodSource(METHOD_SOURCE)
- public void testFinalizationWithLeaderChange(
- UpgradeTestInjectionPoints haltingPoint) throws Exception {
-
- CountDownLatch pauseLatch = new CountDownLatch(1);
- CountDownLatch unpauseLatch = new CountDownLatch(1);
- init(new OzoneConfiguration(),
- UpgradeTestUtils.newPausingFinalizationExecutor(haltingPoint,
- pauseLatch, unpauseLatch, LOG), 0);
- pauseLatch.await();
-
- // Stop the leader, forcing a leader change in the middle of finalization.
- // This will cause the initial client call for finalization
- // to be interrupted.
- StorageContainerManager oldLeaderScm = cluster.getActiveSCM();
- LOG.info("Stopping current SCM leader {} to initiate a leader change.",
- oldLeaderScm.getSCMNodeId());
- cluster.shutdownStorageContainerManager(oldLeaderScm);
-
- // Wait for the remaining two SCMs to elect a new leader.
- cluster.waitForClusterToBeReady();
-
- // While finalization is paused, check its state on the remaining SCMs.
- checkMidFinalizationConditions(haltingPoint,
- cluster.getStorageContainerManagersList());
-
- // Restart actually creates a new SCM.
- // Since this SCM will be a follower, the implementation of its upgrade
- // finalization executor does not matter for this test.
- cluster.restartStorageContainerManager(oldLeaderScm, true);
-
- // Make sure the original SCM leader is not the leader anymore.
- StorageContainerManager newLeaderScm = cluster.getActiveSCM();
- assertNotEquals(newLeaderScm.getSCMNodeId(),
- oldLeaderScm.getSCMNodeId());
-
- // Resume finalization from the new leader.
- unpauseLatch.countDown();
-
- // Client should complete exceptionally since the original SCM it
- // requested to was restarted.
- finalizationFuture.get();
- TestHddsUpgradeUtils.waitForFinalizationFromClient(scmClient, CLIENT_ID);
- // Make sure old leader has caught up and all SCMs have finalized.
- waitForScmsToFinalize(cluster.getStorageContainerManagersList());
-
- TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(), 0, NUM_DATANODES);
- TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), 0, CLOSED);
- }
-
- @ParameterizedTest
- @MethodSource(METHOD_SOURCE)
- @Flaky("HDDS-8714")
- public void testFinalizationWithRestart(
- UpgradeTestInjectionPoints haltingPoint) throws Exception {
- CountDownLatch terminateLatch = new CountDownLatch(1);
- init(new OzoneConfiguration(),
- UpgradeTestUtils.newTerminatingFinalizationExecutor(haltingPoint,
- terminateLatch, LOG),
- 0);
- terminateLatch.await();
-
- // Once upgrade finalization is stopped at the halting point, restart all
- // SCMs.
- LOG.info("Restarting all SCMs during upgrade finalization.");
- // Restarting an SCM from mini ozone actually replaces the SCM with a new
- // instance. We will use the normal upgrade finalization executor for
- // these new instances, since the last one aborted at the halting point.
- cluster.getSCMConfigurator()
- .setUpgradeFinalizationExecutor(
- new DefaultUpgradeFinalizationExecutor<>());
- List<StorageContainerManager> originalSCMs =
- cluster.getStorageContainerManagers();
-
- for (StorageContainerManager scm: originalSCMs) {
- cluster.restartStorageContainerManager(scm, false);
- }
-
- checkMidFinalizationConditions(haltingPoint,
- cluster.getStorageContainerManagersList());
-
- // After all SCMs were restarted, finalization should resume
- // automatically once a leader is elected.
- cluster.waitForClusterToBeReady();
-
+ @Test
+ public void testFinalizationWithLeaderChange() throws Exception {
+ OzoneConfiguration conf = new OzoneConfiguration();
+ init(conf, new DefaultUpgradeFinalizationExecutor<>(), 0);
finalizationFuture.get();
TestHddsUpgradeUtils.waitForFinalizationFromClient(scmClient, CLIENT_ID);
- // Once the leader tells the client finalization is complete, wait for all
- // followers to catch up so we can check their state.
+ // Ensure all SCMs finalize, indicating the message has been propagated across them all
waitForScmsToFinalize(cluster.getStorageContainerManagersList());
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- cluster.getStorageContainerManagersList(), 0, NUM_DATANODES);
+ cluster.getStorageContainerManagersList(), 0);
TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), 0, CLOSED);
+ cluster.getHddsDatanodes(), 0);
}
@Test
@@ -274,9 +159,9 @@ public void testSnapshotFinalization() throws Exception {
waitForScmsToFinalize(activeScms);
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- activeScms, 0, NUM_DATANODES);
+ activeScms, 0);
TestHddsUpgradeUtils.testPostUpgradeConditionsDataNodes(
- cluster.getHddsDatanodes(), 0, CLOSED);
+ cluster.getHddsDatanodes(), 0);
// Move SCM log index farther ahead to make sure a snapshot install
// happens on the restarted SCM.
@@ -292,7 +177,7 @@ public void testSnapshotFinalization() throws Exception {
waitForScmToFinalize(inactiveScm);
TestHddsUpgradeUtils.testPostUpgradeConditionsSCM(
- inactiveScm, 0, NUM_DATANODES);
+ inactiveScm, 0);
// Use log to verify a snapshot was installed.
assertThat(logCapture.getOutput()).contains("New SCM snapshot " +
@@ -310,45 +195,8 @@ private void waitForScmToFinalize(StorageContainerManager scm)
throws Exception {
GenericTestUtils.waitFor(() -> !scm.isInSafeMode(), 500, 5000);
GenericTestUtils.waitFor(() -> {
- FinalizationCheckpoint checkpoint =
- scm.getScmContext().getFinalizationCheckpoint();
- LOG.info("Waiting for SCM {} (leader? {}) to finalize. Current " +
- "finalization checkpoint is {}",
- scm.getSCMNodeId(), scm.checkLeader(), checkpoint);
- return checkpoint.hasCrossed(
- FinalizationCheckpoint.FINALIZATION_COMPLETE);
+ LOG.info("Waiting for SCM {} (leader? {}) to finalize.", scm.getSCMNodeId(), scm.checkLeader());
+ return !scm.getLayoutVersionManager().needsFinalization();
}, 2_000, 60_000);
}
-
- private void checkMidFinalizationConditions(
- UpgradeTestInjectionPoints haltingPoint,
- List<StorageContainerManager> scms) {
-
- // Ratis only makes sure that the Leader has processed the finalization,
- // the followers might have this in the Raft Log and not yet processed it.
- switch (haltingPoint) {
- case BEFORE_PRE_FINALIZE_UPGRADE:
- // At least one node (leader) should be in the FINALIZATION_REQUIRED stage.
- assertTrue(scms.stream().anyMatch(scm ->
- scm.getScmContext().getFinalizationCheckpoint() == FinalizationCheckpoint.FINALIZATION_REQUIRED));
- break;
- case AFTER_PRE_FINALIZE_UPGRADE:
- // At least one node (leader) should be in the FINALIZATION_STARTED stage.
- assertTrue(scms.stream().anyMatch(scm ->
- scm.getScmContext().getFinalizationCheckpoint() == FinalizationCheckpoint.FINALIZATION_STARTED));
- break;
- case AFTER_COMPLETE_FINALIZATION:
- // At least one node (leader) should be in the MLV_EQUALS_SLV stage.
- assertTrue(scms.stream().anyMatch(scm ->
- scm.getScmContext().getFinalizationCheckpoint() == FinalizationCheckpoint.MLV_EQUALS_SLV));
- break;
- case AFTER_POST_FINALIZE_UPGRADE:
- // At least one node (leader) should be in the FINALIZATION_COMPLETE stage.
- assertTrue(scms.stream().anyMatch(scm ->
- scm.getScmContext().getFinalizationCheckpoint() == FinalizationCheckpoint.FINALIZATION_COMPLETE));
- break;
- default:
- fail("Unknown halting point in test: " + haltingPoint);
- }
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]