This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 3cc7c5d5dda HDDS-14862. Log volume failure as error (#9950)
3cc7c5d5dda is described below
commit 3cc7c5d5dda9c83d1bf23fe3fb4ad864982b5367
Author: Rishabh Patel <[email protected]>
AuthorDate: Thu Mar 26 00:41:15 2026 -0700
HDDS-14862. Log volume failure as error (#9950)
---
.../main/java/org/apache/hadoop/ozone/HddsDatanodeService.java | 1 +
.../container/common/states/endpoint/VersionEndpointTask.java | 1 +
.../apache/hadoop/ozone/container/common/volume/HddsVolume.java | 5 ++---
.../hadoop/ozone/container/common/volume/MutableVolumeSet.java | 9 ++++-----
.../hadoop/ozone/container/common/volume/StorageVolume.java | 2 ++
.../apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java | 4 ++--
.../container/upgrade/ScmHAFinalizeUpgradeActionDatanode.java | 1 +
7 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index 9b0d8747933..c49978ce6f2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -412,6 +412,7 @@ private void startRatisForTest() throws IOException {
HddsVolume hddsVolume = (HddsVolume) storageVolume;
boolean result = StorageVolumeUtil.checkVolume(hddsVolume, clusterId,
clusterId, conf, LOG, null);
if (!result) {
+ LOG.error("Marking volume {} as failed", hddsVolume.getStorageDir().getPath());
volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath());
}
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
index b9326c07c5b..40844c563e9 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java
@@ -120,6 +120,7 @@ private void checkVolumeSet(MutableVolumeSet volumeSet,
scmId, clusterId, configuration, LOG,
ozoneContainer.getDbVolumeSet());
if (!result) {
+ LOG.error("Marking volume {} as failed", volume.getStorageDir().getPath());
volumeSet.failVolume(volume.getStorageDir().getPath());
}
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
index f331db7defc..a4f1afa08ea 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java
@@ -299,7 +299,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
VolumeCheckResult result = super.check(unused);
if (isDbLoadFailure()) {
- LOG.warn("Volume {} failed to access RocksDB: RocksDB parent directory is null, " +
+ LOG.error("Volume {} failed to access RocksDB: RocksDB parent directory is null, " +
"the volume might not have been loaded properly.", getStorageDir());
return VolumeCheckResult.FAILED;
}
@@ -312,8 +312,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
// Check that per-volume RocksDB is present.
File dbFile = new File(dbParentDir, CONTAINER_DB_NAME);
if (!dbFile.exists() || !dbFile.canRead()) {
- LOG.warn("Volume {} failed health check. Could not access RocksDB at " +
- "{}", getStorageDir(), dbFile);
+ LOG.error("Volume {} failed health check. Could not access RocksDB at {}", getStorageDir(), dbFile);
return VolumeCheckResult.FAILED;
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
index 9ce69fa14bd..a79a06b6541 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
@@ -223,8 +223,7 @@ public void checkAllVolumes(StorageVolumeChecker checker)
}
if (!failedVolumes.isEmpty()) {
- LOG.warn("checkAllVolumes got {} failed volumes - {}",
- failedVolumes.size(), failedVolumes);
+ LOG.error("checkAllVolumes got {} failed volumes - {}", failedVolumes.size(), failedVolumes);
handleVolumeFailures(failedVolumes);
} else {
LOG.debug("checkAllVolumes encountered no failures");
@@ -242,6 +241,7 @@ private void handleVolumeFailures(
for (StorageVolume v : failedVolumes) {
// Immediately mark the volume as failed so it is unavailable
// for new containers.
+ LOG.error("Marking volume {} as failed", v.getStorageDir().getPath());
failVolume(v.getStorageDir().getPath());
}
@@ -337,11 +337,10 @@ public void failVolume(String volumeRoot) {
failedVolumeMap.put(volumeRoot, volume);
volumeHealthMetrics.decrementHealthyVolumes();
volumeHealthMetrics.incrementFailedVolumes();
- LOG.info("Moving Volume : {} to failed Volumes", volumeRoot);
} else if (failedVolumeMap.containsKey(volumeRoot)) {
- LOG.info("Volume : {} is not active", volumeRoot);
+ LOG.warn("Unable to fail the volume: {} as it is inactive", volumeRoot);
} else {
- LOG.warn("Volume : {} does not exist in VolumeSet", volumeRoot);
+ LOG.warn("Unable to fail the volume: {} as it does not exist in the VolumeSet", volumeRoot);
}
} finally {
this.writeUnlock();
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
index 5260f846893..68be2ae227a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java
@@ -605,6 +605,7 @@ public DatanodeConfiguration getDatanodeConfig() {
}
public void failVolume() {
+ LOG.warn("Volume {} failed", this);
setState(VolumeState.FAILED);
if (volumeUsage != null) {
volumeUsage.shutdown();
@@ -685,6 +686,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused)
throw new InterruptedException("Directory check of volume " + this +
" interrupted.");
}
+ LOG.error("Directory check of volume {} failed", this);
return VolumeCheckResult.FAILED;
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
index a89a4958aa7..43aa05c850c 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
@@ -96,8 +96,8 @@ public void run() {
try {
readVolume(hddsVolumeDir);
} catch (Throwable t) {
- LOG.error("Caught an exception during reading container files" +
- " from Volume {} {}", hddsVolumeDir, t);
+ LOG.error("Could not read container files from the volume {}. " +
+ "Marking the volume as failed", hddsVolumeDir, t);
volumeSet.failVolume(hddsVolumeDir.getPath());
}
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/upgrade/ScmHAFinalizeUpgradeActionDatanode.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/upgrade/ScmHAFinalizeUpgradeActionDatanode.java
index 74ccdfb8798..37d3b241f0d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/upgrade/ScmHAFinalizeUpgradeActionDatanode.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/upgrade/ScmHAFinalizeUpgradeActionDatanode.java
@@ -54,6 +54,7 @@ public void execute(DatanodeStateMachine dsm) throws Exception {
if (volume instanceof HddsVolume) {
HddsVolume hddsVolume = (HddsVolume) volume;
if (!upgradeVolume(hddsVolume, hddsVolume.getClusterID())) {
+ LOG.error("Marking volume {} as failed", volume.getStorageDir().getAbsolutePath());
volumeSet.failVolume(volume.getStorageDir().getAbsolutePath());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]