This is an automated email from the ASF dual-hosted git repository.
hemant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e9073166da HDDS-9802. Tool to fix corrupted snapshot chain (#6386)
e9073166da is described below
commit e9073166da9cb16e0529df9f59818842cc7fbbcb
Author: Hemant Kumar <[email protected]>
AuthorDate: Tue Mar 19 16:48:22 2024 -0700
HDDS-9802. Tool to fix corrupted snapshot chain (#6386)
---
.../hadoop/ozone/om/helpers/SnapshotInfo.java | 29 ++-
hadoop-ozone/dist/src/shell/ozone/ozone | 6 +
.../apache/hadoop/ozone/repair/OzoneRepair.java | 64 +++++++
.../org/apache/hadoop/ozone/repair/RDBRepair.java | 58 ++++++
.../hadoop/ozone/repair/om/SnapshotRepair.java | 200 +++++++++++++++++++++
.../hadoop/ozone/repair/om/package-info.java | 22 +++
.../apache/hadoop/ozone/repair/package-info.java | 22 +++
7 files changed, 399 insertions(+), 2 deletions(-)
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
index b635ffd6d2..fd84ffe066 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
@@ -49,8 +49,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
* This class is used for storing info related to Snapshots.
*
* Each snapshot created has an associated SnapshotInfo entry
- * containing the snapshotid, snapshot path,
- * snapshot checkpoint directory, previous snapshotid
+ * containing the snapshotId, snapshot path,
+ * snapshot checkpoint directory, previous snapshotId
* for the snapshot path & global amongst other necessary fields.
*/
public final class SnapshotInfo implements Auditable, CopyObject<SnapshotInfo>
{
@@ -714,4 +714,29 @@ public final class SnapshotInfo implements Auditable, CopyObject<SnapshotInfo> {
.setDeepCleanedDeletedDir(deepCleanedDeletedDir)
.build();
}
+
+ @Override
+ public String toString() {
+ return "SnapshotInfo{" + // the first field follows the brace directly; later fields are prefixed with ", "
+ "snapshotId: '" + snapshotId + '\'' +
+ ", name: '" + name + '\'' +
+ ", volumeName: '" + volumeName + '\'' +
+ ", bucketName: '" + bucketName + '\'' +
+ ", snapshotStatus: '" + snapshotStatus + '\'' +
+ ", creationTime: '" + creationTime + '\'' +
+ ", deletionTime: '" + deletionTime + '\'' +
+ ", pathPreviousSnapshotId: '" + pathPreviousSnapshotId + '\'' +
+ ", globalPreviousSnapshotId: '" + globalPreviousSnapshotId + '\'' +
+ ", snapshotPath: '" + snapshotPath + '\'' +
+ ", checkpointDir: '" + checkpointDir + '\'' +
+ ", dbTxSequenceNumber: '" + dbTxSequenceNumber + '\'' +
+ ", deepClean: '" + deepClean + '\'' +
+ ", sstFiltered: '" + sstFiltered + '\'' +
+ ", referencedSize: '" + referencedSize + '\'' +
+ ", referencedReplicatedSize: '" + referencedReplicatedSize + '\'' +
+ ", exclusiveSize: '" + exclusiveSize + '\'' +
+ ", exclusiveReplicatedSize: '" + exclusiveReplicatedSize + '\'' +
+ ", deepCleanedDeletedDir: '" + deepCleanedDeletedDir + '\'' +
+ '}';
+ }
}
diff --git a/hadoop-ozone/dist/src/shell/ozone/ozone b/hadoop-ozone/dist/src/shell/ozone/ozone
index 92edee8372..dbb18db8e2 100755
--- a/hadoop-ozone/dist/src/shell/ozone/ozone
+++ b/hadoop-ozone/dist/src/shell/ozone/ozone
@@ -59,6 +59,7 @@ function ozone_usage
ozone_add_subcommand "dtutil" client "operations related to delegation
tokens"
ozone_add_subcommand "admin" client "Ozone admin tool"
ozone_add_subcommand "debug" client "Ozone debug tool"
+ ozone_add_subcommand "repair" client "Ozone repair tool"
ozone_add_subcommand "checknative" client "checks if native libraries are
loaded"
ozone_generate_usage "${OZONE_SHELL_EXECNAME}" false
@@ -221,6 +222,11 @@ function ozonecmd_case
OZONE_DEBUG_OPTS="${OZONE_DEBUG_OPTS} ${OZONE_MODULE_ACCESS_ARGS}"
OZONE_RUN_ARTIFACT_NAME="ozone-tools"
;;
+ repair)
+ OZONE_CLASSNAME=org.apache.hadoop.ozone.repair.OzoneRepair
+ OZONE_DEBUG_OPTS="${OZONE_DEBUG_OPTS} ${OZONE_MODULE_ACCESS_ARGS}"
+ OZONE_RUN_ARTIFACT_NAME="ozone-tools"
+ ;;
checknative)
OZONE_CLASSNAME=org.apache.hadoop.ozone.shell.checknative.CheckNative
OZONE_RUN_ARTIFACT_NAME="ozone-tools"
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/OzoneRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/OzoneRepair.java
new file mode 100644
index 0000000000..3bbbded580
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/OzoneRepair.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.repair;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hdds.cli.GenericCli;
+import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import picocli.CommandLine;
+
+/**
+ * Ozone Repair Command line tool; repair subcommands such as {@code ldb} name this class as their parent.
+ */
+@CommandLine.Command(name = "ozone repair", description = "Operational tool to repair Ozone",
+    versionProvider = HddsVersionProvider.class, mixinStandardHelpOptions = true)
+public class OzoneRepair extends GenericCli {
+
+  private OzoneConfiguration ozoneConf;
+
+  /** Creates the tool without a configuration; one is created on the first {@link #getOzoneConf()} call. */
+  public OzoneRepair() {
+    super(OzoneRepair.class);
+  }
+
+  /** Creates the tool with a caller-supplied configuration; if non-null, {@link #getOzoneConf()} returns it. */
+  @VisibleForTesting
+  public OzoneRepair(OzoneConfiguration configuration) {
+    super(OzoneRepair.class);
+    this.ozoneConf = configuration;
+  }
+
+  /** Returns the configuration, lazily obtaining it from {@code createOzoneConfiguration()} when none is set. */
+  public OzoneConfiguration getOzoneConf() {
+    if (ozoneConf == null) {
+      ozoneConf = createOzoneConfiguration();
+    }
+    return ozoneConf;
+  }
+
+  /**
+   * Main for the Ozone Repair shell Command handling.
+   * @param argv - System Args Strings[]
+   * @throws Exception if {@code run(argv)} fails
+   */
+  public static void main(String[] argv) throws Exception {
+    new OzoneRepair().run(argv);
+  }
+}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RDBRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RDBRepair.java
new file mode 100644
index 0000000000..0f36934ec1
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/RDBRepair.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.repair;
+
+import org.apache.hadoop.hdds.cli.GenericCli;
+import org.apache.hadoop.hdds.cli.SubcommandWithParent;
+import org.kohsuke.MetaInfServices;
+import picocli.CommandLine;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Ozone Repair CLI for RocksDB.
+ */
+@CommandLine.Command(name = "ldb", description = "Operational tool to repair RocksDB table.")
+@MetaInfServices(SubcommandWithParent.class)
+public class RDBRepair implements Callable<Void>, SubcommandWithParent {
+
+  @CommandLine.Spec
+  private CommandLine.Model.CommandSpec spec;
+
+  @CommandLine.Option(names = {"--db"}, required = true, description = "Database File Path")
+  private String dbPath;
+
+  /** Returns the value of the mandatory {@code --db} option. */
+  public String getDbPath() {
+    return dbPath;
+  }
+
+  /** Hands the command spec to {@code GenericCli.missingSubcommand}; this command does no work of its own. */
+  @Override
+  public Void call() {
+    GenericCli.missingSubcommand(spec);
+    return null;
+  }
+
+  /** Declares {@link OzoneRepair} as the parent command of {@code ldb}. */
+  @Override
+  public Class<?> getParentType() {
+    return OzoneRepair.class;
+  }
+}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotRepair.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotRepair.java
new file mode 100644
index 0000000000..ec5e2f8f93
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/SnapshotRepair.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.repair.om;
+
+import org.apache.hadoop.hdds.cli.SubcommandWithParent;
+import org.apache.hadoop.hdds.utils.IOUtils;
+import org.apache.hadoop.hdds.utils.db.StringCodec;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksIterator;
+import org.apache.hadoop.ozone.debug.RocksDBUtils;
+import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
+import org.apache.hadoop.ozone.repair.RDBRepair;
+import org.apache.hadoop.ozone.shell.bucket.BucketUri;
+import org.kohsuke.MetaInfServices;
+import org.rocksdb.ColumnFamilyDescriptor;
+import org.rocksdb.ColumnFamilyHandle;
+import org.rocksdb.RocksDBException;
+import picocli.CommandLine;
+import picocli.CommandLine.Model.CommandSpec;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.Callable;
+
+import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
+import static org.apache.hadoop.ozone.OzoneConsts.SNAPSHOT_INFO_TABLE;
+
+/**
+ * Tool to repair snapshotInfoTable: rewrites the global and path previous snapshot IDs of one corrupted entry.
+ */
+@CommandLine.Command(name = "snapshot", description =
+    "CLI to update global and path previous snapshot for a snapshot in case snapshot chain is corrupted.")
+@MetaInfServices(SubcommandWithParent.class)
+public class SnapshotRepair implements Callable<Void>, SubcommandWithParent {
+
+  @CommandLine.Spec
+  private CommandSpec spec;
+
+  @CommandLine.ParentCommand
+  private RDBRepair parent;
+
+  @CommandLine.Mixin
+  private BucketUri bucketUri;
+
+  @CommandLine.Parameters(description = "Snapshot name to update", index = "1")
+  private String snapshotName;
+
+  @CommandLine.Option(names = {"--global-previous", "--gp"}, required = true,
+      description = "Global previous snapshotId to set for the given snapshot")
+  private UUID globalPreviousSnapshotId;
+
+  @CommandLine.Option(names = {"--path-previous", "--pp"}, required = true,
+      description = "Path previous snapshotId to set for the given snapshot")
+  private UUID pathPreviousSnapshotId;
+
+  // Optional with fallback: omitted or bare --dry-run means true; only an explicit false value writes to the DB.
+  @CommandLine.Option(names = {"--dry-run"}, arity = "0..1", defaultValue = "true", fallbackValue = "true",
+      description = "To dry-run the command.")
+  private boolean dryRun;
+
+  /**
+   * Loads the SnapshotInfo of the requested snapshot, checks that both new previous-snapshot IDs differ from its
+   * own ID and exist in snapshotInfoTable, then prints the result (dry run) or persists the updated entry.
+   * Validation failures, and RocksDBExceptions raised inside the try block, are only reported on stderr.
+   * @throws Exception for any other failure, e.g. an IOException declared by the lookup helpers
+   */
+  @Override
+  public Void call() throws Exception {
+    List<ColumnFamilyHandle> cfHandleList = new ArrayList<>();
+    List<ColumnFamilyDescriptor> cfDescList = RocksDBUtils.getColumnFamilyDescriptors(parent.getDbPath());
+
+    try (ManagedRocksDB db = ManagedRocksDB.open(parent.getDbPath(), cfDescList, cfHandleList)) {
+      ColumnFamilyHandle snapshotInfoCfh = getSnapshotInfoCfh(cfHandleList);
+      if (snapshotInfoCfh == null) {
+        System.err.println(SNAPSHOT_INFO_TABLE + " is not in a column family in DB for the given path.");
+        return null;
+      }
+
+      String snapshotInfoTableKey = SnapshotInfo.getTableKey(bucketUri.getValue().getVolumeName(),
+          bucketUri.getValue().getBucketName(), snapshotName);
+      SnapshotInfo snapshotInfo = getSnapshotInfo(db, snapshotInfoCfh, snapshotInfoTableKey);
+      if (snapshotInfo == null) {
+        System.err.println(snapshotName + " does not exist for given bucketUri: " + OM_KEY_PREFIX +
+            bucketUri.getValue().getVolumeName() + OM_KEY_PREFIX + bucketUri.getValue().getBucketName());
+        return null;
+      }
+
+      // snapshotIdSet holds the IDs of all existing snapshots. It is used to make sure that the provided global and
+      // path previous IDs exist, so that after the update the snapshot does not point to a ghost snapshot.
+      Set<UUID> snapshotIdSet = getSnapshotIdSet(db, snapshotInfoCfh);
+
+      if (Objects.equals(snapshotInfo.getSnapshotId(), globalPreviousSnapshotId)) {
+        System.err.println("globalPreviousSnapshotId: '" + globalPreviousSnapshotId +
+            "' is equal to given snapshot's ID: '" + snapshotInfo.getSnapshotId() + "'.");
+        return null;
+      }
+
+      if (Objects.equals(snapshotInfo.getSnapshotId(), pathPreviousSnapshotId)) {
+        System.err.println("pathPreviousSnapshotId: '" + pathPreviousSnapshotId +
+            "' is equal to given snapshot's ID: '" + snapshotInfo.getSnapshotId() + "'.");
+        return null;
+      }
+
+      if (!snapshotIdSet.contains(globalPreviousSnapshotId)) {
+        System.err.println("globalPreviousSnapshotId: '" + globalPreviousSnapshotId +
+            "' does not exist in snapshotInfoTable.");
+        return null;
+      }
+
+      if (!snapshotIdSet.contains(pathPreviousSnapshotId)) {
+        System.err.println("pathPreviousSnapshotId: '" + pathPreviousSnapshotId +
+            "' does not exist in snapshotInfoTable.");
+        return null;
+      }
+
+      snapshotInfo.setGlobalPreviousSnapshotId(globalPreviousSnapshotId);
+      snapshotInfo.setPathPreviousSnapshotId(pathPreviousSnapshotId);
+
+      if (dryRun) {
+        System.out.println("SnapshotInfo would be updated to : " + snapshotInfo);
+      } else {
+        byte[] snapshotInfoBytes = SnapshotInfo.getCodec().toPersistedFormat(snapshotInfo);
+        db.get()
+            .put(snapshotInfoCfh, StringCodec.get().toPersistedFormat(snapshotInfoTableKey), snapshotInfoBytes);
+        System.out.println("Snapshot Info is updated to : " +
+            getSnapshotInfo(db, snapshotInfoCfh, snapshotInfoTableKey));
+      }
+    } catch (RocksDBException exception) {
+      System.err.println("Failed to update the RocksDB for the given path: " + parent.getDbPath());
+      System.err.println(
+          "Make sure that Ozone entity (OM, SCM or DN) is not running for the give dbPath and current host.");
+      System.err.println(exception);
+    } finally {
+      IOUtils.closeQuietly(cfHandleList);
+    }
+
+    return null;
+  }
+
+  /** Collects the snapshotId of every entry found by iterating the given column family from its first key. */
+  private Set<UUID> getSnapshotIdSet(ManagedRocksDB db, ColumnFamilyHandle snapshotInfoCfh) throws IOException {
+    Set<UUID> snapshotIdSet = new HashSet<>();
+    try (ManagedRocksIterator iterator = new ManagedRocksIterator(db.get().newIterator(snapshotInfoCfh))) {
+      iterator.get().seekToFirst();
+      while (iterator.get().isValid()) {
+        SnapshotInfo snapshotInfo = SnapshotInfo.getCodec().fromPersistedFormat(iterator.get().value());
+        snapshotIdSet.add(snapshotInfo.getSnapshotId());
+        iterator.get().next();
+      }
+    }
+    return snapshotIdSet;
+  }
+
+  /** Returns the handle whose name equals SNAPSHOT_INFO_TABLE (as UTF-8 bytes), or null when there is none. */
+  private ColumnFamilyHandle getSnapshotInfoCfh(List<ColumnFamilyHandle> cfHandleList) throws RocksDBException {
+    byte[] nameBytes = SNAPSHOT_INFO_TABLE.getBytes(StandardCharsets.UTF_8);
+    for (ColumnFamilyHandle cf : cfHandleList) {
+      if (Arrays.equals(cf.getName(), nameBytes)) {
+        return cf;
+      }
+    }
+    return null;
+  }
+
+  /** Reads and decodes the SnapshotInfo stored under the given table key; null when the lookup yields no bytes. */
+  private SnapshotInfo getSnapshotInfo(ManagedRocksDB db, ColumnFamilyHandle snapshotInfoCfh, String snapshotInfoLKey)
+      throws IOException, RocksDBException {
+    byte[] bytes = db.get().get(snapshotInfoCfh, StringCodec.get().toPersistedFormat(snapshotInfoLKey));
+    return bytes != null ? SnapshotInfo.getCodec().fromPersistedFormat(bytes) : null;
+  }
+
+  /** Declares {@link RDBRepair} as the parent command of {@code snapshot}. */
+  @Override
+  public Class<?> getParentType() {
+    return RDBRepair.class;
+  }
+}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/package-info.java
new file mode 100644
index 0000000000..9e2324a4a6
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * OM related repair tools.
+ */
+package org.apache.hadoop.ozone.repair.om;
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/package-info.java
new file mode 100644
index 0000000000..bd382d04cf
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Ozone Repair tools.
+ */
+package org.apache.hadoop.ozone.repair;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]