dracarys09 commented on code in PR #4581:
URL: https://github.com/apache/cassandra/pull/4581#discussion_r2904632995
##########
src/java/org/apache/cassandra/tcm/Epoch.java:
##########
@@ -118,6 +118,16 @@ public Epoch nextEpoch()
return new Epoch(epoch + 1);
}
+ public Epoch previousEpoch()
Review Comment:
Yeah, I added it in the `Epoch` class for completeness. Moved it to the tool
class.
##########
src/java/org/apache/cassandra/tools/OfflineClusterMetadataDump.java:
##########
@@ -0,0 +1,584 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.tools;
+
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.ConsistencyLevel;
+import org.apache.cassandra.db.Keyspace;
+import org.apache.cassandra.db.SystemKeyspace;
+import org.apache.cassandra.io.util.FileOutputStreamPlus;
+import org.apache.cassandra.schema.DistributedMetadataLogKeyspace;
+import org.apache.cassandra.schema.DistributedSchema;
+import org.apache.cassandra.schema.Keyspaces;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.SchemaConstants;
+import org.apache.cassandra.tcm.ClusterMetadata;
+import org.apache.cassandra.tcm.ClusterMetadataService;
+import org.apache.cassandra.tcm.Epoch;
+import org.apache.cassandra.tcm.MetadataSnapshots;
+import org.apache.cassandra.tcm.log.Entry;
+import org.apache.cassandra.tcm.log.LogReader;
+import org.apache.cassandra.tcm.log.LogState;
+import org.apache.cassandra.tcm.log.SystemKeyspaceStorage;
+import org.apache.cassandra.tcm.membership.NodeVersion;
+import org.apache.cassandra.tcm.serialization.VerboseMetadataSerializer;
+import org.apache.cassandra.tcm.serialization.Version;
+
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+import static com.google.common.base.Throwables.getStackTraceAsString;
+
+/**
+ * Offline tool to dump cluster metadata from local SSTables.
+ * <p>
+ * This is an emergency recovery tool for debugging when a Cassandra instance
cannot
+ * start due to cluster metadata issues. It reads the local_metadata_log and
metadata_snapshots
+ * tables from the system keyspace to reconstruct and display the cluster
metadata state.
+ * <p>
+ * <b>NOTE: This tool is for offline use only. Do not run on a live
cluster.</b>
+ * <p>
+ * Usage:
+ * <pre>
+ * # Dump cluster metadata as binary (default)
+ * offlineclustermetadatadump metadata --data-dir /path/to/data
+ *
+ * # Dump cluster metadata as toString output
+ * offlineclustermetadatadump metadata --data-dir /path/to/data --to-string
+ *
+ * # Dump local log entries
+ * offlineclustermetadatadump log --data-dir /path/to/data --from-epoch 1
--to-epoch 50
+ *
+ * # Dump distributed log (CMS nodes)
+ * offlineclustermetadatadump distributed-log --data-dir /path/to/data
+ * </pre>
+ */
+@Command(name = "offlineclustermetadatadump",
+mixinStandardHelpOptions = true,
+description = "Offline tool to dump cluster metadata from local SSTables.
NOTE: For offline use only.",
+subcommands = { OfflineClusterMetadataDump.MetadataCommand.class,
OfflineClusterMetadataDump.LogCommand.class,
OfflineClusterMetadataDump.DistributedLogCommand.class })
+public class OfflineClusterMetadataDump implements Runnable
+{
+ private static final Output output = Output.CONSOLE;
+
+ public static void main(String... args)
+ {
+ Util.initDatabaseDescriptor();
+
+ CommandLine cli = new
CommandLine(OfflineClusterMetadataDump.class).setExecutionExceptionHandler((ex,
cmd, parseResult) -> {
+ err(ex);
+ return 2;
+ });
+ int status = cli.execute(args);
+ System.exit(status);
+ }
+
+ protected static void err(Throwable e)
+ {
+ output.err.println("error: " + e.getMessage());
+ output.err.println("-- StackTrace --");
+ output.err.println(getStackTraceAsString(e));
+ }
+
    /**
     * Invoked when the tool is run without a subcommand: print usage help
     * and exit normally.
     */
    @Override
    public void run()
    {
        CommandLine.usage(this, output.out);
    }
+
+ /**
+ * Base class with common options and methods shared by all subcommands.
+ */
+ @Command(mixinStandardHelpOptions = true)
+ static abstract class BaseCommand implements Runnable
+ {
+ @Option(names = { "-d", "--data-dir" }, description = "Data directory
containing system keyspace")
+ public String dataDir;
+
+ @Option(names = { "-s", "--sstables" }, description = "Path to SSTable
directory for metadata tables (can be specified multiple times)", arity =
"1..*")
+ public List<String> sstables;
+
+ @Option(names = { "-p", "--partitioner" }, description = "Partitioner
class name",
+ defaultValue = "org.apache.cassandra.dht.Murmur3Partitioner")
+ public String partitioner;
+
+ @Option(names = { "-v", "--verbose" }, description = "Verbose output")
+ public boolean verbose;
+
+ @Option(names = { "--debug" }, description = "Show stack traces on
errors")
+ public boolean debug;
+
+ protected Path tempDir;
+
        /**
         * Template method implemented by each subcommand with its specific
         * work. Invoked from {@link #run()} after the temporary directory,
         * partitioner and offline schema have been set up.
         *
         * @throws Exception on any failure; {@link #run()} reports it and
         *                   exits with a non-zero status
         */
        protected abstract void execute() throws Exception;

        /**
         * Whether this command needs the distributed metadata keyspace schema.
         * Subcommands that read the distributed log override this to return
         * {@code true}; local-only commands keep the default.
         */
        protected boolean needsDistributedMetadataKeyspace()
        {
            return false;
        }
+
+ @Override
+ public void run()
+ {
+ try
+ {
+ setupTempDirectory();
+
+ // Set the cluster's partitioner - needed when reconstructing
ClusterMetadata
+ // from log entries when no snapshot is available.
+ DatabaseDescriptor.setPartitioner(partitioner);
+
+ if (needsDistributedMetadataKeyspace())
+ {
+ // Set up schema for distributed metadata keyspace
+
ClusterMetadataService.empty(Keyspaces.of(DistributedMetadataLogKeyspace.initialMetadata("dc1")));
+ }
+ else
+ {
+ // No distributed keyspaces needed for local-only commands
+ ClusterMetadataService.empty(Keyspaces.none());
+ }
+ Keyspace.setInitialized();
+
+ // This is needed for non-local system keyspaces (e.g.,
system_cluster_metadata).
+ // Using loadSSTables=false ensures no disk I/O during
initialization - SSTables are imported separately.
+
ClusterMetadata.current().schema.initializeKeyspaceInstances(DistributedSchema.empty(),
false);
+
+ execute();
+ }
+ catch (Exception e)
+ {
+ if (debug)
+ {
+ e.printStackTrace(output.err);
+ }
+ else
+ {
+ output.err.println("Error: " + e.getMessage());
+ }
+ System.exit(1);
+ }
+ finally
+ {
+ cleanupTempDirectory();
+ }
+ }
+
+ /**
+ * Creates a temporary directory and configures DatabaseDescriptor to
use it.
+ */
+ protected void setupTempDirectory() throws IOException
+ {
+ tempDir = Files.createTempDirectory("offlinedump");
+ DatabaseDescriptor.getRawConfig().data_file_directories = new
String[]{ tempDir.resolve("data").toString() };
+ DatabaseDescriptor.getRawConfig().commitlog_directory =
tempDir.resolve("commitlog").toString();
+ DatabaseDescriptor.getRawConfig().hints_directory =
tempDir.resolve("hints").toString();
+ DatabaseDescriptor.getRawConfig().saved_caches_directory =
tempDir.resolve("saved_caches").toString();
+ DatabaseDescriptor.getRawConfig().accord.journal_directory =
tempDir.resolve("accord_journal").toString();
+
+ if (verbose)
+ {
+ output.out.println("Using temporary directory: " + tempDir);
+ }
+ }
+
+ /**
+ * Cleans up the temporary directory.
+ */
+ protected void cleanupTempDirectory()
+ {
+ if (tempDir != null)
+ {
+ try
+ {
+ Files.walkFileTree(tempDir, new SimpleFileVisitor<>()
+ {
+ @Override
+ public FileVisitResult visitFile(Path file,
BasicFileAttributes attrs)
+ throws IOException
+ {
+ Files.deleteIfExists(file);
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult postVisitDirectory(Path dir,
IOException exc)
+ throws IOException
+ {
+ Files.deleteIfExists(dir);
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+ catch (IOException e)
+ {
+ if (verbose)
+ {
+ output.err.println("Warning: Failed to fully cleanup
temp directory: " + tempDir + " (" + e.getMessage() + ")");
+ }
+ }
+ finally
+ {
+ tempDir = null;
+ }
+ }
+ }
+
+ protected void importSystemKeyspaceSSTables() throws IOException
+ {
+ Keyspace ks =
Schema.instance.getKeyspaceInstance(SchemaConstants.SYSTEM_KEYSPACE_NAME);
+
+ // Find and import SSTables for local_metadata_log
+ String logTablePath = findTablePath(SystemKeyspace.METADATA_LOG,
SchemaConstants.SYSTEM_KEYSPACE_NAME);
+ if (logTablePath != null)
+ {
+ ColumnFamilyStore logCfs =
ks.getColumnFamilyStore(SystemKeyspace.METADATA_LOG);
+ logCfs.importNewSSTables(Collections.singleton(logTablePath),
false, false, false, false, false, false, true);
+ if (verbose)
+ {
+ output.out.println("Imported SSTables from: " +
logTablePath);
+ }
+ }
+
+ // Find and import SSTables for metadata_snapshots
+ String snapshotTablePath =
findTablePath(SystemKeyspace.SNAPSHOT_TABLE_NAME,
SchemaConstants.SYSTEM_KEYSPACE_NAME);
+ if (snapshotTablePath != null)
+ {
+ ColumnFamilyStore snapshotCfs =
ks.getColumnFamilyStore(SystemKeyspace.SNAPSHOT_TABLE_NAME);
+
snapshotCfs.importNewSSTables(Collections.singleton(snapshotTablePath), false,
false, false, false, false, false, true);
+ if (verbose)
+ {
+ output.out.println("Imported SSTables from: " +
snapshotTablePath);
+ }
+ }
+ }
+
+ protected void importDistributedLogSSTables() throws IOException
+ {
+ Keyspace ks =
Schema.instance.getKeyspaceInstance(SchemaConstants.METADATA_KEYSPACE_NAME);
+
+ // Find and import SSTables for distributed_metadata_log
+ String logTablePath =
findTablePath(DistributedMetadataLogKeyspace.TABLE_NAME,
SchemaConstants.METADATA_KEYSPACE_NAME);
+ if (logTablePath != null)
+ {
+ ColumnFamilyStore logCfs =
ks.getColumnFamilyStore(DistributedMetadataLogKeyspace.TABLE_NAME);
+ logCfs.importNewSSTables(Collections.singleton(logTablePath),
false, false, false, false, false, false, true);
+ if (verbose)
+ {
+ output.out.println("Imported SSTables from: " +
logTablePath);
+ }
+ }
+ }
+
+ protected String findTablePath(String tableName, String keyspaceName)
throws IOException
+ {
+ if (sstables != null && !sstables.isEmpty())
+ {
+ for (String sstablePath : sstables)
+ {
+ if (sstablePath.contains(tableName))
+ return sstablePath;
+ Path tableDir = Path.of(sstablePath, tableName);
+ if (Files.exists(tableDir))
+ return tableDir.toString();
+ String matches = findTablePathInDir(tableName,
keyspaceName, sstablePath);
+ if (matches != null)
+ return matches;
+ }
+ return null;
+ }
+
+ if (dataDir != null)
+ {
+ String matches = findTablePathInDir(tableName, keyspaceName,
dataDir);
+ if (matches != null)
+ return matches;
+ }
+
+ String[] dataDirs = DatabaseDescriptor.getAllDataFileLocations();
+ for (String dir : dataDirs)
+ {
+ String matches = findTablePathInDir(tableName, keyspaceName,
dir);
+ if (matches != null)
+ return matches;
+ }
+
+ return null;
+ }
+
+ private String findTablePathInDir(String tableName, String
keyspaceName, String dataDir) throws IOException
+ {
+ Path ksDir = Path.of(dataDir, keyspaceName);
+ if (Files.exists(ksDir))
+ {
+ try (Stream<Path> paths = Files.list(ksDir))
+ {
+ List<Path> matches = paths.filter(p ->
p.getFileName().toString().startsWith(tableName + "-"))
+ .collect(Collectors.toList());
+ if (!matches.isEmpty())
+ return matches.get(0).toString();
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Gets log state from the given reader, detecting and logging gaps in
epochs.
+ *
+ * @param reader the log reader to read entries from
+ * @param snapshotManager snapshot manager (use NO_OP for log listing
commands)
+ * @param startEpoch if provided, start reading from this epoch (for
--from-epoch)
+ * @param targetEpoch if provided, stop reading at this epoch (for
--to-epoch or --epoch)
+ * @param out output for warnings
+ */
+ @VisibleForTesting
+ static LogState getLogState(LogReader reader,
+ MetadataSnapshots snapshotManager,
+ Long startEpoch,
+ Long targetEpoch,
+ Output out)
+ {
+ Epoch endEpoch = targetEpoch != null ? Epoch.create(targetEpoch) :
Epoch.create(Long.MAX_VALUE);
+ ClusterMetadata base = snapshotManager.getSnapshotBefore(endEpoch);
+
+ Epoch baseEpoch = base != null
+ ? base.epoch
+ : startEpoch != null ?
Epoch.create(startEpoch).previousEpoch() : Epoch.EMPTY;
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]