This is an automated email from the ASF dual-hosted git repository.

smiklosovic pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/trunk by this push:
     new b11909b611 Make it possible for sstabledump to show tombstones only
b11909b611 is described below

commit b11909b611de811ed2f030848820a17c77df7013
Author: Stefan Miklosovic <smikloso...@apache.org>
AuthorDate: Tue Sep 24 19:04:17 2024 +0200

    Make it possible for sstabledump to show tombstones only
    
    patch by Stefan Miklosovic; reviewed by Brad Schoening for CASSANDRA-19939
---
 CHANGES.txt                                        |   1 +
 .../pages/managing/tools/sstable/sstabledump.adoc  |  11 +-
 .../apache/cassandra/tools/JsonTransformer.java    | 227 +++++++++++++--------
 .../org/apache/cassandra/tools/SSTableExport.java  | 102 +++++----
 .../apache/cassandra/tools/SSTableExportTest.java  |   3 +-
 5 files changed, 219 insertions(+), 125 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index bb087d07c0..1255333b9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Make it possible for sstabledump to show tombstones only (CASSANDRA-19939)
  * Ensure that RFP queries potentially stale replicas even with only key columns in the row filter (CASSANDRA-19938)
  * Allow nodes to change IP address while upgrading to TCM (CASSANDRA-19921)
  * Retain existing keyspace params on system tables after upgrade (CASSANDRA-19916)
diff --git a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
index 90f66b8854..df00fac767 100644
--- a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
+++ b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc
@@ -17,9 +17,10 @@ sstabledump <options> <sstable file path>
 |-d |CQL row per line internal representation
 |-e |Enumerate partition keys only
 |-k <arg> |Partition key
-|-x <arg> |Excluded partition key(s)
-|-t |Print raw timestamps instead of iso8601 date strings
 |-l |Output each row as a separate JSON object
+|-o |Enumerate tombstones only
+|-t |Print raw timestamps instead of iso8601 date strings
+|-x <arg> |Excluded partition key(s)
 |===
 
 If necessary, use sstableutil first to find out the sstables used by a
@@ -238,6 +239,12 @@ cat eventlog_dump_2018Jul26_excludekeys
   }
 ....
 
+== Dump tombstones only
+
+Since CASSANDRA-19939, it is possible to display only tombstones by passing the `-o` flag.
+This option is useful when you are only interested in tombstones and the full output would
+otherwise be very long; it lets you find tombstones faster.
+
 == Display raw timestamps
 
 By default, dates are displayed in iso8601 date format. Using the -t
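
A minimal usage sketch of the new -o option (the SSTable path below is hypothetical):

    sstabledump -o /var/lib/cassandra/data/ks/tbl-<table-id>/nb-1-big-Data.db

Judging from the patch below, -o is handled by the new process() helper in SSTableExport.java, so it appears to combine with -d (debug output) and -l (JSON lines). In the JSON output paths, partitions without any tombstones are skipped entirely, and within the remaining partitions only rows that carry deletions, plus range tombstone markers, are serialized.
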
diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java b/src/java/org/apache/cassandra/tools/JsonTransformer.java
index 8debfd3b75..9ffa6be00a 100644
--- a/src/java/org/apache/cassandra/tools/JsonTransformer.java
+++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java
@@ -61,6 +61,7 @@ import org.apache.cassandra.schema.ColumnMetadata;
 import org.apache.cassandra.schema.TableMetadata;
 import org.apache.cassandra.transport.ProtocolVersion;
 import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.FBUtilities;
 
 import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis;
 
@@ -83,14 +84,20 @@ public final class JsonTransformer
 
     private boolean rawTime = false;
 
+    private boolean tombstonesOnly = false;
+
+    private long nowInSeconds;
+
     private long currentPosition = 0;
 
-    private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines)
+    private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, boolean isJsonLines)
     {
         this.json = json;
         this.metadata = metadata;
         this.currentScanner = currentScanner;
         this.rawTime = rawTime;
+        this.tombstonesOnly = tombstonesOnly;
+        this.nowInSeconds = nowInSeconds;
 
         if (isJsonLines)
         {
@@ -107,24 +114,24 @@ public final class JsonTransformer
         }
     }
 
-    public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out)
+    public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out)
             throws IOException
     {
         try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false);
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, false);
             json.writeStartArray();
             partitions.forEach(transformer::serializePartition);
             json.writeEndArray();
         }
     }
 
-    public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out)
-            throws IOException
+    public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out)
+    throws IOException
     {
         try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, true);
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, true);
             partitions.forEach(transformer::serializePartition);
         }
     }
@@ -133,7 +140,7 @@ public final class JsonTransformer
     {
         try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false);
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, false, metadata, FBUtilities.nowInSeconds(), false);
             json.writeStartArray();
             keys.forEach(transformer::serializePartitionKey);
             json.writeEndArray();
@@ -206,49 +213,18 @@ public final class JsonTransformer
     {
         try
         {
-            json.writeStartObject();
-            json.writeObjectField("table kind", metadata.kind.name());
-            
-            json.writeFieldName("partition");
-            json.writeStartObject();
-            json.writeFieldName("key");
-            serializePartitionKey(partition.partitionKey());
-            json.writeNumberField("position", this.currentScanner.getCurrentPosition());
-
-            if (!partition.partitionLevelDeletion().isLive())
-                serializeDeletion(partition.partitionLevelDeletion());
-
-            json.writeEndObject();
-
-            json.writeFieldName("rows");
-            json.writeStartArray();
-            updatePosition();
-
-            if (partition.staticRow() != null)
+            boolean shouldSerialize = true;
+            if (tombstonesOnly)
             {
-                if (!partition.staticRow().isEmpty())
-                    serializeRow(partition.staticRow());
-                updatePosition();
-            }
+                shouldSerialize = partition.partitionLevelDeletion() != null && !partition.partitionLevelDeletion().isLive();
 
-            Unfiltered unfiltered;
-            while (partition.hasNext())
-            {
-                unfiltered = partition.next();
-                if (unfiltered instanceof Row)
-                {
-                    serializeRow((Row) unfiltered);
-                }
-                else if (unfiltered instanceof RangeTombstoneMarker)
-                {
-                    serializeTombstone((RangeTombstoneMarker) unfiltered);
-                }
-                updatePosition();
+                // check if some row should be printed
+                if (!shouldSerialize)
+                    shouldSerialize = containsSerializableRow(partition);
             }
 
-            json.writeEndArray();
-
-            json.writeEndObject();
+            if (shouldSerialize)
+                serializePartitionInternal(partition);
         }
 
         catch (IOException e)
@@ -258,61 +234,146 @@ public final class JsonTransformer
         }
     }
 
-    private void serializeRow(Row row)
+    private void serializePartitionInternal(UnfilteredRowIterator partition) throws IOException
     {
-        try
+        json.writeStartObject();
+        json.writeObjectField("table kind", metadata.kind.name());
+
+        json.writeFieldName("partition");
+        json.writeStartObject();
+        json.writeFieldName("key");
+        serializePartitionKey(partition.partitionKey());
+        json.writeNumberField("position", this.currentScanner.getCurrentPosition());
+
+        if (!partition.partitionLevelDeletion().isLive())
+            serializeDeletion(partition.partitionLevelDeletion());
+
+        json.writeEndObject();
+
+        json.writeFieldName("rows");
+        json.writeStartArray();
+        updatePosition();
+
+        if (partition.staticRow() != null)
         {
-            json.writeStartObject();
-            String rowType = row.isStatic() ? "static_block" : "row";
-            json.writeFieldName("type");
-            json.writeString(rowType);
-            json.writeNumberField("position", this.currentPosition);
+            if (!partition.staticRow().isEmpty())
+                serializeRow(partition.staticRow());
+            updatePosition();
+        }
 
-            // Only print clustering information for non-static rows.
-            if (!row.isStatic())
+        Unfiltered unfiltered;
+        while (partition.hasNext())
+        {
+            unfiltered = partition.next();
+            if (unfiltered instanceof Row)
             {
-                serializeClustering(row.clustering());
+                serializeRow((Row) unfiltered);
             }
+            else if (unfiltered instanceof RangeTombstoneMarker)
+            {
+                serializeTombstone((RangeTombstoneMarker) unfiltered);
+            }
+            updatePosition();
+        }
+
+        json.writeEndArray();
+
+        json.writeEndObject();
+    }
+
+    private void serializeRow(Row row)
+    {
+        try
+        {
+            if (shouldSerializeRow(row))
+                serializeRowInternal(row);
+        }
+        catch (IOException e)
+        {
+            logger.error("Fatal error parsing row.", e);
+        }
+    }
 
-            LivenessInfo liveInfo = row.primaryKeyLivenessInfo();
-            if (!liveInfo.isEmpty())
+    private boolean containsSerializableRow(UnfilteredRowIterator partition)
+    {
+        boolean shouldSerialize = false;
+        Unfiltered unfiltered;
+        while (partition.hasNext())
+        {
+            unfiltered = partition.next();
+            if (unfiltered instanceof Row)
             {
-                objectIndenter.setCompact(false);
-                json.writeFieldName("liveness_info");
-                objectIndenter.setCompact(true);
-                json.writeStartObject();
-                json.writeFieldName("tstamp");
-                json.writeString(dateString(TimeUnit.MICROSECONDS, liveInfo.timestamp()));
-                if (liveInfo.isExpiring())
+                if (shouldSerializeRow((Row) unfiltered))
                 {
-                    json.writeNumberField("ttl", liveInfo.ttl());
-                    json.writeFieldName("expires_at");
-                    json.writeString(dateString(TimeUnit.SECONDS, liveInfo.localExpirationTime()));
-                    json.writeFieldName("expired");
-                    json.writeBoolean(liveInfo.localExpirationTime() < (currentTimeMillis() / 1000));
+                    shouldSerialize = true;
+                    break;
                 }
-                json.writeEndObject();
-                objectIndenter.setCompact(false);
             }
-
-            // If this is a deletion, indicate that, otherwise write cells.
-            if (!row.deletion().isLive())
+            else if (unfiltered instanceof RangeTombstoneMarker)
             {
-                serializeDeletion(row.deletion().time());
+                shouldSerialize = true;
+                break;
             }
-            json.writeFieldName("cells");
-            json.writeStartArray();
-            for (ColumnData cd : row)
+        }
+
+        partition.close();
+
+        return shouldSerialize;
+    }
+
+    private boolean shouldSerializeRow(Row row)
+    {
+        return !tombstonesOnly || row.hasDeletion(nowInSeconds);
+    }
+
+    private void serializeRowInternal(Row row) throws IOException
+    {
+        json.writeStartObject();
+        String rowType = row.isStatic() ? "static_block" : "row";
+        json.writeFieldName("type");
+        json.writeString(rowType);
+        json.writeNumberField("position", this.currentPosition);
+
+        // Only print clustering information for non-static rows.
+        if (!row.isStatic())
+        {
+            serializeClustering(row.clustering());
+        }
+
+        LivenessInfo liveInfo = row.primaryKeyLivenessInfo();
+        if (!liveInfo.isEmpty())
+        {
+            objectIndenter.setCompact(false);
+            json.writeFieldName("liveness_info");
+            objectIndenter.setCompact(true);
+            json.writeStartObject();
+            json.writeFieldName("tstamp");
+            json.writeString(dateString(TimeUnit.MICROSECONDS, liveInfo.timestamp()));
+            if (liveInfo.isExpiring())
             {
-                serializeColumnData(cd, liveInfo);
+                json.writeNumberField("ttl", liveInfo.ttl());
+                json.writeFieldName("expires_at");
+                json.writeString(dateString(TimeUnit.SECONDS, liveInfo.localExpirationTime()));
+                json.writeFieldName("expired");
+                json.writeBoolean(liveInfo.localExpirationTime() < (currentTimeMillis() / 1000));
             }
-            json.writeEndArray();
             json.writeEndObject();
+            objectIndenter.setCompact(false);
         }
-        catch (IOException e)
+
+        // If this is a deletion, indicate that, otherwise write cells.
+        if (!row.deletion().isLive())
         {
-            logger.error("Fatal error parsing row.", e);
+            serializeDeletion(row.deletion().time());
         }
+        json.writeFieldName("cells");
+        json.writeStartArray();
+        for (ColumnData cd : row)
+        {
+            serializeColumnData(cd, liveInfo);
+        }
+        json.writeEndArray();
+        json.writeEndObject();
     }
 
     private void serializeTombstone(RangeTombstoneMarker tombstone)
diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java b/src/java/org/apache/cassandra/tools/SSTableExport.java
index 05ec576553..0181e9f29f 100644
--- a/src/java/org/apache/cassandra/tools/SSTableExport.java
+++ b/src/java/org/apache/cassandra/tools/SSTableExport.java
@@ -36,6 +36,7 @@ import org.apache.commons.cli.PosixParser;
 import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.rows.Row;
 import org.apache.cassandra.db.rows.UnfilteredRowIterator;
 import org.apache.cassandra.dht.AbstractBounds;
 import org.apache.cassandra.dht.Bounds;
@@ -67,6 +68,7 @@ public class SSTableExport
     private static final String DEBUG_OUTPUT_OPTION = "d";
     private static final String EXCLUDE_KEY_OPTION = "x";
     private static final String ENUMERATE_KEYS_OPTION = "e";
+    private static final String ENUMERATE_TOMBSTONES_OPTION = "o";
     private static final String RAW_TIMESTAMPS = "t";
     private static final String PARTITION_JSON_LINES = "l";
 
@@ -87,16 +89,19 @@ public class SSTableExport
         excludeKey.setArgs(500);
         options.addOption(excludeKey);
 
-        Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false, "enumerate partition keys only");
+        Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false, "Enumerate partition keys only");
         options.addOption(optEnumerate);
 
+        Option optTombstones = new Option(ENUMERATE_TOMBSTONES_OPTION, false, "Enumerate tombstones only");
+        options.addOption(optTombstones);
+
         Option debugOutput = new Option(DEBUG_OUTPUT_OPTION, false, "CQL row per line internal representation");
         options.addOption(debugOutput);
 
         Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw timestamps instead of iso8601 date strings");
         options.addOption(rawTimestamps);
 
-        Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false, "Output json lines, by partition");
+        Option partitionJsonLines = new Option(PARTITION_JSON_LINES, false, "Output json lines, by partition");
         options.addOption(partitionJsonLines);
     }
 
@@ -160,6 +165,11 @@ public class SSTableExport
                                                System.out);
                 }
             }
+            else if (cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION))
+            {
+                final ISSTableScanner currentScanner = sstable.getScanner();
+                process(currentScanner, Util.iterToStream(currentScanner), metadata);
+            }
             else
             {
                 IPartitioner partitioner = sstable.getPartitioner();
@@ -179,43 +189,9 @@ public class SSTableExport
                 {
                     currentScanner = sstable.getScanner();
                 }
-                Stream<UnfilteredRowIterator> partitions = Util.iterToStream(currentScanner).filter(i ->
-                    excludes.isEmpty() || !excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey()))
-                );
-                if (cmd.hasOption(DEBUG_OUTPUT_OPTION))
-                {
-                    AtomicLong position = new AtomicLong();
-                    partitions.forEach(partition ->
-                    {
-                        position.set(currentScanner.getCurrentPosition());
-
-                        if (!partition.partitionLevelDeletion().isLive())
-                        {
-                            System.out.println("[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
-                                               position.get() + " " + partition.partitionLevelDeletion());
-                        }
-                        if (!partition.staticRow().isEmpty())
-                        {
-                            System.out.println("[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
-                                               position.get() + " " + partition.staticRow().toString(metadata, true));
-                        }
-                        partition.forEachRemaining(row ->
-                        {
-                            System.out.println(
-                            "[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@"
-                            + position.get() + " " + row.toString(metadata, false, true));
-                            position.set(currentScanner.getCurrentPosition());
-                        });
-                    });
-                }
-                else if (cmd.hasOption(PARTITION_JSON_LINES))
-                {
-                    JsonTransformer.toJsonLines(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
-                }
-                else
-                {
-                    JsonTransformer.toJson(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
-                }
+
+                Stream<UnfilteredRowIterator> partitions = Util.iterToStream(currentScanner).filter(i -> excludes.isEmpty() || !excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey())));
+                process(currentScanner, partitions, metadata);
             }
         }
         catch (IOException e)
@@ -226,6 +202,54 @@ public class SSTableExport
         System.exit(0);
     }
 
+    private static void process(ISSTableScanner scanner, Stream<UnfilteredRowIterator> partitions, TableMetadata metadata) throws IOException
+    {
+        long nowInSeconds = FBUtilities.nowInSeconds();
+        boolean hasTombstoneOption = cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION);
+
+        if (cmd.hasOption(DEBUG_OUTPUT_OPTION))
+        {
+            AtomicLong position = new AtomicLong();
+            partitions.forEach(partition ->
+            {
+                position.set(scanner.getCurrentPosition());
+
+                if (!partition.partitionLevelDeletion().isLive())
+                {
+                    System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
+                                       position.get() + ' ' + partition.partitionLevelDeletion());
+                }
+                if (!partition.staticRow().isEmpty())
+                {
+                    System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" +
+                                       position.get() + ' ' + partition.staticRow().toString(metadata, true));
+                }
+                partition.forEachRemaining(row ->
+                {
+                    boolean shouldPrint = true;
+                    if (hasTombstoneOption && row.isRow())
+                        shouldPrint = ((Row) row).hasDeletion(nowInSeconds);
+
+                    if (shouldPrint)
+                    {
+                        System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@"
+                                           + position.get() + ' ' + row.toString(metadata, false, true));
+                    }
+
+                    position.set(scanner.getCurrentPosition());
+                });
+             });
+        }
+        else if (cmd.hasOption(PARTITION_JSON_LINES))
+        {
+            JsonTransformer.toJsonLines(scanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds, System.out);
+        }
+        else
+        {
+            JsonTransformer.toJson(scanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds, System.out);
+        }
+    }
+
     private static void printUsage()
     {
         String usage = String.format("sstabledump <sstable file path> <options>%n");
diff --git a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
index 2a29664a9a..77144f14c4 100644
--- a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
+++ b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java
@@ -60,9 +60,10 @@ public class SSTableExportTest extends OfflineToolUtils
         String help = "usage: sstabledump <sstable file path> <options>\n" +
                        "Dump contents of given SSTable to standard output in 
JSON format.\n" +
                        " -d         CQL row per line internal 
representation\n" +
-                       " -e         enumerate partition keys only\n" +
+                       " -e         Enumerate partition keys only\n" +
                        " -k <arg>   List of included partition keys\n" +
                        " -l         Output json lines, by partition\n" +
+                       " -o         Enumerate tombstones only\n" +
                        " -t         Print raw timestamps instead of iso8601 
date strings\n" +
                        " -x <arg>   List of excluded partition keys\n";
         Assertions.assertThat(tool.getStdout()).isEqualTo(help);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org
