This is an automated email from the ASF dual-hosted git repository. smiklosovic pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new b11909b611 Make sstabledump possible to show tombstones only b11909b611 is described below commit b11909b611de811ed2f030848820a17c77df7013 Author: Stefan Miklosovic <smikloso...@apache.org> AuthorDate: Tue Sep 24 19:04:17 2024 +0200 Make sstabledump possible to show tombstones only patch by Stefan Miklosovic; reviewed by Brad Schoening for CASSANDRA-19939 --- CHANGES.txt | 1 + .../pages/managing/tools/sstable/sstabledump.adoc | 11 +- .../apache/cassandra/tools/JsonTransformer.java | 227 +++++++++++++-------- .../org/apache/cassandra/tools/SSTableExport.java | 102 +++++---- .../apache/cassandra/tools/SSTableExportTest.java | 3 +- 5 files changed, 219 insertions(+), 125 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index bb087d07c0..1255333b9c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Make sstabledump possible to show tombstones only (CASSANDRA-19939) * Ensure that RFP queries potentially stale replicas even with only key columns in the row filter (CASSANDRA-19938) * Allow nodes to change IP address while upgrading to TCM (CASSANDRA-19921) * Retain existing keyspace params on system tables after upgrade (CASSANDRA-19916) diff --git a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc index 90f66b8854..df00fac767 100644 --- a/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc +++ b/doc/modules/cassandra/pages/managing/tools/sstable/sstabledump.adoc @@ -17,9 +17,10 @@ sstabledump <options> <sstable file path> |-d |CQL row per line internal representation |-e |Enumerate partition keys only |-k <arg> |Partition key -|-x <arg> |Excluded partition key(s) -|-t |Print raw timestamps instead of iso8601 date strings |-l |Output each row as a separate JSON object +|-o |Enumerate tombstones only +|-t |Print raw timestamps instead of iso8601 date strings +|-x <arg> |Excluded partition key(s) |=== If necessary, use sstableutil first to find out the sstables used by a @@ -238,6 +239,12 @@ cat eventlog_dump_2018Jul26_excludekeys } .... +== Dump tombstones only + +It is possible to display only tombstones since CASSANDRA-19939. You enable this feature by `-o` flag. This option +is useful to use if you are interested only in tombstones and the output is very long. This way, you find tombstones +faster. + == Display raw timestamps By default, dates are displayed in iso8601 date format. Using the -t diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java b/src/java/org/apache/cassandra/tools/JsonTransformer.java index 8debfd3b75..9ffa6be00a 100644 --- a/src/java/org/apache/cassandra/tools/JsonTransformer.java +++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java @@ -61,6 +61,7 @@ import org.apache.cassandra.schema.ColumnMetadata; import org.apache.cassandra.schema.TableMetadata; import org.apache.cassandra.transport.ProtocolVersion; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.FBUtilities; import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis; @@ -83,14 +84,20 @@ public final class JsonTransformer private boolean rawTime = false; + private boolean tombstonesOnly = false; + + private long nowInSeconds; + private long currentPosition = 0; - private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines) + private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, boolean isJsonLines) { this.json = json; this.metadata = metadata; this.currentScanner = currentScanner; this.rawTime = rawTime; + this.tombstonesOnly = tombstonesOnly; + this.nowInSeconds = nowInSeconds; if (isJsonLines) { @@ -107,24 +114,24 @@ public final class JsonTransformer } } - public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out) + public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out) throws IOException { try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { - JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false); + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, false); json.writeStartArray(); partitions.forEach(transformer::serializePartition); json.writeEndArray(); } } - public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out) - throws IOException + public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, boolean tombstonesOnly, TableMetadata metadata, long nowInSeconds, OutputStream out) + throws IOException { try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { - JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, true); + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, tombstonesOnly, metadata, nowInSeconds, true); partitions.forEach(transformer::serializePartition); } } @@ -133,7 +140,7 @@ public final class JsonTransformer { try (JsonGenerator json = jsonFactory.createGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { - JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false); + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, false, metadata, FBUtilities.nowInSeconds(), false); json.writeStartArray(); keys.forEach(transformer::serializePartitionKey); json.writeEndArray(); @@ -206,49 +213,18 @@ public final class JsonTransformer { try { - json.writeStartObject(); - json.writeObjectField("table kind", metadata.kind.name()); - - json.writeFieldName("partition"); - json.writeStartObject(); - json.writeFieldName("key"); - serializePartitionKey(partition.partitionKey()); - json.writeNumberField("position", this.currentScanner.getCurrentPosition()); - - if (!partition.partitionLevelDeletion().isLive()) - serializeDeletion(partition.partitionLevelDeletion()); - - json.writeEndObject(); - - json.writeFieldName("rows"); - json.writeStartArray(); - updatePosition(); - - if (partition.staticRow() != null) + boolean shouldSerialize = true; + if (tombstonesOnly) { - if (!partition.staticRow().isEmpty()) - serializeRow(partition.staticRow()); - updatePosition(); - } + shouldSerialize = partition.partitionLevelDeletion() != null && !partition.partitionLevelDeletion().isLive(); - Unfiltered unfiltered; - while (partition.hasNext()) - { - unfiltered = partition.next(); - if (unfiltered instanceof Row) - { - serializeRow((Row) unfiltered); - } - else if (unfiltered instanceof RangeTombstoneMarker) - { - serializeTombstone((RangeTombstoneMarker) unfiltered); - } - updatePosition(); + // check if some row should be printed + if (!shouldSerialize) + shouldSerialize = containsSerializableRow(partition); } - json.writeEndArray(); - - json.writeEndObject(); + if (shouldSerialize) + serializePartitionInternal(partition); } catch (IOException e) @@ -258,61 +234,146 @@ public final class JsonTransformer } } - private void serializeRow(Row row) + private void serializePartitionInternal(UnfilteredRowIterator partition) throws IOException { - try + json.writeStartObject(); + json.writeObjectField("table kind", metadata.kind.name()); + + json.writeFieldName("partition"); + json.writeStartObject(); + json.writeFieldName("key"); + serializePartitionKey(partition.partitionKey()); + json.writeNumberField("position", this.currentScanner.getCurrentPosition()); + + if (!partition.partitionLevelDeletion().isLive()) + serializeDeletion(partition.partitionLevelDeletion()); + + json.writeEndObject(); + + json.writeFieldName("rows"); + json.writeStartArray(); + updatePosition(); + + if (partition.staticRow() != null) { - json.writeStartObject(); - String rowType = row.isStatic() ? "static_block" : "row"; - json.writeFieldName("type"); - json.writeString(rowType); - json.writeNumberField("position", this.currentPosition); + if (!partition.staticRow().isEmpty()) + serializeRow(partition.staticRow()); + updatePosition(); + } - // Only print clustering information for non-static rows. - if (!row.isStatic()) + Unfiltered unfiltered; + while (partition.hasNext()) + { + unfiltered = partition.next(); + if (unfiltered instanceof Row) { - serializeClustering(row.clustering()); + serializeRow((Row) unfiltered); } + else if (unfiltered instanceof RangeTombstoneMarker) + { + serializeTombstone((RangeTombstoneMarker) unfiltered); + } + updatePosition(); + } + + json.writeEndArray(); + + json.writeEndObject(); + } + + private void serializeRow(Row row) + { + try + { + if (shouldSerializeRow(row)) + serializeRowInternal(row); + } + catch (IOException e) + { + logger.error("Fatal error parsing row.", e); + } + } - LivenessInfo liveInfo = row.primaryKeyLivenessInfo(); - if (!liveInfo.isEmpty()) + private boolean containsSerializableRow(UnfilteredRowIterator partition) + { + boolean shouldSerialize = false; + Unfiltered unfiltered; + while (partition.hasNext()) + { + unfiltered = partition.next(); + if (unfiltered instanceof Row) { - objectIndenter.setCompact(false); - json.writeFieldName("liveness_info"); - objectIndenter.setCompact(true); - json.writeStartObject(); - json.writeFieldName("tstamp"); - json.writeString(dateString(TimeUnit.MICROSECONDS, liveInfo.timestamp())); - if (liveInfo.isExpiring()) + if (shouldSerializeRow((Row) unfiltered)) { - json.writeNumberField("ttl", liveInfo.ttl()); - json.writeFieldName("expires_at"); - json.writeString(dateString(TimeUnit.SECONDS, liveInfo.localExpirationTime())); - json.writeFieldName("expired"); - json.writeBoolean(liveInfo.localExpirationTime() < (currentTimeMillis() / 1000)); + shouldSerialize = true; + break; } - json.writeEndObject(); - objectIndenter.setCompact(false); } - - // If this is a deletion, indicate that, otherwise write cells. - if (!row.deletion().isLive()) + else if (unfiltered instanceof RangeTombstoneMarker) { - serializeDeletion(row.deletion().time()); + shouldSerialize = true; + break; } - json.writeFieldName("cells"); - json.writeStartArray(); - for (ColumnData cd : row) + } + + partition.close(); + + return shouldSerialize; + } + + private boolean shouldSerializeRow(Row row) + { + return !tombstonesOnly || row.hasDeletion(nowInSeconds); + } + + private void serializeRowInternal(Row row) throws IOException + { + json.writeStartObject(); + String rowType = row.isStatic() ? "static_block" : "row"; + json.writeFieldName("type"); + json.writeString(rowType); + json.writeNumberField("position", this.currentPosition); + + // Only print clustering information for non-static rows. + if (!row.isStatic()) + { + serializeClustering(row.clustering()); + } + + LivenessInfo liveInfo = row.primaryKeyLivenessInfo(); + if (!liveInfo.isEmpty()) + { + objectIndenter.setCompact(false); + json.writeFieldName("liveness_info"); + objectIndenter.setCompact(true); + json.writeStartObject(); + json.writeFieldName("tstamp"); + json.writeString(dateString(TimeUnit.MICROSECONDS, liveInfo.timestamp())); + if (liveInfo.isExpiring()) { - serializeColumnData(cd, liveInfo); + json.writeNumberField("ttl", liveInfo.ttl()); + json.writeFieldName("expires_at"); + json.writeString(dateString(TimeUnit.SECONDS, liveInfo.localExpirationTime())); + json.writeFieldName("expired"); + json.writeBoolean(liveInfo.localExpirationTime() < (currentTimeMillis() / 1000)); } - json.writeEndArray(); json.writeEndObject(); + objectIndenter.setCompact(false); } - catch (IOException e) + + // If this is a deletion, indicate that, otherwise write cells. + if (!row.deletion().isLive()) { - logger.error("Fatal error parsing row.", e); + serializeDeletion(row.deletion().time()); } + json.writeFieldName("cells"); + json.writeStartArray(); + for (ColumnData cd : row) + { + serializeColumnData(cd, liveInfo); + } + json.writeEndArray(); + json.writeEndObject(); } private void serializeTombstone(RangeTombstoneMarker tombstone) diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java b/src/java/org/apache/cassandra/tools/SSTableExport.java index 05ec576553..0181e9f29f 100644 --- a/src/java/org/apache/cassandra/tools/SSTableExport.java +++ b/src/java/org/apache/cassandra/tools/SSTableExport.java @@ -36,6 +36,7 @@ import org.apache.commons.cli.PosixParser; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Bounds; @@ -67,6 +68,7 @@ public class SSTableExport private static final String DEBUG_OUTPUT_OPTION = "d"; private static final String EXCLUDE_KEY_OPTION = "x"; private static final String ENUMERATE_KEYS_OPTION = "e"; + private static final String ENUMERATE_TOMBSTONES_OPTION = "o"; private static final String RAW_TIMESTAMPS = "t"; private static final String PARTITION_JSON_LINES = "l"; @@ -87,16 +89,19 @@ public class SSTableExport excludeKey.setArgs(500); options.addOption(excludeKey); - Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false, "enumerate partition keys only"); + Option optEnumerate = new Option(ENUMERATE_KEYS_OPTION, false, "Enumerate partition keys only"); options.addOption(optEnumerate); + Option optTombstones = new Option(ENUMERATE_TOMBSTONES_OPTION, false, "Enumerate tombstones only"); + options.addOption(optTombstones); + Option debugOutput = new Option(DEBUG_OUTPUT_OPTION, false, "CQL row per line internal representation"); options.addOption(debugOutput); Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw timestamps instead of iso8601 date strings"); options.addOption(rawTimestamps); - Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false, "Output json lines, by partition"); + Option partitionJsonLines = new Option(PARTITION_JSON_LINES, false, "Output json lines, by partition"); options.addOption(partitionJsonLines); } @@ -160,6 +165,11 @@ public class SSTableExport System.out); } } + else if (cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION)) + { + final ISSTableScanner currentScanner = sstable.getScanner(); + process(currentScanner, Util.iterToStream(currentScanner), metadata); + } else { IPartitioner partitioner = sstable.getPartitioner(); @@ -179,43 +189,9 @@ public class SSTableExport { currentScanner = sstable.getScanner(); } - Stream<UnfilteredRowIterator> partitions = Util.iterToStream(currentScanner).filter(i -> - excludes.isEmpty() || !excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey())) - ); - if (cmd.hasOption(DEBUG_OUTPUT_OPTION)) - { - AtomicLong position = new AtomicLong(); - partitions.forEach(partition -> - { - position.set(currentScanner.getCurrentPosition()); - - if (!partition.partitionLevelDeletion().isLive()) - { - System.out.println("[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" + - position.get() + " " + partition.partitionLevelDeletion()); - } - if (!partition.staticRow().isEmpty()) - { - System.out.println("[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" + - position.get() + " " + partition.staticRow().toString(metadata, true)); - } - partition.forEachRemaining(row -> - { - System.out.println( - "[" + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" - + position.get() + " " + row.toString(metadata, false, true)); - position.set(currentScanner.getCurrentPosition()); - }); - }); - } - else if (cmd.hasOption(PARTITION_JSON_LINES)) - { - JsonTransformer.toJsonLines(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out); - } - else - { - JsonTransformer.toJson(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out); - } + + Stream<UnfilteredRowIterator> partitions = Util.iterToStream(currentScanner).filter(i -> excludes.isEmpty() || !excludes.contains(metadata.partitionKeyType.getString(i.partitionKey().getKey()))); + process(currentScanner, partitions, metadata); } } catch (IOException e) @@ -226,6 +202,54 @@ public class SSTableExport System.exit(0); } + private static void process(ISSTableScanner scanner, Stream<UnfilteredRowIterator> partitions, TableMetadata metadata) throws IOException + { + long nowInSeconds = FBUtilities.nowInSeconds(); + boolean hasTombstoneOption = cmd.hasOption(ENUMERATE_TOMBSTONES_OPTION); + + if (cmd.hasOption(DEBUG_OUTPUT_OPTION)) + { + AtomicLong position = new AtomicLong(); + partitions.forEach(partition -> + { + position.set(scanner.getCurrentPosition()); + + if (!partition.partitionLevelDeletion().isLive()) + { + System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" + + position.get() + ' ' + partition.partitionLevelDeletion()); + } + if (!partition.staticRow().isEmpty()) + { + System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" + + position.get() + ' ' + partition.staticRow().toString(metadata, true)); + } + partition.forEachRemaining(row -> + { + boolean shouldPrint = true; + if (hasTombstoneOption && row.isRow()) + shouldPrint = ((Row) row).hasDeletion(nowInSeconds); + + if (shouldPrint) + { + System.out.println('[' + metadata.partitionKeyType.getString(partition.partitionKey().getKey()) + "]@" + + position.get() + ' ' + row.toString(metadata, false, true)); + } + + position.set(scanner.getCurrentPosition()); + }); + }); + } + else if (cmd.hasOption(PARTITION_JSON_LINES)) + { + JsonTransformer.toJsonLines(scanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds, System.out); + } + else + { + JsonTransformer.toJson(scanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), hasTombstoneOption, metadata, nowInSeconds, System.out); + } + } + private static void printUsage() { String usage = String.format("sstabledump <sstable file path> <options>%n"); diff --git a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java index 2a29664a9a..77144f14c4 100644 --- a/test/unit/org/apache/cassandra/tools/SSTableExportTest.java +++ b/test/unit/org/apache/cassandra/tools/SSTableExportTest.java @@ -60,9 +60,10 @@ public class SSTableExportTest extends OfflineToolUtils String help = "usage: sstabledump <sstable file path> <options>\n" + "Dump contents of given SSTable to standard output in JSON format.\n" + " -d CQL row per line internal representation\n" + - " -e enumerate partition keys only\n" + + " -e Enumerate partition keys only\n" + " -k <arg> List of included partition keys\n" + " -l Output json lines, by partition\n" + + " -o Enumerate tombstones only\n" + " -t Print raw timestamps instead of iso8601 date strings\n" + " -x <arg> List of excluded partition keys\n"; Assertions.assertThat(tool.getStdout()).isEqualTo(help); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org