[ https://issues.apache.org/jira/browse/CASSANDRA-13848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kevin Wern updated CASSANDRA-13848: ----------------------------------- Reviewer: Jeff Jirsa Status: Patch Available (was: Open) >From 834cab8a0a67dbbefa608ddd47109bb9883025a2 Mon Sep 17 00:00:00 2001 From: Kevin Wern <kevin.m.w...@gmail.com> Date: Mon, 9 Oct 2017 04:26:25 -0400 Subject: [PATCH] sstabledump: add -l option for jsonl --- .../apache/cassandra/tools/JsonTransformer.java | 35 +++++++++++++++++----- .../org/apache/cassandra/tools/SSTableExport.java | 8 +++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java b/src/java/org/apache/cassandra/tools/JsonTransformer.java index e6aaf07..0c7ed7e 100644 --- a/src/java/org/apache/cassandra/tools/JsonTransformer.java +++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java @@ -56,6 +56,7 @@ import org.codehaus.jackson.JsonGenerator; import org.codehaus.jackson.impl.Indenter; import org.codehaus.jackson.util.DefaultPrettyPrinter.NopIndenter; import org.codehaus.jackson.util.DefaultPrettyPrinter; +import org.codehaus.jackson.util.MinimalPrettyPrinter; public final class JsonTransformer { @@ -78,17 +79,26 @@ public final class JsonTransformer private long currentPosition = 0; - private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, TableMetadata metadata) + private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines) { this.json = json; this.metadata = metadata; this.currentScanner = currentScanner; this.rawTime = rawTime; - DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter(); - prettyPrinter.indentObjectsWith(objectIndenter); - prettyPrinter.indentArraysWith(arrayIndenter); - json.setPrettyPrinter(prettyPrinter); + if (isJsonLines) + { + MinimalPrettyPrinter minimalPrettyPrinter = new MinimalPrettyPrinter(); + minimalPrettyPrinter.setRootValueSeparator("\n"); + json.setPrettyPrinter(minimalPrettyPrinter); + } + else + 
{ + DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter(); + prettyPrinter.indentObjectsWith(objectIndenter); + prettyPrinter.indentArraysWith(arrayIndenter); + json.setPrettyPrinter(prettyPrinter); + } } public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out) @@ -96,18 +106,28 @@ public final class JsonTransformer { try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { - JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata); + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false); json.writeStartArray(); partitions.forEach(transformer::serializePartition); json.writeEndArray(); } } + public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata metadata, OutputStream out) + throws IOException + { + try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) + { + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, true); + partitions.forEach(transformer::serializePartition); + } + } + public static void keysToJson(ISSTableScanner currentScanner, Stream<DecoratedKey> keys, boolean rawTime, TableMetadata metadata, OutputStream out) throws IOException { try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out, StandardCharsets.UTF_8))) { - JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata); + JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime, metadata, false); json.writeStartArray(); keys.forEach(transformer::serializePartitionKey); json.writeEndArray(); @@ -221,6 +241,7 @@ public final class JsonTransformer json.writeEndObject(); } } + catch (IOException e) { 
String key = metadata.partitionKeyType.getString(partition.partitionKey().getKey()); diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java b/src/java/org/apache/cassandra/tools/SSTableExport.java index 95e3ed6..4079ee7 100644 --- a/src/java/org/apache/cassandra/tools/SSTableExport.java +++ b/src/java/org/apache/cassandra/tools/SSTableExport.java @@ -62,6 +62,7 @@ public class SSTableExport private static final String EXCLUDE_KEY_OPTION = "x"; private static final String ENUMERATE_KEYS_OPTION = "e"; private static final String RAW_TIMESTAMPS = "t"; + private static final String PARTITION_JSON_LINES = "l"; private static final Options options = new Options(); private static CommandLine cmd; @@ -88,6 +89,9 @@ public class SSTableExport Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw timestamps instead of iso8601 date strings"); options.addOption(rawTimestamps); + + Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false, "Output json lines, by partition"); + options.addOption(partitionJsonLines); } /** @@ -194,6 +198,10 @@ public class SSTableExport }); }); } + else if (cmd.hasOption(PARTITION_JSON_LINES)) + { + JsonTransformer.toJsonLines(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out); + } else { JsonTransformer.toJson(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out); -- 2.10.1 (Apple Git-78) > Allow sstabledump to do a json object per partition to better handle large > sstables > ----------------------------------------------------------------------------------- > > Key: CASSANDRA-13848 > URL: https://issues.apache.org/jira/browse/CASSANDRA-13848 > Project: Cassandra > Issue Type: New Feature > Components: Tools > Reporter: Jeff Jirsa > Assignee: Kevin Wern > Priority: Trivial > Labels: lhf > > sstable2json / sstabledump make a huge json document of the whole file. For > very large sstables this makes it impossible to load in memory to do anything > with it. 
Allowing users to break it into small json objects per partition > would be useful. -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org