[ 
https://issues.apache.org/jira/browse/CASSANDRA-13848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Kevin Wern updated CASSANDRA-13848:
-----------------------------------
    Reviewer: Jeff Jirsa
      Status: Patch Available  (was: Open)

From 834cab8a0a67dbbefa608ddd47109bb9883025a2 Mon Sep 17 00:00:00 2001
From: Kevin Wern <kevin.m.w...@gmail.com>
Date: Mon, 9 Oct 2017 04:26:25 -0400
Subject: [PATCH] sstabledump: add -l option for jsonl

---
 .../apache/cassandra/tools/JsonTransformer.java    | 35 +++++++++++++++++-----
 .../org/apache/cassandra/tools/SSTableExport.java  |  8 +++++
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java 
b/src/java/org/apache/cassandra/tools/JsonTransformer.java
index e6aaf07..0c7ed7e 100644
--- a/src/java/org/apache/cassandra/tools/JsonTransformer.java
+++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java
@@ -56,6 +56,7 @@ import org.codehaus.jackson.JsonGenerator;
 import org.codehaus.jackson.impl.Indenter;
 import org.codehaus.jackson.util.DefaultPrettyPrinter.NopIndenter;
 import org.codehaus.jackson.util.DefaultPrettyPrinter;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
 
 public final class JsonTransformer
 {
@@ -78,17 +79,26 @@ public final class JsonTransformer
 
     private long currentPosition = 0;
 
-    private JsonTransformer(JsonGenerator json, ISSTableScanner 
currentScanner, boolean rawTime, TableMetadata metadata)
+    private JsonTransformer(JsonGenerator json, ISSTableScanner 
currentScanner, boolean rawTime, TableMetadata metadata, boolean isJsonLines)
     {
         this.json = json;
         this.metadata = metadata;
         this.currentScanner = currentScanner;
         this.rawTime = rawTime;
 
-        DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
-        prettyPrinter.indentObjectsWith(objectIndenter);
-        prettyPrinter.indentArraysWith(arrayIndenter);
-        json.setPrettyPrinter(prettyPrinter);
+        if (isJsonLines)
+        {
+            MinimalPrettyPrinter minimalPrettyPrinter = new 
MinimalPrettyPrinter();
+            minimalPrettyPrinter.setRootValueSeparator("\n");
+            json.setPrettyPrinter(minimalPrettyPrinter);
+        }
+        else
+        {
+            DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+            prettyPrinter.indentObjectsWith(objectIndenter);
+            prettyPrinter.indentArraysWith(arrayIndenter);
+            json.setPrettyPrinter(prettyPrinter);
+        }
     }
 
     public static void toJson(ISSTableScanner currentScanner, 
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata 
metadata, OutputStream out)
@@ -96,18 +106,28 @@ public final class JsonTransformer
     {
         try (JsonGenerator json = jsonFactory.createJsonGenerator(new 
OutputStreamWriter(out, StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, 
currentScanner, rawTime, metadata);
+            JsonTransformer transformer = new JsonTransformer(json, 
currentScanner, rawTime, metadata, false);
             json.writeStartArray();
             partitions.forEach(transformer::serializePartition);
             json.writeEndArray();
         }
     }
 
+    public static void toJsonLines(ISSTableScanner currentScanner, 
Stream<UnfilteredRowIterator> partitions, boolean rawTime, TableMetadata 
metadata, OutputStream out)
+            throws IOException
+    {
+        try (JsonGenerator json = jsonFactory.createJsonGenerator(new 
OutputStreamWriter(out, StandardCharsets.UTF_8)))
+        {
+            JsonTransformer transformer = new JsonTransformer(json, 
currentScanner, rawTime, metadata, true);
+            partitions.forEach(transformer::serializePartition);
+        }
+    }
+
     public static void keysToJson(ISSTableScanner currentScanner, 
Stream<DecoratedKey> keys, boolean rawTime, TableMetadata metadata, 
OutputStream out) throws IOException
     {
         try (JsonGenerator json = jsonFactory.createJsonGenerator(new 
OutputStreamWriter(out, StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, 
currentScanner, rawTime, metadata);
+            JsonTransformer transformer = new JsonTransformer(json, 
currentScanner, rawTime, metadata, false);
             json.writeStartArray();
             keys.forEach(transformer::serializePartitionKey);
             json.writeEndArray();
@@ -221,6 +241,7 @@ public final class JsonTransformer
                 json.writeEndObject();
             }
         }
+
         catch (IOException e)
         {
             String key = 
metadata.partitionKeyType.getString(partition.partitionKey().getKey());
diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java 
b/src/java/org/apache/cassandra/tools/SSTableExport.java
index 95e3ed6..4079ee7 100644
--- a/src/java/org/apache/cassandra/tools/SSTableExport.java
+++ b/src/java/org/apache/cassandra/tools/SSTableExport.java
@@ -62,6 +62,7 @@ public class SSTableExport
     private static final String EXCLUDE_KEY_OPTION = "x";
     private static final String ENUMERATE_KEYS_OPTION = "e";
     private static final String RAW_TIMESTAMPS = "t";
+    private static final String PARTITION_JSON_LINES = "l";
 
     private static final Options options = new Options();
     private static CommandLine cmd;
@@ -88,6 +89,9 @@ public class SSTableExport
 
         Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw 
timestamps instead of iso8601 date strings");
         options.addOption(rawTimestamps);
+
+        Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false, 
"Output json lines, by partition");
+        options.addOption(partitionJsonLines);
     }
 
     /**
@@ -194,6 +198,10 @@ public class SSTableExport
                         });
                     });
                 }
+                else if (cmd.hasOption(PARTITION_JSON_LINES))
+                {
+                    JsonTransformer.toJsonLines(currentScanner, partitions, 
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
+                }
                 else
                 {
                     JsonTransformer.toJson(currentScanner, partitions, 
cmd.hasOption(RAW_TIMESTAMPS), metadata, System.out);
-- 
2.10.1 (Apple Git-78)


> Allow sstabledump to do a json object per partition to better handle large 
> sstables
> -----------------------------------------------------------------------------------
>
>                 Key: CASSANDRA-13848
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-13848
>             Project: Cassandra
>          Issue Type: New Feature
>          Components: Tools
>            Reporter: Jeff Jirsa
>            Assignee: Kevin Wern
>            Priority: Trivial
>              Labels: lhf
>
> sstable2json / sstabledump make a huge JSON document of the whole file. For 
> very large sstables this makes it impossible to load into memory to do 
> anything with it. Allowing users to break it into small JSON objects per 
> partition would be useful.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to