[ 
https://issues.apache.org/jira/browse/AVRO-1858?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16703784#comment-16703784
 ] 

ASF GitHub Bot commented on AVRO-1858:
--------------------------------------

dkulp closed pull request #100: AVRO-1858 add tojson head mode
URL: https://github.com/apache/avro/pull/100
 
 
   

This is a PR merged from a forked repository. Because GitHub hides the
original diff of a foreign pull request (one from a fork) once it is merged,
the diff is reproduced below for the sake of provenance:

diff --git 
a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java 
b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index fcc89caf0..79625e3cd 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -20,12 +20,14 @@
 import java.io.BufferedInputStream;
 import java.io.InputStream;
 import java.io.PrintStream;
+import java.util.ArrayList;
 import java.util.List;
 
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.io.DatumWriter;
@@ -36,6 +38,7 @@
 
 /** Reads a data file and dumps to JSON */
 public class DataFileReadTool implements Tool {
+  private static final long DEFAULT_HEAD_COUNT = 10;
 
   @Override
   public String getName() {
@@ -53,10 +56,14 @@ public int run(InputStream stdin, PrintStream out, 
PrintStream err,
     OptionParser optionParser = new OptionParser();
     OptionSpec<Void> prettyOption = optionParser
         .accepts("pretty", "Turns on pretty printing.");
+    String headDesc = String.format("Converts the first X records (default is 
%d).", DEFAULT_HEAD_COUNT);
+    OptionSpec<String> headOption = optionParser.accepts("head", 
headDesc).withOptionalArg();
 
     OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
     Boolean pretty = optionSet.has(prettyOption);
-    List<String> nargs = (List<String>)optionSet.nonOptionArguments();
+    List<String> nargs = new 
ArrayList<String>((List<String>)optionSet.nonOptionArguments());
+
+    long headCount = getHeadCount(optionSet, headOption, nargs);
 
     if (nargs.size() != 1) {
       printHelp(err);
@@ -73,8 +80,10 @@ public int run(InputStream stdin, PrintStream out, 
PrintStream err,
       Schema schema = streamReader.getSchema();
       DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
       JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, 
pretty);
-      for (Object datum : streamReader)
+      for(long recordCount = 0; streamReader.hasNext() && recordCount < 
headCount; recordCount++) {
+        Object datum = streamReader.next();
         writer.write(datum, encoder);
+      }
       encoder.flush();
       out.println();
       out.flush();
@@ -84,8 +93,28 @@ public int run(InputStream stdin, PrintStream out, 
PrintStream err,
     return 0;
   }
 
+  private static long getHeadCount(OptionSet optionSet, OptionSpec<String> 
headOption, List<String> nargs) {
+    long headCount = Long.MAX_VALUE;
+    if(optionSet.has(headOption)) {
+      headCount = DEFAULT_HEAD_COUNT;
+      List<String> headValues = optionSet.valuesOf(headOption);
+      if(headValues.size() > 0) {
+        // if the value parses to int, assume it's meant to go with --head
+        // otherwise assume it was an optionSet.nonOptionArgument and add back 
to the list
+        // TODO: support input filenames whose whole path+name is int parsable?
+        try {
+          headCount = Long.parseLong(headValues.get(0));
+          if(headCount < 0) throw new AvroRuntimeException("--head count must 
not be negative");
+        } catch(NumberFormatException ex) {
+          nargs.addAll(headValues);
+        }
+      }
+    }
+    return headCount;
+  }
+
   private void printHelp(PrintStream ps) {
-    ps.println("tojson --pretty input-file");
+    ps.println("tojson [--pretty] [--head[=X]] input-file");
     ps.println();
     ps.println(getShortDescription());
     ps.println("A dash ('-') can be given as an input file to use stdin");
diff --git 
a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java 
b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 0270b713f..473ac2d4d 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.AvroTestUtil;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
@@ -47,7 +48,7 @@
 
 @SuppressWarnings("deprecation")
 public class TestDataFileTools {
-  static final int COUNT = 10;
+  static final int COUNT = 15;
   static File sampleFile;
   static String jsonData;
   static Schema schema;
@@ -117,6 +118,43 @@ public void testReadToJsonPretty() throws Exception {
         run(new DataFileReadTool(), "--pretty", sampleFile.getPath()));
   }
 
+  @Test
+  public void testReadHeadDefaultCount() throws Exception {
+    String expectedJson = jsonData.substring(0, 20); // first 10 numbers
+    assertEquals(expectedJson,
+      run(new DataFileReadTool(), "--head", sampleFile.getPath()));
+  }
+
+  @Test
+  public void testReadHeadEquals3Count() throws Exception {
+    String expectedJson = jsonData.substring(0, 6); // first 3 numbers
+    assertEquals(expectedJson,
+      run(new DataFileReadTool(), "--head=3", sampleFile.getPath()));
+  }
+
+  @Test
+  public void testReadHeadSpace5Count() throws Exception {
+    String expectedJson = jsonData.substring(0, 10); // first 5 numbers
+    assertEquals(expectedJson,
+      run(new DataFileReadTool(), "--head", "5", sampleFile.getPath()));
+  }
+
+  @Test
+  public void testReadHeadLongCount() throws Exception {
+    assertEquals(jsonData,
+      run(new DataFileReadTool(), "--head=3000000000", sampleFile.getPath()));
+  }
+
+  @Test
+  public void testReadHeadEqualsZeroCount() throws Exception {
+    assertEquals("\n", run(new DataFileReadTool(), "--head=0", 
sampleFile.getPath()));
+  }
+
+  @Test(expected = AvroRuntimeException.class)
+  public void testReadHeadNegativeCount() throws Exception {
+    assertEquals("\n", run(new DataFileReadTool(), "--head=-5", 
sampleFile.getPath()));
+  }
+
   @Test
   public void testGetMeta() throws Exception {
     String output = run(new DataFileGetMetaTool(), sampleFile.getPath());


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Update DataFileReadTool (tojson) to support a "head" concept
> ------------------------------------------------------------
>
>                 Key: AVRO-1858
>                 URL: https://issues.apache.org/jira/browse/AVRO-1858
>             Project: Apache Avro
>          Issue Type: Improvement
>          Components: java
>    Affects Versions: 1.8.1
>            Reporter: Mike Hurley
>            Assignee: Mike Hurley
>            Priority: Major
>
> It would be nice if the tojson operator supported a "head" concept in order 
> to get a sampling of records in an Avro file.
> Allow specifying a maximum record count to display. If no max is given in 
> head mode, use a reasonable default (like 10).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to