carbondata git commit: [CARBONDATA-2965] support Benchmark command in CarbonCli [Forced Update!]

xuchuanyin Wed, 26 Sep 2018 00:48:17 -0700

Repository: carbondata
Updated Branches:
  refs/heads/master 23a9e7c5f -> e07df44a1 (forced update)



[CARBONDATA-2965] support Benchmark command in CarbonCli

A new command called "benchmark" is added to the CarbonCli tool to output the 
scan performance of the specified file and column.
Example usage:
```bash
shell>java -jar carbondata-cli.jar org.apache.carbondata.CarbonCli -cmd 
benchmark -p hdfs://carbon1:9000/carbon.store/tpchcarbon_base/lineitem/ -a -c 
l_comment
```
This scans the l_comment column in the first file in the input folder (or in 
the data file given with the -f option instead of a folder) and prints the scan 
time:

```
ReadHeaderAndFooter takes 12,598 us
ConvertFooter takes 4,712 us
ReadAllMetaAndConvertFooter takes 8,039 us

Scan column 'l_comment'
Blocklet#0: ColumnChunkIO takes 222,609 us
Blocklet#0: DecompressPage takes 111,985 us
Blocklet#1: ColumnChunkIO takes 186,522 us
Blocklet#1: DecompressPage takes 89,132 us
Blocklet#2: ColumnChunkIO takes 209,129 us
Blocklet#2: DecompressPage takes 84,051 us
```
This closes #2755


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e07df44a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e07df44a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e07df44a

Branch: refs/heads/master
Commit: e07df44a1db52304c54ab4e379f28b0f026449fd
Parents: 49f6715
Author: Jacky Li <jacky.li...@qq.com>
Authored: Sun Sep 23 00:01:04 2018 +0800
Committer: xuchuanyin <xuchuan...@hust.edu.cn>
Committed: Wed Sep 26 15:47:37 2018 +0800

----------------------------------------------------------------------
 .../core/util/DataFileFooterConverterV3.java    |   6 +-
 pom.xml                                         |   7 +-
 tools/cli/pom.xml                               |   5 +
 .../org/apache/carbondata/tool/CarbonCli.java   |  90 ++++----
 .../org/apache/carbondata/tool/Command.java     |  28 +++
 .../org/apache/carbondata/tool/DataFile.java    |  94 +++++++--
 .../org/apache/carbondata/tool/DataSummary.java | 188 ++++++-----------
 .../apache/carbondata/tool/FileCollector.java   | 147 +++++++++++++
 .../apache/carbondata/tool/ScanBenchmark.java   | 205 +++++++++++++++++++
 .../apache/carbondata/tool/CarbonCliTest.java   |  94 +++++----
 10 files changed, 622 insertions(+), 242 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
 
b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
index 41e22fd..438e3e3 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
@@ -59,12 +59,16 @@ public class DataFileFooterConverterV3 extends 
AbstractDataFileFooterConverter {
    */
   @Override public DataFileFooter readDataFileFooter(TableBlockInfo 
tableBlockInfo)
       throws IOException {
-    DataFileFooter dataFileFooter = new DataFileFooter();
     CarbonHeaderReader carbonHeaderReader = new 
CarbonHeaderReader(tableBlockInfo.getFilePath());
     FileHeader fileHeader = carbonHeaderReader.readHeader();
     CarbonFooterReaderV3 reader =
         new CarbonFooterReaderV3(tableBlockInfo.getFilePath(), 
tableBlockInfo.getBlockOffset());
     FileFooter3 footer = reader.readFooterVersion3();
+    return convertDataFileFooter(fileHeader, footer);
+  }
+
+  public DataFileFooter convertDataFileFooter(FileHeader fileHeader, 
FileFooter3 footer) {
+    DataFileFooter dataFileFooter = new DataFileFooter();
     dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) 
fileHeader.getVersion()));
     dataFileFooter.setNumberOfRows(footer.getNum_rows());
     dataFileFooter.setSegmentInfo(getSegmentInfo(footer.getSegment_info()));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index eff438b..00a5287 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,6 +106,7 @@
     <module>store/sdk</module>
     <module>store/search</module>
     <module>assembly</module>
+    <module>tools/cli</module>
   </modules>
 
   <properties>
@@ -718,12 +719,6 @@
         <module>datamap/mv/core</module>
       </modules>
     </profile>
-    <profile>
-      <id>tools</id>
-      <modules>
-        <module>tools/cli</module>
-      </modules>
-    </profile>
   </profiles>
 
 </project>

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/pom.xml
----------------------------------------------------------------------
diff --git a/tools/cli/pom.xml b/tools/cli/pom.xml
index 0d00438..60e69dc 100644
--- a/tools/cli/pom.xml
+++ b/tools/cli/pom.xml
@@ -25,6 +25,11 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+      <version>2.5</version>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
index effb139..5725f8e 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -47,11 +47,16 @@ public class CarbonCli {
         .withDescription("the path which contains carbondata files, nested 
folder is supported")
         .withLongOpt("path")
         .create("p");
+    Option file = OptionBuilder.withArgName("file")
+        .hasArg()
+        .withDescription("the carbondata file path")
+        .withLongOpt("file")
+        .create("f");
 
     Option command = OptionBuilder
         .withArgName("command name")
         .hasArg()
-        .withDescription("command to execute, supported commands are: summary")
+        .withDescription("command to execute, supported commands are: summary, 
benchmark")
         .isRequired(true)
         .create("cmd");
 
@@ -70,6 +75,7 @@ public class CarbonCli {
     Options options = new Options();
     options.addOption(help);
     options.addOption(path);
+    options.addOption(file);
     options.addOption(command);
     options.addOption(all);
     options.addOption(schema);
@@ -87,71 +93,49 @@ public class CarbonCli {
   static void run(String[] args, PrintStream out) {
     Options options = buildOptions();
     CommandLineParser parser = new PosixParser();
-    try {
-      CommandLine line = parser.parse(options, args);
-      if (line.hasOption("h")) {
-        printHelp(options);
-        return;
-      }
-
-      String cmd = line.getOptionValue("cmd");
-      if (cmd.equalsIgnoreCase("summary")) {
-        runSummaryCommand(line, options, out);
-      } else {
-        out.println("command " + cmd + " is not supported");
-        printHelp(options);
-        return;
-      }
 
-      out.flush();
+    CommandLine line;
+    try {
+      line = parser.parse(options, args);
     } catch (ParseException exp) {
       out.println("Parsing failed. Reason: " + exp.getMessage());
-    } catch (IOException | MemoryException e) {
-      out.println(out);
+      return;
     }
-  }
 
-  private static void printHelp(Options options) {
-    HelpFormatter formatter = new HelpFormatter();
-    formatter.printHelp("CarbonCli", options);
-  }
+    if (line.hasOption("h")) {
+      printHelp(options);
+      return;
+    }
 
-  private static void runSummaryCommand(CommandLine line, Options options, 
PrintStream out)
-      throws IOException, MemoryException {
     String path = "";
     if (line.hasOption("p")) {
       path = line.getOptionValue("path");
+    }
+    out.println("Input Folder: " + path);
+
+    String cmd = line.getOptionValue("cmd");
+    Command command;
+    if (cmd.equalsIgnoreCase("summary")) {
+      command = new DataSummary(path, out);
+    } else if (cmd.equalsIgnoreCase("benchmark")) {
+      command = new ScanBenchmark(path, out);
     } else {
-      System.err.println("path is required");
+      out.println("command " + cmd + " is not supported");
       printHelp(options);
       return;
     }
-    DataSummary summary = new DataSummary(path, out);
-    if (summary.isEmpty()) {
-      System.out.println("no data file found");
-      return;
-    }
-    out.println("Input Folder: " + path);
-    summary.printBasic();
-    boolean printAll = false;
-    if (line.hasOption("a")) {
-      printAll = true;
-    }
-    if (line.hasOption("s") || printAll) {
-      summary.printSchema();
-    }
-    if (line.hasOption("m") || printAll) {
-      summary.printSegments();
-    }
-    if (line.hasOption("t") || printAll) {
-      summary.printTableProperties();
-    }
-    if (line.hasOption("b") || printAll) {
-      summary.printBlockletDetail();
-    }
-    if (line.hasOption("c")) {
-      String columName = line.getOptionValue("c");
-      summary.printColumnStats(columName);
+
+    try {
+      command.run(line);
+      out.flush();
+    } catch (IOException | MemoryException e) {
+      e.printStackTrace();
     }
   }
+
+  private static void printHelp(Options options) {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.printHelp("CarbonCli", options);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/Command.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/Command.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/Command.java
new file mode 100644
index 0000000..cb7d8df
--- /dev/null
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/Command.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.tool;
+
+import java.io.IOException;
+
+import org.apache.carbondata.core.memory.MemoryException;
+
+import org.apache.commons.cli.CommandLine;
+
+interface Command {
+  void run(CommandLine line) throws IOException, MemoryException;
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
index ea67829..da81d84 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -55,6 +55,8 @@ import static 
org.apache.carbondata.core.constants.CarbonCommonConstants.FILE_SE
  * Contains information extracted from a .carbondata file
  */
 class DataFile {
+  private CarbonFile dataFile;
+
   // file full path
   private String filePath;
 
@@ -80,33 +82,37 @@ class DataFile {
 
   private FileHeader header;
   private FileFooter3 footer;
+  private long footerOffset;
   private List<ColumnSchema> schema;
   private List<Blocklet> blocklets;
 
-  DataFile(CarbonFile file) throws IOException {
-    this.fileSizeInBytes = file.getSize();
+  DataFile(CarbonFile dataFile) {
+    this.dataFile = dataFile;
+    this.filePath = dataFile.getPath();
+    this.fileSizeInBytes = dataFile.getSize();
+  }
 
+  void collectAllMeta() throws IOException {
     FileHeader header = null;
     FileFooter3 footer = null;
     try {
-      header = readHeader(file);
+      header = readHeader();
     } catch (IOException e) {
-      throw new IOException("failed to read header in " + file.getPath(), e);
+      throw new IOException("failed to read header in " + dataFile.getPath(), 
e);
     }
     if (header.isSetSync_marker()) {
       // if sync_marker is set, it is a streaming format file
       throw new UnsupportedOperationException("streaming file is not 
supported");
     }
     try {
-      footer = readFooter(file);
+      footer = readFooter();
     } catch (IOException e) {
-      throw new IOException("failed to read footer in " + file.getPath(), e);
+      throw new IOException("failed to read footer in " + dataFile.getPath(), 
e);
     }
 
-    this.filePath = file.getPath();
     this.header = header;
     this.footer = footer;
-    String filePath = file.getPath();
+    String filePath = dataFile.getPath();
     // folder path that contains this file
     String fileName = filePath.substring(filePath.lastIndexOf(FILE_SEPARATOR));
     this.shardName = CarbonTablePath.getShardName(fileName);
@@ -130,6 +136,7 @@ class DataFile {
       for (int i = 0; i < schema.size(); i++) {
         columnDataSize.add(blockletInfo3.column_data_chunks_offsets.get(i) - 
previousChunkOffset);
         
columnMetaSize.add(blockletInfo3.column_data_chunks_length.get(i).longValue());
+        previousChunkOffset = blockletInfo3.column_data_chunks_offsets.get(i);
       }
       // last column chunk data size
       columnDataSize.add(fileSizeInBytes - footerSizeInBytes - 
previousChunkOffset);
@@ -146,17 +153,17 @@ class DataFile {
     assert (blockletSizeInBytes.size() == getNumBlocklets());
   }
 
-  private FileHeader readHeader(CarbonFile dataFile) throws IOException {
+  FileHeader readHeader() throws IOException {
     CarbonHeaderReader reader = new CarbonHeaderReader(dataFile.getPath());
     this.schema = reader.readSchema();
     return reader.readHeader();
   }
 
-  private FileFooter3 readFooter(CarbonFile dataFile) throws IOException {
+  FileFooter3 readFooter() throws IOException {
     this.fileReader = 
FileFactory.getFileHolder(FileFactory.getFileType(dataFile.getPath()));
     ByteBuffer buffer = 
fileReader.readByteBuffer(FileFactory.getUpdatedFilePath(
         dataFile.getPath()), dataFile.getSize() - 8, 8);
-    long footerOffset = buffer.getLong();
+    this.footerOffset = buffer.getLong();
     this.footerSizeInBytes = this.fileSizeInBytes - footerOffset;
     CarbonFooterReaderV3 footerReader =
         new CarbonFooterReaderV3(dataFile.getAbsolutePath(), footerOffset);
@@ -187,6 +194,53 @@ class DataFile {
     return schema;
   }
 
+  FileReader getFileReader() {
+    return fileReader;
+  }
+
+  long getFooterOffset() {
+    return footerOffset;
+  }
+
+  int getNumBlocklet() {
+    return blockletSizeInBytes.size();
+  }
+
+  long getFileSizeInBytes() {
+    return fileSizeInBytes;
+  }
+
+  int getColumnIndex(String columnName) {
+    List<ColumnSchema> columns = getSchema();
+    for (int i = 0; i < columns.size(); i++) {
+      if (columns.get(i).getColumnName().equalsIgnoreCase(columnName)) {
+        return i;
+      }
+    }
+    throw new IllegalArgumentException(columnName + " not found");
+  }
+
+  ColumnSchema getColumn(String columnName) {
+    List<ColumnSchema> columns = getSchema();
+    for (int i = 0; i < columns.size(); i++) {
+      if (columns.get(i).getColumnName().equalsIgnoreCase(columnName)) {
+        return columns.get(i);
+      }
+    }
+    throw new IllegalArgumentException(columnName + " not found");
+  }
+
+  int numDimensions() {
+    int numDimensions = 0;
+    List<ColumnSchema> columns = getSchema();
+    for (ColumnSchema column : columns) {
+      if (column.isDimensionColumn()) {
+        numDimensions++;
+      }
+    }
+    return numDimensions;
+  }
+
   private int getNumBlocklets() {
     return footer.blocklet_info_list3.size();
   }
@@ -396,28 +450,34 @@ class DataFile {
         if (column.isSortColumn()) {
           minValue = ByteUtil.toXorInt(min, 0, min.length);
           dataValue = ByteUtil.toXorInt(data, 0, data.length) - minValue;
-          factorValue = ByteUtil.toXorInt(max, 0, max.length) - 
ByteUtil.toXorInt(min, 0, min.length);
+          factorValue =
+              ByteUtil.toXorInt(max, 0, max.length) - ByteUtil.toXorInt(min, 
0, min.length);
         } else {
           minValue = ByteUtil.toLong(min, 0, min.length);
           dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
-          factorValue = ByteUtil.toLong(max, 0, max.length) - 
ByteUtil.toLong(min, 0, min.length);
+          factorValue =
+              ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, 
min.length);
         }
       } else if (column.getDataType() == DataTypes.LONG) {
         minValue = ByteUtil.toLong(min, 0, min.length);
         dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
-        factorValue = ByteUtil.toLong(max, 0, max.length) - 
ByteUtil.toLong(min, 0, min.length);
+        factorValue =
+            ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, 
min.length);
       } else if (column.getDataType() == DataTypes.DATE) {
         minValue = ByteUtil.toInt(min, 0, min.length);
         dataValue = ByteUtil.toInt(data, 0, data.length) - minValue;
-        factorValue = ByteUtil.toInt(max, 0, max.length) - ByteUtil.toInt(min, 
0, min.length);
+        factorValue =
+            ByteUtil.toInt(max, 0, max.length) - ByteUtil.toInt(min, 0, 
min.length);
       } else if (column.getDataType() == DataTypes.TIMESTAMP) {
         minValue = ByteUtil.toLong(min, 0, min.length);
         dataValue = ByteUtil.toLong(data, 0, data.length) - minValue;
-        factorValue = ByteUtil.toLong(max, 0, max.length) - 
ByteUtil.toLong(min, 0, min.length);
+        factorValue =
+            ByteUtil.toLong(max, 0, max.length) - ByteUtil.toLong(min, 0, 
min.length);
       } else if (column.getDataType() == DataTypes.DOUBLE) {
         minValue = ByteUtil.toDouble(min, 0, min.length);
         dataValue = ByteUtil.toDouble(data, 0, data.length) - minValue;
-        factorValue = ByteUtil.toDouble(max, 0, max.length) - 
ByteUtil.toDouble(min, 0, min.length);
+        factorValue =
+            ByteUtil.toDouble(max, 0, max.length) - ByteUtil.toDouble(min, 0, 
min.length);
       } else {
         throw new UnsupportedOperationException("data type: " + 
column.getDataType());
       }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
----------------------------------------------------------------------
diff --git 
a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
index 7ca6951..6463977 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
@@ -20,15 +20,12 @@ package org.apache.carbondata.tool;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.nio.charset.Charset;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.carbondata.common.Strings;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
@@ -40,7 +37,6 @@ import org.apache.carbondata.core.reader.CarbonHeaderReader;
 import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
 import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.format.BlockletInfo3;
 import org.apache.carbondata.format.FileFooter3;
 import org.apache.carbondata.format.FileHeader;
@@ -48,135 +44,89 @@ import org.apache.carbondata.format.TableInfo;
 
 import static 
org.apache.carbondata.core.constants.CarbonCommonConstants.DEFAULT_CHARSET;
 
+import org.apache.commons.cli.CommandLine;
+
 /**
  * Data Summary command implementation for {@link CarbonCli}
  */
-class DataSummary {
+class DataSummary implements Command {
   private String dataFolder;
   private PrintStream out;
 
-  private long numBlock;
-  private long numShard;
-  private long numBlocklet;
-  private long numPage;
-  private long numRow;
-  private long totalDataSize;
-
   // file path mapping to file object
-  private LinkedHashMap<String, DataFile> dataFiles = new LinkedHashMap<>();
-  private CarbonFile tableStatusFile;
-  private CarbonFile schemaFile;
+  private LinkedHashMap<String, DataFile> dataFiles;
 
-  DataSummary(String dataFolder, PrintStream out) throws IOException {
+  DataSummary(String dataFolder, PrintStream out) {
     this.dataFolder = dataFolder;
     this.out = out;
-    collectDataFiles();
-  }
-
-  private boolean isColumnarFile(String fileName) {
-    // if the timestamp in file name is "0", it is a streaming file
-    return fileName.endsWith(CarbonTablePath.CARBON_DATA_EXT) &&
-        
!CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileName).equals("0");
   }
 
-  private boolean isStreamFile(String fileName) {
-    // if the timestamp in file name is "0", it is a streaming file
-    return fileName.endsWith(CarbonTablePath.CARBON_DATA_EXT) &&
-        
CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileName).equals("0");
-  }
-
-  private void collectDataFiles() throws IOException {
-    Set<String> shards = new HashSet<>();
-    CarbonFile folder = FileFactory.getCarbonFile(dataFolder);
-    List<CarbonFile> files = folder.listFiles(true);
-    List<DataFile> unsortedFiles = new ArrayList<>();
-    for (CarbonFile file : files) {
-      if (isColumnarFile(file.getName())) {
-        DataFile dataFile = new DataFile(file);
-        unsortedFiles.add(dataFile);
-        collectNum(dataFile.getFooter());
-        shards.add(dataFile.getShardName());
-        totalDataSize += file.getSize();
-      } else if (file.getName().endsWith(CarbonTablePath.TABLE_STATUS_FILE)) {
-        tableStatusFile = file;
-      } else if (file.getName().startsWith(CarbonTablePath.SCHEMA_FILE)) {
-        schemaFile = file;
-      } else if (isStreamFile(file.getName())) {
-        out.println("WARN: input path contains streaming file, this tool does 
not support it yet, "
-            + "skipping it...");
-      }
+  @Override
+  public void run(CommandLine line) throws IOException, MemoryException {
+    FileCollector collector = new FileCollector(out);
+    collector.collectFiles(dataFolder);
+    collector.printBasicStats();
+    if (collector.getNumDataFiles() == 0) {
+      return;
     }
-    unsortedFiles.sort((o1, o2) -> {
-      if (o1.getShardName().equalsIgnoreCase(o2.getShardName())) {
-        return Integer.parseInt(o1.getPartNo()) - 
Integer.parseInt(o2.getPartNo());
-      } else {
-        return o1.getShardName().compareTo(o2.getShardName());
+    dataFiles = collector.getDataFiles();
+    boolean printAll = false;
+    if (line.hasOption("a")) {
+      printAll = true;
+    }
+    if (line.hasOption("s") || printAll) {
+      if (dataFiles.size() > 0) {
+        printSchema(dataFiles.entrySet().iterator().next().getValue());
       }
-    });
-    for (DataFile collectedFile : unsortedFiles) {
-      this.dataFiles.put(collectedFile.getFilePath(), collectedFile);
     }
-    numShard = shards.size();
-  }
-
-  private void collectNum(FileFooter3 footer) {
-    numBlock++;
-    numBlocklet += footer.blocklet_index_list.size();
-    numRow += footer.num_rows;
-    for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) {
-      numPage += blockletInfo3.number_number_of_pages;
+    if (line.hasOption("m") || printAll) {
+      printSegments(collector.getTableStatusFile());
+    }
+    if (line.hasOption("t") || printAll) {
+      printTableProperties(collector.getSchemaFile());
+    }
+    if (line.hasOption("b") || printAll) {
+      printBlockletDetail();
+    }
+    if (line.hasOption("c")) {
+      String columName = line.getOptionValue("c");
+      printColumnStats(columName);
     }
   }
 
-  void printBasic() {
-    out.println("## Summary");
-    out.println(
-        String.format("total: %,d blocks, %,d shards, %,d blocklets, %,d 
pages, %,d rows, %s",
-            numBlock, numShard, numBlocklet, numPage, numRow, 
Strings.formatSize(totalDataSize)));
-    out.println(
-        String.format("avg: %s/block, %s/blocklet, %,d rows/block, %,d 
rows/blocklet",
-            Strings.formatSize(totalDataSize / numBlock),
-            Strings.formatSize(totalDataSize / numBlocklet),
-            numRow / numBlock,
-            numRow / numBlocklet));
-  }
-
-  void printSchema() throws IOException {
-    if (dataFiles.size() > 0) {
-      String firstFile = dataFiles.keySet().iterator().next();
-      CarbonFile file = FileFactory.getCarbonFile(firstFile);
-      out.println();
-      out.println("## Schema");
-      out.println(String.format("schema in %s", file.getName()));
-      CarbonHeaderReader reader = new CarbonHeaderReader(file.getPath());
-      FileHeader header = reader.readHeader();
-      out.println("version: V" + header.version);
-      out.println("timestamp: " + new java.sql.Timestamp(header.time_stamp));
-      List<ColumnSchema> columns = reader.readSchema();
-      TablePrinter printer = new TablePrinter(
-          new String[]{"Column Name", "Data Type", "Column Type",
-              "SortColumn", "Encoding", "Ordinal", "Id"});
-      for (ColumnSchema column : columns) {
-        String shortColumnId = "NA";
-        if (column.getColumnUniqueId() != null && 
column.getColumnUniqueId().length() > 4) {
-          shortColumnId = "*" +
-              
column.getColumnUniqueId().substring(column.getColumnUniqueId().length() - 4);
-        }
-        printer.addRow(new String[]{
-            column.getColumnName(),
-            column.getDataType().getName(),
-            column.isDimensionColumn() ? "dimension" : "measure",
-            String.valueOf(column.isSortColumn()),
-            column.getEncodingList().toString(),
-            Integer.toString(column.getSchemaOrdinal()),
-            shortColumnId
-        });
+  private void printSchema(DataFile dataFile) throws IOException {
+    CarbonFile file = FileFactory.getCarbonFile(dataFile.getFilePath());
+    out.println();
+    out.println("## Schema");
+    out.println(String.format("schema in %s", file.getName()));
+    CarbonHeaderReader reader = new CarbonHeaderReader(file.getPath());
+    FileHeader header = reader.readHeader();
+    out.println("version: V" + header.version);
+    out.println("timestamp: " + new java.sql.Timestamp(header.time_stamp));
+    List<ColumnSchema> columns = reader.readSchema();
+    TablePrinter printer = new TablePrinter(
+        new String[]{"Column Name", "Data Type", "Column Type",
+            "SortColumn", "Encoding", "Ordinal", "Id"});
+    for (ColumnSchema column : columns) {
+      String shortColumnId = "NA";
+      if (column.getColumnUniqueId() != null && 
column.getColumnUniqueId().length() > 4) {
+        shortColumnId = "*" +
+            
column.getColumnUniqueId().substring(column.getColumnUniqueId().length() - 4);
       }
-      printer.printFormatted(out);
+      printer.addRow(new String[]{
+          column.getColumnName(),
+          column.getDataType().getName(),
+          column.isDimensionColumn() ? "dimension" : "measure",
+          String.valueOf(column.isSortColumn()),
+          column.getEncodingList().toString(),
+          Integer.toString(column.getSchemaOrdinal()),
+          shortColumnId
+      });
     }
+    printer.printFormatted(out);
   }
 
-  void printSegments() throws IOException {
+  private void printSegments(CarbonFile tableStatusFile) throws IOException {
     out.println();
     out.println("## Segment");
     if (tableStatusFile != null) {
@@ -215,7 +165,7 @@ class DataSummary {
     }
   }
 
-  void printTableProperties() throws IOException {
+  private void printTableProperties(CarbonFile schemaFile) throws IOException {
     out.println();
     out.println("## Table Properties");
     if (schemaFile != null) {
@@ -235,7 +185,7 @@ class DataSummary {
     }
   }
 
-  void printBlockletDetail() {
+  private void printBlockletDetail() {
     out.println();
     out.println("## Block Detail");
 
@@ -262,17 +212,12 @@ class DataSummary {
 
   private int getColumnIndex(String columnName) {
     if (dataFiles.size() > 0) {
-      List<ColumnSchema> columns = 
dataFiles.entrySet().iterator().next().getValue().getSchema();
-      for (int i = 0; i < columns.size(); i++) {
-        if (columns.get(i).getColumnName().equalsIgnoreCase(columnName)) {
-          return i;
-        }
-      }
+      return 
dataFiles.entrySet().iterator().next().getValue().getColumnIndex(columnName);
     }
     throw new RuntimeException("schema for column " + columnName + " not 
found");
   }
 
-  void printColumnStats(String columnName) throws IOException, MemoryException 
{
+  private void printColumnStats(String columnName) throws IOException, 
MemoryException {
     out.println();
     out.println("## Column Statistics for '" + columnName + "'");
     for (DataFile dataFile : dataFiles.values()) {
@@ -354,7 +299,4 @@ class DataSummary {
     }
   }
 
-  public boolean isEmpty() {
-    return dataFiles.size() == 0;
-  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
----------------------------------------------------------------------
diff --git 
a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
new file mode 100644
index 0000000..6e3297f
--- /dev/null
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.tool;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.format.BlockletInfo3;
+import org.apache.carbondata.format.FileFooter3;
+
+/**
+ * A helper to collect all data files, schema file, table status file in a given folder
+ */
+class FileCollector {
+
+  private long numBlock;
+  private long numShard;
+  private long numBlocklet;
+  private long numPage;
+  private long numRow;
+  private long totalDataSize;
+
+  // file path mapping to file object
+  private LinkedHashMap<String, DataFile> dataFiles = new LinkedHashMap<>();
+  private CarbonFile tableStatusFile;
+  private CarbonFile schemaFile;
+
+  private PrintStream out;
+
+  /**
+   * Creates an empty collector.
+   *
+   * @param out stream this collector prints its messages to
+   */
+  FileCollector(PrintStream out) {
+    this.out = out;
+  }
+
+  /**
+   * Lists the files under {@code dataFolder} and records what is found: every columnar data
+   * file (its metadata is loaded and the block/blocklet/page/row/size totals are updated), the
+   * table status file and the schema file. Streaming data files are not supported; each one
+   * only produces a warning on the output stream.
+   *
+   * <p>The collected data files are stored ordered by shard name and, within one shard, by
+   * numeric part number.
+   *
+   * @param dataFolder path of the folder to scan
+   * @throws IOException if one of the underlying file operations fails
+   */
+  void collectFiles(String dataFolder) throws IOException {
+    Set<String> shards = new HashSet<>();
+    CarbonFile folder = FileFactory.getCarbonFile(dataFolder);
+    List<CarbonFile> files = folder.listFiles(true);
+    List<DataFile> unsortedFiles = new ArrayList<>();
+    for (CarbonFile file : files) {
+      if (isColumnarFile(file.getName())) {
+        DataFile dataFile = new DataFile(file);
+        dataFile.collectAllMeta();
+        unsortedFiles.add(dataFile);
+        collectNum(dataFile.getFooter());
+        shards.add(dataFile.getShardName());
+        totalDataSize += file.getSize();
+      } else if (file.getName().endsWith(CarbonTablePath.TABLE_STATUS_FILE)) {
+        tableStatusFile = file;
+      } else if (file.getName().startsWith(CarbonTablePath.SCHEMA_FILE)) {
+        schemaFile = file;
+      } else if (isStreamFile(file.getName())) {
+        out.println("WARN: input path contains streaming file, this tool does not support it yet, "
+            + "skipping it...");
+      }
+    }
+    // Use one notion of shard equality for both the "same shard" test and the ordering (the
+    // same case-sensitive one the shard set above uses), and compare part numbers without
+    // subtraction so the comparator always honours its contract.
+    unsortedFiles.sort((o1, o2) -> {
+      int byShard = o1.getShardName().compareTo(o2.getShardName());
+      if (byShard != 0) {
+        return byShard;
+      }
+      return Integer.compare(
+          Integer.parseInt(o1.getPartNo()), Integer.parseInt(o2.getPartNo()));
+    });
+    for (DataFile collectedFile : unsortedFiles) {
+      this.dataFiles.put(collectedFile.getFilePath(), collectedFile);
+    }
+    numShard = shards.size();
+  }
+
+  /**
+   * Adds the figures of one data file footer to the running totals: one more block, the
+   * number of entries in its blocklet index list, its row count, and the page count of every
+   * entry in its blocklet info list.
+   *
+   * @param footer footer of the data file being collected
+   */
+  private void collectNum(FileFooter3 footer) {
+    numBlock++;
+    numBlocklet += footer.blocklet_index_list.size();
+    numRow += footer.num_rows;
+    for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) {
+      numPage += blockletInfo3.number_number_of_pages;
+    }
+  }
+
+  /**
+   * Tells whether {@code fileName} names a columnar (non-streaming) carbon data file.
+   */
+  private boolean isColumnarFile(String fileName) {
+    if (!fileName.endsWith(CarbonTablePath.CARBON_DATA_EXT)) {
+      return false;
+    }
+    // a timestamp of "0" in the file name marks a streaming file, anything else is columnar
+    String timestamp = CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileName);
+    return !timestamp.equals("0");
+  }
+
+  /**
+   * Tells whether {@code fileName} names a streaming carbon data file.
+   */
+  private boolean isStreamFile(String fileName) {
+    if (!fileName.endsWith(CarbonTablePath.CARBON_DATA_EXT)) {
+      return false;
+    }
+    // a timestamp of "0" in the file name marks a streaming file
+    String timestamp = CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileName);
+    return timestamp.equals("0");
+  }
+
+  /**
+   * Returns the collected data files keyed by file path, in the order established by
+   * {@code collectFiles}. This is the collector's own map, not a copy.
+   */
+  LinkedHashMap<String, DataFile> getDataFiles() {
+    return dataFiles;
+  }
+
+  /** Returns the table status file recorded by {@code collectFiles}, or null if none was. */
+  CarbonFile getTableStatusFile() {
+    return tableStatusFile;
+  }
+
+  /** Returns the schema file recorded by {@code collectFiles}, or null if none was. */
+  CarbonFile getSchemaFile() {
+    return schemaFile;
+  }
+
+  /** Returns the number of data files collected so far. */
+  int getNumDataFiles() {
+    return dataFiles.size();
+  }
+
+  /**
+   * Prints the overall totals and the per-block / per-blocklet averages of the collected data
+   * files to the stream given at construction, or a short notice when nothing was collected.
+   */
+  void printBasicStats() {
+    if (dataFiles.isEmpty()) {
+      // report through the injected stream like every other message of this class, so that
+      // callers capturing the output also see this notice
+      out.println("no data file found");
+      return;
+    }
+    out.println("## Summary");
+    out.println(
+        String.format("total: %,d blocks, %,d shards, %,d blocklets, %,d pages, %,d rows, %s",
+            numBlock, numShard, numBlocklet, numPage, numRow,
+            Strings.formatSize(totalDataSize)));
+    // numBlock is at least 1 here (one per collected file), but numBlocklet stays 0 when the
+    // collected footers list no blocklets; guard the per-blocklet averages against dividing
+    // by zero
+    float bytesPerBlocklet = numBlocklet == 0 ? 0f : (float) totalDataSize / numBlocklet;
+    long rowsPerBlocklet = numBlocklet == 0 ? 0L : numRow / numBlocklet;
+    out.println(
+        String.format("avg: %s/block, %s/blocklet, %,d rows/block, %,d rows/blocklet",
+            Strings.formatSize((float) totalDataSize / numBlock),
+            Strings.formatSize(bytesPerBlocklet),
+            numRow / numBlock,
+            rowsPerBlocklet));
+  }
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java
----------------------------------------------------------------------
diff --git 
a/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java
new file mode 100644
index 0000000..805d4e8
--- /dev/null
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.tool;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.carbondata.common.Strings;
+import org.apache.carbondata.core.datastore.block.BlockletInfos;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.datastore.chunk.AbstractRawColumnChunk;
+import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
+import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
+import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
+import 
org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory;
+import 
org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
+import 
org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import org.apache.carbondata.core.memory.MemoryException;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.util.DataFileFooterConverterV3;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.format.FileFooter3;
+import org.apache.carbondata.format.FileHeader;
+
+import org.apache.commons.cli.CommandLine;
+
+class ScanBenchmark implements Command {
+
+  private String dataFolder;
+  private PrintStream out;
+  private DataFile file;
+
+  /**
+   * @param dataFolder folder searched for a data file when no file is given with option "f"
+   * @param out stream the benchmark report is printed to
+   */
+  ScanBenchmark(String dataFolder, PrintStream out) {
+    this.dataFolder = dataFolder;
+    this.out = out;
+  }
+
+  /**
+   * Benchmarks reading one data file and prints the elapsed time of each step.
+   *
+   * <p>The file is the one given with option "f", or otherwise the first data file collected
+   * from the data folder; when the folder holds no data file the method returns without
+   * printing a report. Header/footer reading and footer conversion are always measured. When
+   * option "c" names a column, the column chunk IO and the page decompression of that column
+   * are additionally measured for every blocklet.
+   *
+   * @param line parsed command line; options "f" and "c" are consulted
+   * @throws IOException if a measured operation fails with an IO error
+   * @throws MemoryException if a measured operation fails with a memory error
+   */
+  @Override
+  public void run(CommandLine line) throws IOException, MemoryException {
+    if (line.hasOption("f")) {
+      String filePath = line.getOptionValue("f");
+      file = new DataFile(FileFactory.getCarbonFile(filePath));
+    } else {
+      FileCollector collector = new FileCollector(out);
+      collector.collectFiles(dataFolder);
+      if (collector.getNumDataFiles() == 0) {
+        return;
+      }
+      Map<String, DataFile> dataFiles = collector.getDataFiles();
+      file = dataFiles.entrySet().iterator().next().getValue();
+    }
+
+    out.println("\n## Benchmark");
+    // the references carry results out of the timed lambdas
+    AtomicReference<FileHeader> fileHeaderRef = new AtomicReference<>();
+    AtomicReference<FileFooter3> fileFooterRef = new AtomicReference<>();
+    AtomicReference<DataFileFooter> convertedFooterRef = new AtomicReference<>();
+
+    // benchmark read header and footer time
+    benchmarkOperation("ReadHeaderAndFooter", () -> {
+      fileHeaderRef.set(file.readHeader());
+      fileFooterRef.set(file.readFooter());
+    });
+    FileHeader fileHeader = fileHeaderRef.get();
+    FileFooter3 fileFooter = fileFooterRef.get();
+
+    // benchmark convert footer
+    benchmarkOperation("ConvertFooter", () -> {
+      convertFooter(fileHeader, fileFooter);
+    });
+
+    // benchmark read all meta and convert footer
+    benchmarkOperation("ReadAllMetaAndConvertFooter", () -> {
+      DataFileFooter footer = readAndConvertFooter(file);
+      convertedFooterRef.set(footer);
+    });
+
+    if (line.hasOption("c")) {
+      String columnName = line.getOptionValue("c");
+      out.println("\nScan column '" + columnName + "'");
+
+      DataFileFooter footer = convertedFooterRef.get();
+      AtomicReference<AbstractRawColumnChunk> columnChunk = new AtomicReference<>();
+      int columnIndex = file.getColumnIndex(columnName);
+      boolean dimension = file.getColumn(columnName).isDimensionColumn();
+      for (int i = 0; i < footer.getBlockletList().size(); i++) {
+        // copies of the loop state that the lambdas below are allowed to capture
+        int blockletId = i;
+        int numPages = footer.getBlockletList().get(blockletId).getNumberOfPages();
+        out.println(String.format("Blocklet#%d: total size %s, %,d pages, %,d rows",
+            blockletId,
+            Strings.formatSize(file.getColumnDataSizeInBytes(blockletId, columnIndex)),
+            numPages,
+            footer.getBlockletList().get(blockletId).getNumberOfRows()));
+        benchmarkOperation("\tColumnChunk IO", () -> {
+          columnChunk.set(readBlockletColumnChunkIO(footer, blockletId, columnIndex, dimension));
+        });
+
+        // Choose the decoder from the column type itself. The reader fields are never reset,
+        // so testing them for null would pick the dimension path for a measure column once
+        // this instance has scanned a dimension column.
+        if (dimension) {
+          benchmarkOperation("\tDecompress Pages", () -> {
+            decompressDimensionPages(columnChunk.get(), numPages);
+          });
+        } else {
+          benchmarkOperation("\tDecompress Pages", () -> {
+            decompressMeasurePages(columnChunk.get(), numPages);
+          });
+        }
+      }
+    }
+
+  }
+
+  /**
+   * A unit of work to be timed by {@code benchmarkOperation}; unlike {@code Runnable} it may
+   * throw the checked exceptions of the read and decode calls.
+   */
+  interface Operation {
+    void run() throws IOException, MemoryException;
+  }
+
+  /**
+   * Runs {@code op} once and prints how long it took, in microseconds, labelled with
+   * {@code opName}. Nothing is printed when the operation throws.
+   */
+  private void benchmarkOperation(String opName, Operation op)
+      throws IOException, MemoryException {
+    long startNanos = System.nanoTime();
+    op.run();
+    long elapsedMicros = (System.nanoTime() - startNanos) / 1000;
+    out.println(String.format("%s takes %,d us", opName, elapsedMicros));
+  }
+
+  /**
+   * Describes {@code file} as a V3 table block and lets {@link DataFileFooterConverterV3}
+   * read its footer from that description.
+   */
+  private DataFileFooter readAndConvertFooter(DataFile file) throws IOException {
+    String filePath = file.getFilePath();
+    int blockletCount = file.getNumBlocklet();
+    // NOTE(review): (count, 0, count) presumably selects every blocklet of the file - confirm
+    TableBlockInfo blockInfo = new TableBlockInfo(
+        filePath,
+        file.getFooterOffset(),
+        CarbonTablePath.DataFileUtil.getSegmentNo(filePath),
+        new String[]{"localhost"},
+        file.getFileSizeInBytes(),
+        new BlockletInfos(blockletCount, 0, blockletCount),
+        ColumnarFormatVersion.V3,
+        new String[0]);
+    return new DataFileFooterConverterV3().readDataFileFooter(blockInfo);
+  }
+
+  /**
+   * Converts an already-read header and footer with a fresh {@link DataFileFooterConverterV3}.
+   */
+  private DataFileFooter convertFooter(FileHeader fileHeader, FileFooter3 fileFooter) {
+    return new DataFileFooterConverterV3().convertDataFileFooter(fileHeader, fileFooter);
+  }
+
+  private DimensionColumnChunkReader dimensionColumnChunkReader;
+  private MeasureColumnChunkReader measureColumnChunkReader;
+
+  /**
+   * Reads the raw (still encoded) chunk of one column in one blocklet and remembers the reader
+   * used, so that the pages can be decoded with it afterwards.
+   *
+   * @param footer converted footer of the benchmarked file
+   * @param blockletId position of the blocklet in the footer's blocklet list
+   * @param columnIndex index of the column as returned by {@code DataFile.getColumnIndex}
+   * @param dimension true to read the column as a dimension, false to read it as a measure
+   */
+  private AbstractRawColumnChunk readBlockletColumnChunkIO(
+      DataFileFooter footer, int blockletId, int columnIndex, boolean dimension)
+      throws IOException {
+    BlockletInfo blockletInfo = footer.getBlockletList().get(blockletId);
+    if (!dimension) {
+      // the measure reader is addressed with the index shifted by the number of dimensions
+      int measureIndex = columnIndex - file.numDimensions();
+      assert (measureIndex >= 0);
+      measureColumnChunkReader = CarbonDataReaderFactory.getInstance()
+          .getMeasureColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
+              file.getFilePath(), false);
+      return measureColumnChunkReader.readRawMeasureChunk(file.getFileReader(), measureIndex);
+    }
+    dimensionColumnChunkReader = CarbonDataReaderFactory.getInstance()
+        .getDimensionColumnChunkReader(ColumnarFormatVersion.V3, blockletInfo,
+            footer.getSegmentInfo().getColumnCardinality(), file.getFilePath(), false);
+    return dimensionColumnChunkReader.readRawDimensionChunk(file.getFileReader(), columnIndex);
+  }
+
+  /**
+   * Decodes pages 0 to {@code numPages - 1} of a raw dimension chunk with the dimension reader
+   * remembered by {@code readBlockletColumnChunkIO}.
+   */
+  private DimensionColumnPage[] decompressDimensionPages(
+      AbstractRawColumnChunk rawColumnChunk, int numPages) throws IOException, MemoryException {
+    DimensionColumnPage[] decoded = new DimensionColumnPage[numPages];
+    for (int pageId = 0; pageId < decoded.length; pageId++) {
+      DimensionRawColumnChunk dimensionChunk = (DimensionRawColumnChunk) rawColumnChunk;
+      decoded[pageId] = dimensionColumnChunkReader.decodeColumnPage(dimensionChunk, pageId);
+    }
+    return decoded;
+  }
+
+  /**
+   * Decodes pages 0 to {@code numPages - 1} of a raw measure chunk with the measure reader
+   * remembered by {@code readBlockletColumnChunkIO}.
+   */
+  private ColumnPage[] decompressMeasurePages(
+      AbstractRawColumnChunk rawColumnChunk, int numPages) throws IOException, MemoryException {
+    ColumnPage[] decoded = new ColumnPage[numPages];
+    for (int pageId = 0; pageId < decoded.length; pageId++) {
+      MeasureRawColumnChunk measureChunk = (MeasureRawColumnChunk) rawColumnChunk;
+      decoded[pageId] = measureColumnChunkReader.decodeColumnPage(measureChunk, pageId);
+    }
+    return decoded;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
----------------------------------------------------------------------
diff --git 
a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java 
b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index 0d0d6b5..4dc34c4 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -45,9 +45,9 @@ public class CarbonCliTest {
     fields[0] = new Field("name", DataTypes.STRING);
     fields[1] = new Field("age", DataTypes.INT);
 
-    TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new 
String[]{"age"},
+    TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new 
String[]{"name"},
         true, 3, 8, true);
-    TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new 
String[]{"age"},
+    TestUtil.writeFilesAndVerify(5000000, new Schema(fields), path, new 
String[]{"name"},
         true, 3, 8, true);
   }
 
@@ -69,7 +69,7 @@ public class CarbonCliTest {
   }
 
   @Test
-  public void testOutputIndividual() {
+  public void testSummaryOutputIndividual() {
     String[] args = {"-cmd", "summary", "-p", path};
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     PrintStream stream = new PrintStream(out);
@@ -79,20 +79,19 @@ public class CarbonCliTest {
         output.contains(
             "Input Folder: ./CarbonCliTest\n"
           + "## Summary\n"
-          + "total: 6 blocks, 2 shards, 12 blocklets, 314 pages, 10,000,000 
rows, 30.72MB\n"
-          + "avg: 5.12MB/block, 2.56MB/blocklet, 1,666,666 rows/block, 833,333 
rows/blocklet"));
+          + "total: 6 blocks, 2 shards, 14 blocklets, 314 pages, 10,000,000 
rows, 32.26MB\n"
+          + "avg: 5.38MB/block, 2.30MB/blocklet, 1,666,666 rows/block, 714,285 
rows/blocklet"));
 
     String[] args2 = {"-cmd", "summary", "-p", path, "-s"};
     out = new ByteArrayOutputStream();
     stream = new PrintStream(out);
     CarbonCli.run(args2, stream);
     output = new String(out.toByteArray());
-
     Assert.assertTrue(
         output.contains(
             "Column Name  Data Type  Column Type  SortColumn  Encoding         
 Ordinal  Id  \n"
-          + "age          INT        dimension    true        [INVERTED_INDEX] 
 1        NA  \n"
-          + "name         STRING     dimension    false       [INVERTED_INDEX] 
 0        NA  \n"));
+          + "name         STRING     dimension    true        [INVERTED_INDEX] 
 0        NA  \n"
+          + "age          INT        measure      false       []               
 1        NA  "));
 
     String[] args3 = {"-cmd", "summary", "-p", path, "-t"};
     out = new ByteArrayOutputStream();
@@ -113,38 +112,37 @@ public class CarbonCliTest {
     stream = new PrintStream(out);
     CarbonCli.run(args4, stream);
     output = new String(out.toByteArray());
-
     Assert.assertTrue(
         output.contains(
-            "BLK  BLKLT  NumPages  NumRows  Size    \n"
-          + "0    0      29        928,000  2.60MB  \n"
-          + "0    1      29        928,000  2.60MB  \n"
-          + "1    0      29        928,000  2.60MB  \n"
-          + "1    1      29        928,000  2.60MB  \n"
-          + "2    0      22        704,000  2.54MB  \n"
-          + "2    1      19        584,000  2.43MB  "));
+            "BLK  BLKLT  NumPages  NumRows  Size      \n"
+          + "0    0      25        800,000  2.58MB    \n"
+          + "0    1      25        800,000  2.58MB    \n"
+          + "1    0      25        800,000  2.58MB    \n"
+          + "1    1      25        800,000  2.58MB    \n"
+          + "2    0      25        800,000  2.58MB    \n"
+          + "2    1      25        800,000  2.58MB    \n"
+          + "2    2      7         200,000  660.74KB  "));
 
     String[] args5 = {"-cmd", "summary", "-p", path, "-c", "name"};
     out = new ByteArrayOutputStream();
     stream = new PrintStream(out);
     CarbonCli.run(args5, stream);
     output = new String(out.toByteArray());
-
     Assert.assertTrue(
         output.contains(
             "BLK  BLKLT  Meta Size  Data Size  LocalDict  DictEntries  
DictSize  AvgPageSize  Min%    Max%    \n"
-          + "0    0      1.82KB     5.19MB     false      0            0.0B    
  11.96KB      robot0  robot9  \n"
-          + "0    1      1.82KB     2.60MB     false      0            0.0B    
  11.96KB      robot0  robot9  \n"
-          + "1    0      1.82KB     5.19MB     false      0            0.0B    
  11.96KB      robot0  robot9  \n"
-          + "1    1      1.82KB     2.60MB     false      0            0.0B    
  11.96KB      robot0  robot9  \n"
-          + "2    0      1.38KB     4.97MB     false      0            0.0B    
  11.92KB      robot0  robot9  \n"
-          + "2    1      1.19KB     2.43MB     false      0            0.0B    
  11.42KB      robot0  robot9  \n"));
+          + "0    0      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot0  robot1  \n"
+          + "0    1      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot1  robot3  \n"
+          + "1    0      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot3  robot4  \n"
+          + "1    1      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot4  robot6  \n"
+          + "2    0      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot6  robot7  \n"
+          + "2    1      1.72KB     295.89KB   false      0            0.0B    
  11.77KB      robot8  robot9  \n"
+          + "2    2      492.0B     74.03KB    false      0            0.0B    
  10.51KB      robot9  robot9  "));
   }
 
   @Test
-  public void testOutputAll() {
+  public void testSummaryOutputAll() {
     String[] args = {"-cmd", "summary", "-p", path, "-a", "-c", "age"};
-
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     PrintStream stream = new PrintStream(out);
     CarbonCli.run(args, stream);
@@ -153,14 +151,14 @@ public class CarbonCliTest {
         output.contains(
             "Input Folder: ./CarbonCliTest\n"
           + "## Summary\n"
-          + "total: 6 blocks, 2 shards, 12 blocklets, 314 pages, 10,000,000 
rows, 30.72MB\n"
-          + "avg: 5.12MB/block, 2.56MB/blocklet, 1,666,666 rows/block, 833,333 
rows/blocklet"));
+          + "total: 6 blocks, 2 shards, 14 blocklets, 314 pages, 10,000,000 
rows, 32.26MB\n"
+          + "avg: 5.38MB/block, 2.30MB/blocklet, 1,666,666 rows/block, 714,285 
rows/blocklet\n"));
 
     Assert.assertTrue(
         output.contains(
             "Column Name  Data Type  Column Type  SortColumn  Encoding         
 Ordinal  Id  \n"
-          + "age          INT        dimension    true        [INVERTED_INDEX] 
 1        NA  \n"
-          + "name         STRING     dimension    false       [INVERTED_INDEX] 
 0        NA  \n"));
+          + "name         STRING     dimension    true        [INVERTED_INDEX] 
 0        NA  \n"
+          + "age          INT        measure      false       []               
 1        NA  \n"));
 
     Assert.assertTrue(
         output.contains(
@@ -172,23 +170,35 @@ public class CarbonCliTest {
 
     Assert.assertTrue(
         output.contains(
-            "BLK  BLKLT  NumPages  NumRows  Size    \n"
-          + "0    0      29        928,000  2.60MB  \n"
-          + "0    1      29        928,000  2.60MB  \n"
-          + "1    0      29        928,000  2.60MB  \n"
-          + "1    1      29        928,000  2.60MB  \n"
-          + "2    0      22        704,000  2.54MB  \n"
-          + "2    1      19        584,000  2.43MB  "));
+            "BLK  BLKLT  NumPages  NumRows  Size      \n"
+          + "0    0      25        800,000  2.58MB    \n"
+          + "0    1      25        800,000  2.58MB    \n"
+          + "1    0      25        800,000  2.58MB    \n"
+          + "1    1      25        800,000  2.58MB    \n"
+          + "2    0      25        800,000  2.58MB    \n"
+          + "2    1      25        800,000  2.58MB    \n"
+          + "2    2      7         200,000  660.74KB  "));
 
     Assert.assertTrue(
         output.contains(
           "BLK  BLKLT  Meta Size  Data Size  LocalDict  DictEntries  DictSize  
AvgPageSize  Min%  Max%   \n"
-        + "0    0      1.81KB     2.26MB     false      0            0.0B      
79.61KB      0.0   15.5   \n"
-        + "0    1      1.81KB     2.26MB     false      0            0.0B      
79.60KB      15.5  30.9   \n"
-        + "1    0      1.81KB     2.26MB     false      0            0.0B      
79.62KB      30.9  46.4   \n"
-        + "1    1      1.81KB     2.26MB     false      0            0.0B      
79.60KB      46.4  61.9   \n"
-        + "2    0      1.37KB     2.28MB     false      0            0.0B      
106.11KB     61.9  80.5   \n"
-        + "2    1      1.19KB     2.22MB     false      0            0.0B      
119.55KB     80.5  100.0  "));
+        + "0    0      2.90KB     4.87MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "0    1      2.90KB     2.29MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "1    0      2.90KB     4.87MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "1    1      2.90KB     2.29MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "2    0      2.90KB     5.52MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "2    1      2.90KB     2.94MB     false      0            0.0B      
93.76KB      0.0   100.0  \n"
+        + "2    2      830.0B     586.81KB   false      0            0.0B      
83.71KB      0.0   100.0 "));
+  }
+
+  /**
+   * Runs the benchmark command on the "name" column of the test data and checks that the
+   * report was written to the supplied stream.
+   */
+  @Test
+  public void testBenchmark() {
+    String[] args = {"-cmd", "benchmark", "-p", path, "-a", "-c", "name"};
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    PrintStream stream = new PrintStream(out);
+    CarbonCli.run(args, stream);
+    String output = new String(out.toByteArray());
+    // timings differ from run to run, so only the fixed parts of the report are checked
+    Assert.assertTrue(output.contains("## Benchmark"));
+    Assert.assertTrue(output.contains("ReadHeaderAndFooter takes"));
+    Assert.assertTrue(output.contains("Scan column 'name'"));
   }
 
   @After

carbondata git commit: [CARBONDATA-2965] support Benchmark command in CarbonCli [Forced Update!]

Reply via email to