[CARBONDATA-2566] Optimize CarbonReaderExample Optimize CarbonReaderExample: 1. Add different data types, including date and timestamp. 2. Update the doc. 3. Read the schema by invoking Schema schema = CarbonSchemaReader.readSchemaInIndexFile(dataFiles[0].getAbsolutePath()).asOriginOrder();
This closes #2356 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/56bf4e42 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/56bf4e42 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/56bf4e42 Branch: refs/heads/carbonstore Commit: 56bf4e420747ddeb800fc7f004a6ec0d9f5e7d3f Parents: 9469e6b Author: xubo245 <xub...@huawei.com> Authored: Thu May 31 15:52:57 2018 +0800 Committer: kumarvishal09 <kumarvishal1...@gmail.com> Committed: Fri Jun 1 16:33:28 2018 +0530 ---------------------------------------------------------------------- docs/sdk-guide.md | 15 ++-- .../examples/sdk/CarbonReaderExample.java | 92 +++++++++++++++++--- 2 files changed, 89 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/56bf4e42/docs/sdk-guide.md ---------------------------------------------------------------------- diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index ec70919..2371b33 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -415,17 +415,22 @@ External client can make use of this reader to read CarbonData files without Car String path = "./testWriteFiles"; CarbonReader reader = CarbonReader .builder(path, "_temp") - .projection(new String[]{"name", "age"}) + .projection(new String[]{"stringField", "shortField", "intField", "longField", + "doubleField", "boolField", "dateField", "timeField", "decimalField"}) .build(); // 2. 
Read data + long day = 24L * 3600 * 1000; int i = 0; while (reader.hasNext()) { - Object[] row = (Object[]) reader.readNextRow(); - System.out.println(row[0] + "\t" + row[1]); - i++; + Object[] row = (Object[]) reader.readNextRow(); + System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", + i, row[0], row[1], row[2], row[3], row[4], row[5], + new Date((day * ((int) row[6]))), new Timestamp((long) row[7] / 1000), row[8] + )); + i++; } - + // 3. Close this reader reader.close(); ``` http://git-wip-us.apache.org/repos/asf/carbondata/blob/56bf4e42/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java ---------------------------------------------------------------------- diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java index d7886c0..8d3ff0d 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java @@ -18,16 +18,19 @@ package org.apache.carbondata.examples.sdk; import java.io.File; +import java.io.FilenameFilter; +import java.sql.Date; +import java.sql.Timestamp; import org.apache.commons.io.FileUtils; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.sdk.file.CarbonReader; +import org.apache.carbondata.sdk.file.CarbonSchemaReader; import org.apache.carbondata.sdk.file.CarbonWriter; import org.apache.carbondata.sdk.file.Field; import org.apache.carbondata.sdk.file.Schema; - /** * Example fo CarbonReader with close method * After readNextRow of CarbonReader, User should close the reader, @@ -39,36 +42,99 @@ public class CarbonReaderExample { try { FileUtils.deleteDirectory(new File(path)); - Field[] fields = new Field[2]; - fields[0] = new Field("name", DataTypes.STRING); - fields[1] = new 
Field("age", DataTypes.INT); + Field[] fields = new Field[9]; + fields[0] = new Field("stringField", DataTypes.STRING); + fields[1] = new Field("shortField", DataTypes.SHORT); + fields[2] = new Field("intField", DataTypes.INT); + fields[3] = new Field("longField", DataTypes.LONG); + fields[4] = new Field("doubleField", DataTypes.DOUBLE); + fields[5] = new Field("boolField", DataTypes.BOOLEAN); + fields[6] = new Field("dateField", DataTypes.DATE); + fields[7] = new Field("timeField", DataTypes.TIMESTAMP); + fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2)); CarbonWriter writer = CarbonWriter.builder() - .outputPath(path) - .persistSchemaFile(true) - .buildWriterForCSVInput(new Schema(fields)); + .outputPath(path) + .buildWriterForCSVInput(new Schema(fields)); for (int i = 0; i < 10; i++) { - writer.write(new String[]{"robot" + (i % 10), String.valueOf(i)}); + String[] row2 = new String[]{ + "robot" + (i % 10), + String.valueOf(i), + String.valueOf(i), + String.valueOf(Long.MAX_VALUE - i), + String.valueOf((double) i / 2), + String.valueOf(true), + "2019-03-02", + "2019-02-12 03:03:34", + "12.345" + }; + writer.write(row2); } writer.close(); + File[] dataFiles = new File(path).listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + if (name == null) { + return false; + } + return name.endsWith("carbonindex"); + } + }); + if (dataFiles == null || dataFiles.length < 1) { + throw new RuntimeException("Carbon index file not exists."); + } + Schema schema = CarbonSchemaReader + .readSchemaInIndexFile(dataFiles[0].getAbsolutePath()) + .asOriginOrder(); + // Transform the schema + String[] strings = new String[schema.getFields().length]; + for (int i = 0; i < schema.getFields().length; i++) { + strings[i] = (schema.getFields())[i].getFieldName(); + } + // Read data CarbonReader reader = CarbonReader - .builder(path, "_temp") - .projection(new String[]{"name", "age"}) - .build(); + .builder(path, "_temp") + 
.projection(strings) + .build(); System.out.println("\nData:"); + long day = 24L * 3600 * 1000; + int i = 0; while (reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); - System.out.println(row[0] + " " + row[1]); + System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", + i, row[0], row[1], row[2], row[3], row[4], row[5], + new Date((day * ((int) row[6]))), new Timestamp((long) row[7] / 1000), row[8] + )); + i++; + } + System.out.println("\nFinished"); + + // Read data + CarbonReader reader2 = CarbonReader + .builder(path, "_temp") + .projectAllColumns() + .build(); + + System.out.println("\nData:"); + i = 0; + while (reader2.hasNext()) { + Object[] row = (Object[]) reader2.readNextRow(); + System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", + i, row[0], new Date((day * ((int) row[1]))), new Timestamp((long) row[2] / 1000), + row[3], row[4], row[5], row[6], row[7], row[8] + )); + i++; } System.out.println("\nFinished"); reader.close(); FileUtils.deleteDirectory(new File(path)); - } catch (Exception e) { + } catch (Throwable e) { e.printStackTrace(); + System.out.println(e.getMessage()); } } }