[CARBONDATA-2524] Support create carbonReader with default projection This closes #2338
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/60b65691 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/60b65691 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/60b65691 Branch: refs/heads/branch-1.4 Commit: 60b65691fec1ce28dda5187100cf8109796befa8 Parents: f27fe0a Author: xubo245 <xub...@huawei.com> Authored: Thu May 24 09:33:23 2018 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Tue Jun 5 16:04:20 2018 +0530 ---------------------------------------------------------------------- docs/sdk-guide.md | 10 ++ .../sdk/file/CarbonReaderBuilder.java | 40 +++++++- .../carbondata/sdk/file/CarbonReaderTest.java | 101 +++++++++++++++++++ 3 files changed, 149 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/docs/sdk-guide.md ---------------------------------------------------------------------- diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index 4d258f0..328a845 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -493,6 +493,16 @@ Find example code at [CarbonReaderExample](https://github.com/apache/carbondata/ ``` /** + * Project all Columns for carbon reader + * + * @return CarbonReaderBuilder object + * @throws IOException + */ + public CarbonReaderBuilder projectAllColumns(); +``` + +``` + /** * Configure the transactional status of table * If set to false, then reads the carbondata and carbonindex files from a flat folder structure. * If set to true, then reads the carbondata and carbonindex files from segment folder structure. 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java index c78cda0..4103c63 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java @@ -26,6 +26,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.annotations.InterfaceStability; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.hadoop.CarbonProjection; import org.apache.carbondata.hadoop.api.CarbonFileInputFormat; @@ -51,6 +52,12 @@ public class CarbonReaderBuilder { private boolean isTransactionalTable = true; /** + * It will be true if use the projectAllColumns method, + * it will be false if use the projection method + */ + private boolean isProjectAllColumns = true; + + /** * Construct a CarbonReaderBuilder with table path and table name * * @param tablePath table path @@ -70,6 +77,7 @@ public class CarbonReaderBuilder { public CarbonReaderBuilder projection(String[] projectionColumnNames) { Objects.requireNonNull(projectionColumnNames); this.projectionColumns = projectionColumnNames; + isProjectAllColumns = false; return this; } @@ -89,6 +97,33 @@ public class CarbonReaderBuilder { } /** + * Project all Columns for carbon reader + * + * @return CarbonReaderBuilder object + * @throws IOException + */ + public CarbonReaderBuilder projectAllColumns() throws 
IOException { + CarbonTable carbonTable = CarbonTable + .buildFromTablePath(tableName, tablePath, isTransactionalTable); + + List<ColumnSchema> colList = carbonTable.getTableInfo().getFactTable().getListOfColumns(); + List<String> projectColumn = new ArrayList<String>(); + for (ColumnSchema cols : colList) { + if (cols.getSchemaOrdinal() != -1) { + projectColumn.add(cols.getColumnUniqueId()); + } + } + projectionColumns = new String[projectColumn.size()]; + int i = 0; + for (String columnName : projectColumn) { + projectionColumns[i] = columnName; + i++; + } + isProjectAllColumns = true; + return this; + } + + /** * Configure the filter expression for carbon reader * * @param filterExpression filter expression @@ -186,9 +221,10 @@ public class CarbonReaderBuilder { if (filterExpression != null) { format.setFilterPredicates(job.getConfiguration(), filterExpression); } - if (projectionColumns != null) { - format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns)); + if (isProjectAllColumns) { + projectAllColumns(); } + format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns)); final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID())); http://git-wip-us.apache.org/repos/asf/carbondata/blob/60b65691/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java ---------------------------------------------------------------------- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index 0d2c84e..756dbe4 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -409,4 +409,105 @@ public class CarbonReaderTest extends TestCase { badRecordLoc); } + @Test + public void testReadFilesWithProjectAllColumns() throws 
IOException, InterruptedException { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + CarbonReader reader = CarbonReader + .builder(path, "_temp") + .projectAllColumns() + .build(); + + // expected output after sorting + String[] name = new String[100]; + int[] age = new int[100]; + for (int i = 0; i < 100; i++) { + name[i] = "robot" + (i / 10); + age[i] = (i % 10) * 10 + i / 10; + } + + int i = 0; + while (reader.hasNext()) { + Object[] row = (Object[]) reader.readNextRow(); + // Default sort column is applied for dimensions. So, need to validate accordingly + Assert.assertEquals(name[i], row[0]); + Assert.assertEquals(age[i], row[1]); + i++; + } + Assert.assertEquals(i, 100); + + reader.close(); + FileUtils.deleteDirectory(new File(path)); + } + + @Test + public void testReadFilesWithDefaultProjection() throws IOException, InterruptedException { + String path = "./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + CarbonReader reader = CarbonReader + .builder(path, "_temp") + .build(); + + // expected output after sorting + String[] name = new String[100]; + int[] age = new int[100]; + for (int i = 0; i < 100; i++) { + name[i] = "robot" + (i / 10); + age[i] = (i % 10) * 10 + i / 10; + } + + int i = 0; + while (reader.hasNext()) { + Object[] row = (Object[]) reader.readNextRow(); + Assert.assertEquals(name[i], row[0]); + Assert.assertEquals(age[i], row[1]); + i++; + } + Assert.assertEquals(i, 100); + } + + @Test + public void testReadFilesWithNullProjection() throws IOException, InterruptedException { + String path = 
"./testWriteFiles"; + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + + TestUtil.writeFilesAndVerify(new Schema(fields), path, true); + + CarbonReader reader = CarbonReader + .builder(path, "_temp") + .projection(new String[]{}) + .build(); + + // expected output after sorting + String[] name = new String[100]; + int[] age = new int[100]; + for (int i = 0; i < 100; i++) { + name[i] = "robot" + (i / 10); + age[i] = (i % 10) * 10 + i / 10; + } + // Default sort column is applied for dimensions. So, need to validate accordingly + + while (reader.hasNext()) { + Object[] row = (Object[]) reader.readNextRow(); + assert(row.length==0); + } + } }