[HOTFIX] Add java doc for datamap interface 1. Rename some of the datamap interfaces 2. Add more java doc for all public classes of the datamap interface
This closes #1998 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ef3031d0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ef3031d0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ef3031d0 Branch: refs/heads/datamap Commit: ef3031d0cd20316972b8e4bf2a27dcef99cbdb7d Parents: 2117c07 Author: Jacky Li <jacky.li...@qq.com> Authored: Mon Feb 26 10:04:51 2018 +0800 Committer: QiangCai <qiang...@qq.com> Committed: Tue Feb 27 09:29:58 2018 +0800 ---------------------------------------------------------------------- .../carbondata/core/datamap/DataMapChooser.java | 8 +- .../core/datamap/DataMapDistributable.java | 2 + .../carbondata/core/datamap/DataMapLevel.java | 37 + .../carbondata/core/datamap/DataMapMeta.java | 7 + .../core/datamap/DataMapRegistry.java | 21 + .../core/datamap/DataMapStoreManager.java | 86 +- .../carbondata/core/datamap/DataMapType.java | 21 - .../carbondata/core/datamap/TableDataMap.java | 58 +- .../core/datamap/dev/AbstractDataMapWriter.java | 110 --- .../core/datamap/dev/BlockletSerializer.java | 8 +- .../carbondata/core/datamap/dev/DataMap.java | 63 ++ .../core/datamap/dev/DataMapFactory.java | 85 ++ .../core/datamap/dev/DataMapWriter.java | 116 +++ .../core/datamap/dev/IndexDataMap.java | 61 -- .../core/datamap/dev/IndexDataMapFactory.java | 85 -- .../AbstractCoarseGrainIndexDataMap.java | 24 - .../AbstractCoarseGrainIndexDataMapFactory.java | 34 - .../dev/cgdatamap/CoarseGrainDataMap.java | 31 + .../cgdatamap/CoarseGrainDataMapFactory.java | 38 + .../datamap/dev/expr/AndDataMapExprWrapper.java | 4 +- .../datamap/dev/expr/DataMapExprWrapper.java | 4 +- .../dev/expr/DataMapExprWrapperImpl.java | 6 +- .../datamap/dev/expr/OrDataMapExprWrapper.java | 4 +- .../AbstractFineGrainIndexDataMap.java | 24 - .../AbstractFineGrainIndexDataMapFactory.java | 38 - .../dev/fgdatamap/FineGrainBlocklet.java | 134 +++ 
.../datamap/dev/fgdatamap/FineGrainDataMap.java | 30 + .../dev/fgdatamap/FineGrainDataMapFactory.java | 44 + .../indexstore/BlockletDataMapIndexStore.java | 30 +- .../core/indexstore/FineGrainBlocklet.java | 128 --- .../blockletindex/BlockletDataMap.java | 981 +++++++++++++++++++ .../blockletindex/BlockletDataMapFactory.java | 256 +++++ .../blockletindex/BlockletDataMapModel.java | 2 +- .../blockletindex/BlockletDataRefNode.java | 2 +- .../blockletindex/BlockletIndexDataMap.java | 981 ------------------- .../BlockletIndexDataMapFactory.java | 256 ----- .../core/metadata/schema/table/CarbonTable.java | 2 +- .../blockletindex/TestBlockletDataMap.java | 59 ++ .../blockletindex/TestBlockletIndexDataMap.java | 59 -- .../datamap/examples/MinMaxDataWriter.java | 5 +- .../datamap/examples/MinMaxIndexDataMap.java | 4 +- .../examples/MinMaxIndexDataMapFactory.java | 16 +- .../hadoop/api/CarbonTableInputFormat.java | 8 +- .../testsuite/datamap/CGDataMapTestCase.scala | 379 +++++++ .../datamap/CGIndexDataMapTestCase.scala | 379 ------- .../testsuite/datamap/DataMapWriterSuite.scala | 216 ++++ .../testsuite/datamap/FGDataMapTestCase.scala | 473 +++++++++ .../datamap/FGIndexDataMapTestCase.scala | 474 --------- .../datamap/IndexDataMapWriterSuite.scala | 216 ---- .../testsuite/datamap/TestDataMapCommand.scala | 279 ++++++ .../datamap/TestIndexDataMapCommand.scala | 279 ------ .../iud/InsertOverwriteConcurrentTest.scala | 20 +- .../testsuite/sortcolumns/TestSortColumns.scala | 5 + .../sortcolumns/TestSortColumnsWithUnsafe.scala | 15 +- .../carbondata/datamap/DataMapProvider.java | 12 +- .../datamap/IndexDataMapProvider.java | 20 +- .../datamap/DataMapWriterListener.java | 54 +- 57 files changed, 3451 insertions(+), 3342 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java index 41e9b56..94b48c6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java @@ -74,12 +74,12 @@ public class DataMapChooser { Expression expression = resolverIntf.getFilterExpression(); // First check for FG datamaps if any exist List<TableDataMap> allDataMapFG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapType.FG); + DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.FG); ExpressionTuple tuple = selectDataMap(expression, allDataMapFG); if (tuple.dataMapExprWrapper == null) { // Check for CG datamap List<TableDataMap> allDataMapCG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapType.CG); + DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.CG); tuple = selectDataMap(expression, allDataMapCG); } if (tuple.dataMapExprWrapper != null) { @@ -212,10 +212,10 @@ public class DataMapChooser { List<ColumnExpression> columnExpressions, Set<ExpressionType> expressionTypes) { List<DataMapTuple> tuples = new ArrayList<>(); for (TableDataMap dataMap : allDataMap) { - if (contains(dataMap.getIndexDataMapFactory().getMeta(), columnExpressions, expressionTypes)) + if (contains(dataMap.getDataMapFactory().getMeta(), columnExpressions, expressionTypes)) { tuples.add( - new DataMapTuple(dataMap.getIndexDataMapFactory().getMeta().getIndexedColumns().size(), + new DataMapTuple(dataMap.getDataMapFactory().getMeta().getIndexedColumns().size(), dataMap)); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapDistributable.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapDistributable.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapDistributable.java index 828cdbb..911c6f2 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapDistributable.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapDistributable.java @@ -19,6 +19,7 @@ package org.apache.carbondata.core.datamap; import java.io.IOException; import java.io.Serializable; +import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.core.datastore.block.Distributable; import org.apache.carbondata.core.metadata.schema.table.DataMapSchema; @@ -27,6 +28,7 @@ import org.apache.hadoop.mapreduce.InputSplit; /** * Distributable class for datamap. */ +@InterfaceAudience.Internal public abstract class DataMapDistributable extends InputSplit implements Distributable, Serializable { http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapLevel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapLevel.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapLevel.java new file mode 100644 index 0000000..2c9672a --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapLevel.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; + +/** + * Index level of the datamap + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public enum DataMapLevel { + /** + * Coarse Grain Index, index is of blocklet level + */ + CG, + + /** + * Fine Grain Index, index is of row level + */ + FG; +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java index dd15ccb..396c5db 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapMeta.java @@ -19,8 +19,15 @@ package org.apache.carbondata.core.datamap; import java.util.List; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; import org.apache.carbondata.core.scan.filter.intf.ExpressionType; +/** + * Metadata of the datamap, set by DataMap developer + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving public class DataMapMeta { private List<String> indexedColumns; 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapRegistry.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapRegistry.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapRegistry.java index 03c0c3e..1b6782a 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapRegistry.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapRegistry.java @@ -21,6 +21,27 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; + +/** + * Developer can register a datamap implementation with a short name. + * After registration, user can use short name to create the datamap, like + * <p> + * {@code + * CREATE DATAMAP dm ON TABLE table + * USING 'short-name-of-the-datamap' + * } + * otherwise, user should use the class name of the datamap implementation to create the datamap + * (subclass of {@link org.apache.carbondata.core.datamap.dev.DataMapFactory}) + * <p> + * {@code + * CREATE DATAMAP dm ON TABLE table + * USING 'class-name-of-the-datamap' + * } + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving public class DataMapRegistry { private static Map<String, String> shortNameToClassName = new ConcurrentHashMap<>(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java index ab31393..e38f4f9 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java @@ -22,14 +22,15 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.exceptions.MetadataProcessException; import org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.datamap.dev.IndexDataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; import org.apache.carbondata.core.indexstore.BlockletDetailsFetcher; import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher; -import org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexDataMapFactory; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.schema.datamap.DataMapProvider; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; @@ -41,6 +42,7 @@ import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager; /** * It maintains all the DataMaps in it. */ +@InterfaceAudience.Internal public final class DataMapStoreManager { private static DataMapStoreManager instance = new DataMapStoreManager(); @@ -63,12 +65,12 @@ public final class DataMapStoreManager { * It gives all datamaps of type @mapType except the default datamap. 
* */ - public List<TableDataMap> getAllDataMap(CarbonTable carbonTable, DataMapType mapType) { + public List<TableDataMap> getAllDataMap(CarbonTable carbonTable, DataMapLevel mapType) { List<TableDataMap> dataMaps = new ArrayList<>(); - List<TableDataMap> tableDataMaps = getAllDataMap(carbonTable); - if (tableDataMaps != null) { - for (TableDataMap dataMap : tableDataMaps) { - if (mapType == dataMap.getIndexDataMapFactory().getDataMapType()) { + List<TableDataMap> tableIndices = getAllDataMap(carbonTable); + if (tableIndices != null) { + for (TableDataMap dataMap : tableIndices) { + if (mapType == dataMap.getDataMapFactory().getDataMapType()) { dataMaps.add(dataMap); } } @@ -96,13 +98,13 @@ public final class DataMapStoreManager { } /** - * It gives the default datamap of the table. Default datamap of any table is BlockletIndexDataMap + * It gives the default datamap of the table. Default datamap of any table is BlockletDataMap * * @param identifier * @return */ public TableDataMap getDefaultDataMap(AbsoluteTableIdentifier identifier) { - return getDataMap(identifier, BlockletIndexDataMapFactory.DATA_MAP_SCHEMA); + return getDataMap(identifier, BlockletDataMapFactory.DATA_MAP_SCHEMA); } /** @@ -110,16 +112,16 @@ public final class DataMapStoreManager { */ public TableDataMap getDataMap(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema) { String table = identifier.getCarbonTableIdentifier().getTableUniqueName(); - List<TableDataMap> tableDataMaps = allDataMaps.get(table); + List<TableDataMap> tableIndices = allDataMaps.get(table); TableDataMap dataMap = null; - if (tableDataMaps != null) { - dataMap = getTableDataMap(dataMapSchema.getDataMapName(), tableDataMaps); + if (tableIndices != null) { + dataMap = getTableDataMap(dataMapSchema.getDataMapName(), tableIndices); } if (dataMap == null) { synchronized (table.intern()) { - tableDataMaps = allDataMaps.get(table); - if (tableDataMaps != null) { - dataMap = 
getTableDataMap(dataMapSchema.getDataMapName(), tableDataMaps); + tableIndices = allDataMaps.get(table); + if (tableIndices != null) { + dataMap = getTableDataMap(dataMapSchema.getDataMapName(), tableIndices); } if (dataMap == null) { try { @@ -143,12 +145,12 @@ public final class DataMapStoreManager { */ private TableDataMap createAndRegisterDataMap(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema) throws MalformedDataMapCommandException { - IndexDataMapFactory indexDataMapFactory; + DataMapFactory dataMapFactory; try { - // try to create datamap by reflection to test whether it is a valid IndexDataMapFactory class - Class<? extends IndexDataMapFactory> factoryClass = - (Class<? extends IndexDataMapFactory>) Class.forName(dataMapSchema.getClassName()); - indexDataMapFactory = factoryClass.newInstance(); + // try to create datamap by reflection to test whether it is a valid DataMapFactory class + Class<? extends DataMapFactory> factoryClass = + (Class<? extends DataMapFactory>) Class.forName(dataMapSchema.getClassName()); + dataMapFactory = factoryClass.newInstance(); } catch (ClassNotFoundException e) { throw new MalformedDataMapCommandException( "DataMap '" + dataMapSchema.getClassName() + "' not found"); @@ -156,39 +158,39 @@ public final class DataMapStoreManager { throw new MetadataProcessException( "failed to create DataMap '" + dataMapSchema.getClassName() + "'", e); } - return registerDataMap(identifier, dataMapSchema, indexDataMapFactory); + return registerDataMap(identifier, dataMapSchema, dataMapFactory); } public TableDataMap registerDataMap(AbsoluteTableIdentifier identifier, - DataMapSchema dataMapSchema, IndexDataMapFactory indexDataMapFactory) { + DataMapSchema dataMapSchema, DataMapFactory dataMapFactory) { String table = identifier.getCarbonTableIdentifier().getTableUniqueName(); // Just update the segmentRefreshMap with the table if not added. 
getTableSegmentRefresher(identifier); - List<TableDataMap> tableDataMaps = allDataMaps.get(table); - if (tableDataMaps == null) { - tableDataMaps = new ArrayList<>(); + List<TableDataMap> tableIndices = allDataMaps.get(table); + if (tableIndices == null) { + tableIndices = new ArrayList<>(); } - indexDataMapFactory.init(identifier, dataMapSchema); + dataMapFactory.init(identifier, dataMapSchema); BlockletDetailsFetcher blockletDetailsFetcher; SegmentPropertiesFetcher segmentPropertiesFetcher = null; - if (indexDataMapFactory instanceof BlockletDetailsFetcher) { - blockletDetailsFetcher = (BlockletDetailsFetcher) indexDataMapFactory; + if (dataMapFactory instanceof BlockletDetailsFetcher) { + blockletDetailsFetcher = (BlockletDetailsFetcher) dataMapFactory; } else { blockletDetailsFetcher = getBlockletDetailsFetcher(identifier); } segmentPropertiesFetcher = (SegmentPropertiesFetcher) blockletDetailsFetcher; - TableDataMap dataMap = new TableDataMap(identifier, dataMapSchema, indexDataMapFactory, + TableDataMap dataMap = new TableDataMap(identifier, dataMapSchema, dataMapFactory, blockletDetailsFetcher, segmentPropertiesFetcher); - tableDataMaps.add(dataMap); - allDataMaps.put(table, tableDataMaps); + tableIndices.add(dataMap); + allDataMaps.put(table, tableIndices); return dataMap; } - private TableDataMap getTableDataMap(String dataMapName, List<TableDataMap> tableDataMaps) { + private TableDataMap getTableDataMap(String dataMapName, List<TableDataMap> tableIndices) { TableDataMap dataMap = null; - for (TableDataMap tableDataMap : tableDataMaps) { + for (TableDataMap tableDataMap : tableIndices) { if (tableDataMap.getDataMapSchema().getDataMapName().equals(dataMapName)) { dataMap = tableDataMap; break; @@ -218,10 +220,10 @@ public final class DataMapStoreManager { */ public void clearDataMaps(AbsoluteTableIdentifier identifier) { String tableUniqueName = identifier.getCarbonTableIdentifier().getTableUniqueName(); - List<TableDataMap> tableDataMaps = 
allDataMaps.get(tableUniqueName); + List<TableDataMap> tableIndices = allDataMaps.get(tableUniqueName); segmentRefreshMap.remove(identifier.uniqueName()); - if (tableDataMaps != null) { - for (TableDataMap tableDataMap : tableDataMaps) { + if (tableIndices != null) { + for (TableDataMap tableDataMap : tableIndices) { if (tableDataMap != null) { tableDataMap.clear(); break; @@ -237,15 +239,15 @@ public final class DataMapStoreManager { * @param identifier Table identifier */ public void clearDataMap(AbsoluteTableIdentifier identifier, String dataMapName) { - List<TableDataMap> tableDataMaps = + List<TableDataMap> tableIndices = allDataMaps.get(identifier.getCarbonTableIdentifier().getTableUniqueName()); - if (tableDataMaps != null) { + if (tableIndices != null) { int i = 0; - for (TableDataMap tableDataMap : tableDataMaps) { + for (TableDataMap tableDataMap : tableIndices) { if (tableDataMap != null && dataMapName .equalsIgnoreCase(tableDataMap.getDataMapSchema().getDataMapName())) { tableDataMap.clear(); - tableDataMaps.remove(i); + tableIndices.remove(i); break; } i++; @@ -260,8 +262,8 @@ public final class DataMapStoreManager { * @return */ private BlockletDetailsFetcher getBlockletDetailsFetcher(AbsoluteTableIdentifier identifier) { - TableDataMap blockletMap = getDataMap(identifier, BlockletIndexDataMapFactory.DATA_MAP_SCHEMA); - return (BlockletDetailsFetcher) blockletMap.getIndexDataMapFactory(); + TableDataMap blockletMap = getDataMap(identifier, BlockletDataMapFactory.DATA_MAP_SCHEMA); + return (BlockletDetailsFetcher) blockletMap.getDataMapFactory(); } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/DataMapType.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapType.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapType.java deleted file mode 100644 index 
bf812b3..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapType.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap; - -public enum DataMapType { - CG,FG; -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java index 7f0f3f2..0e9d001 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java @@ -20,16 +20,17 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datamap.dev.BlockletSerializer; -import org.apache.carbondata.core.datamap.dev.IndexDataMap; -import 
org.apache.carbondata.core.datamap.dev.IndexDataMapFactory; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; +import org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.indexstore.Blocklet; import org.apache.carbondata.core.indexstore.BlockletDetailsFetcher; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; -import org.apache.carbondata.core.indexstore.FineGrainBlocklet; import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.schema.table.DataMapSchema; @@ -39,16 +40,21 @@ import org.apache.carbondata.events.OperationContext; import org.apache.carbondata.events.OperationEventListener; /** - * IndexDataMap at the table level, user can add any number of datamaps for one table. Depends - * on the filter condition it can prune the blocklets. + * Index at the table level, user can add any number of DataMap for one table, by + * {@code + * CREATE DATAMAP dm ON TABLE table + * USING 'class name of DataMapFactory implementation' + * } + * Depends on the filter condition it can prune the data (blocklet or row level). */ +@InterfaceAudience.Internal public final class TableDataMap extends OperationEventListener { private AbsoluteTableIdentifier identifier; private DataMapSchema dataMapSchema; - private IndexDataMapFactory indexDataMapFactory; + private DataMapFactory dataMapFactory; private BlockletDetailsFetcher blockletDetailsFetcher; @@ -57,12 +63,12 @@ public final class TableDataMap extends OperationEventListener { /** * It is called to initialize and load the required table datamap metadata. 
*/ - public TableDataMap(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema, - IndexDataMapFactory indexDataMapFactory, BlockletDetailsFetcher blockletDetailsFetcher, + TableDataMap(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema, + DataMapFactory dataMapFactory, BlockletDetailsFetcher blockletDetailsFetcher, SegmentPropertiesFetcher segmentPropertiesFetcher) { this.identifier = identifier; this.dataMapSchema = dataMapSchema; - this.indexDataMapFactory = indexDataMapFactory; + this.dataMapFactory = dataMapFactory; this.blockletDetailsFetcher = blockletDetailsFetcher; this.segmentPropertiesFetcher = segmentPropertiesFetcher; } @@ -84,10 +90,10 @@ public final class TableDataMap extends OperationEventListener { if (filterExp == null) { pruneBlocklets = blockletDetailsFetcher.getAllBlocklets(segmentId, partitions); } else { - List<IndexDataMap> indexDataMaps = indexDataMapFactory.getDataMaps(segmentId); + List<DataMap> dataMaps = dataMapFactory.getDataMaps(segmentId); segmentProperties = segmentPropertiesFetcher.getSegmentProperties(segmentId); - for (IndexDataMap indexDataMap : indexDataMaps) { - pruneBlocklets.addAll(indexDataMap.prune(filterExp, segmentProperties, partitions)); + for (DataMap dataMap : dataMaps) { + pruneBlocklets.addAll(dataMap.prune(filterExp, segmentProperties, partitions)); } } blocklets.addAll(addSegmentId(blockletDetailsFetcher @@ -114,7 +120,7 @@ public final class TableDataMap extends OperationEventListener { public List<DataMapDistributable> toDistributable(List<String> segmentIds) throws IOException { List<DataMapDistributable> distributables = new ArrayList<>(); for (String segmentsId : segmentIds) { - List<DataMapDistributable> list = indexDataMapFactory.toDistributable(segmentsId); + List<DataMapDistributable> list = dataMapFactory.toDistributable(segmentsId); for (DataMapDistributable distributable: list) { distributable.setDataMapSchema(dataMapSchema); distributable.setSegmentId(segmentsId); @@ -137,10 
+143,10 @@ public final class TableDataMap extends OperationEventListener { FilterResolverIntf filterExp, List<String> partitions) throws IOException { List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>(); List<Blocklet> blocklets = new ArrayList<>(); - List<IndexDataMap> indexDataMaps = indexDataMapFactory.getDataMaps(distributable); - for (IndexDataMap indexDataMap : indexDataMaps) { + List<DataMap> dataMaps = dataMapFactory.getDataMaps(distributable); + for (DataMap dataMap : dataMaps) { blocklets.addAll( - indexDataMap.prune( + dataMap.prune( filterExp, segmentPropertiesFetcher.getSegmentProperties(distributable.getSegmentId()), partitions)); @@ -149,13 +155,13 @@ public final class TableDataMap extends OperationEventListener { String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR + dataMapSchema .getDataMapName(); - if (indexDataMapFactory.getDataMapType() == DataMapType.FG) { + if (dataMapFactory.getDataMapType() == DataMapLevel.FG) { FileFactory.mkdirs(writePath, FileFactory.getFileType(writePath)); } for (Blocklet blocklet : blocklets) { ExtendedBlocklet detailedBlocklet = blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegmentId()); - if (indexDataMapFactory.getDataMapType() == DataMapType.FG) { + if (dataMapFactory.getDataMapType() == DataMapLevel.FG) { String blockletwritePath = writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime(); detailedBlocklet.setDataMapWriterPath(blockletwritePath); @@ -173,7 +179,7 @@ public final class TableDataMap extends OperationEventListener { */ public void clear(List<String> segmentIds) { for (String segmentId: segmentIds) { - indexDataMapFactory.clear(segmentId); + dataMapFactory.clear(segmentId); } } @@ -181,19 +187,19 @@ public final class TableDataMap extends OperationEventListener { * Clears all datamap */ public void clear() { - indexDataMapFactory.clear(); + dataMapFactory.clear(); } public DataMapSchema getDataMapSchema() { return 
dataMapSchema; } - public IndexDataMapFactory getIndexDataMapFactory() { - return indexDataMapFactory; + public DataMapFactory getDataMapFactory() { + return dataMapFactory; } @Override public void onEvent(Event event, OperationContext opContext) throws Exception { - indexDataMapFactory.fireEvent(event); + dataMapFactory.fireEvent(event); } /** @@ -208,9 +214,9 @@ public final class TableDataMap extends OperationEventListener { throws IOException { List<String> prunedSegments = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); for (String segmentId : segmentIds) { - List<IndexDataMap> indexDataMaps = indexDataMapFactory.getDataMaps(segmentId); - for (IndexDataMap indexDataMap : indexDataMaps) { - if (indexDataMap.isScanRequired(filterExp)) { + List<DataMap> dataMaps = dataMapFactory.getDataMaps(segmentId); + for (DataMap dataMap : dataMaps) { + if (dataMap.isScanRequired(filterExp)) { // If any one task in a given segment contains the data that means the segment need to // be scanned and we need to validate further data maps in the same segment prunedSegments.add(segmentId); http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java deleted file mode 100644 index 1af7dde..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/AbstractDataMapWriter.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev; - -import java.io.IOException; - -import org.apache.carbondata.core.datastore.impl.FileFactory; -import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.util.CarbonUtil; -import org.apache.carbondata.core.util.path.CarbonTablePath; - -/** - * Data Map writer - */ -public abstract class AbstractDataMapWriter { - - protected AbsoluteTableIdentifier identifier; - - protected String segmentId; - - protected String writeDirectoryPath; - - public AbstractDataMapWriter(AbsoluteTableIdentifier identifier, String segmentId, - String writeDirectoryPath) { - this.identifier = identifier; - this.segmentId = segmentId; - this.writeDirectoryPath = writeDirectoryPath; - } - - /** - * Start of new block notification. - * - * @param blockId file name of the carbondata file - */ - public abstract void onBlockStart(String blockId); - - /** - * End of block notification - */ - public abstract void onBlockEnd(String blockId); - - /** - * Start of new blocklet notification. 
- * - * @param blockletId sequence number of blocklet in the block - */ - public abstract void onBlockletStart(int blockletId); - - /** - * End of blocklet notification - * - * @param blockletId sequence number of blocklet in the block - */ - public abstract void onBlockletEnd(int blockletId); - - /** - * Add the column pages row to the datamap, order of pages is same as `indexColumns` in - * DataMapMeta returned in IndexDataMapFactory. - * Implementation should copy the content of `pages` as needed, because `pages` memory - * may be freed after this method returns, if using unsafe column page. - */ - public abstract void onPageAdded(int blockletId, int pageId, ColumnPage[] pages); - - /** - * This is called during closing of writer.So after this call no more data will be sent to this - * class. - */ - public abstract void finish() throws IOException; - - /** - * It copies the file from temp folder to actual folder - * - * @param dataMapFile - * @throws IOException - */ - protected void commitFile(String dataMapFile) throws IOException { - if (!dataMapFile.startsWith(writeDirectoryPath)) { - throw new UnsupportedOperationException( - "Datamap file " + dataMapFile + " is not written in provided directory path " - + writeDirectoryPath); - } - String dataMapFileName = - dataMapFile.substring(writeDirectoryPath.length(), dataMapFile.length()); - String carbonFilePath = dataMapFileName.substring(0, dataMapFileName.lastIndexOf("/")); - String segmentPath = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segmentId); - if (carbonFilePath.length() > 0) { - carbonFilePath = segmentPath + carbonFilePath; - FileFactory.mkdirs(carbonFilePath, FileFactory.getFileType(carbonFilePath)); - } else { - carbonFilePath = segmentPath; - } - CarbonUtil.copyCarbonDataFileToCarbonStorePath(dataMapFile, carbonFilePath, 0); - } - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/BlockletSerializer.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/BlockletSerializer.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/BlockletSerializer.java index 3d4c717..bd5f994 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/BlockletSerializer.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/BlockletSerializer.java @@ -20,9 +20,15 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet; import org.apache.carbondata.core.datastore.impl.FileFactory; -import org.apache.carbondata.core.indexstore.FineGrainBlocklet; +/** + * A serializer/deserializer for {@link FineGrainBlocklet}, it is used after prune the data + * by {@link org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainDataMap} + */ +@InterfaceAudience.Internal public class BlockletSerializer { /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java new file mode 100644 index 0000000..cdd7387 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMap.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev; + +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; + +/** + * An entity which can store and retrieve index data. + */ +@InterfaceAudience.Internal +public interface DataMap<T extends Blocklet> { + + /** + * It is called to load the data map to memory or to initialize it. + */ + void init(DataMapModel dataMapModel) throws MemoryException, IOException; + + /** + * Prune the datamap with filter expression and partition information. It returns the list of + * blocklets where these filters can exist. + * + * @param filterExp + * @return + */ + List<T> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, + List<String> partitions); + + // TODO Move this method to Abstract class + /** + * Validate whether the current segment needs to be fetching the required data + * + * @param filterExp + * @return + */ + boolean isScanRequired(FilterResolverIntf filterExp); + + /** + * Clear complete index table and release memory. 
+ */ + void clear(); + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java new file mode 100644 index 0000000..77f2249 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev; + +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.core.datamap.DataMapDistributable; +import org.apache.carbondata.core.datamap.DataMapLevel; +import org.apache.carbondata.core.datamap.DataMapMeta; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.schema.table.DataMapSchema; +import org.apache.carbondata.events.Event; + +/** + * Interface for datamap factory, it is responsible for creating the datamap. 
+ */ +public interface DataMapFactory<T extends DataMap> { + + /** + * Initialization of Datamap factory with the identifier and datamap name + */ + void init(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema); + + /** + * Return a new writer for this datamap + */ + DataMapWriter createWriter(String segmentId, String writeDirectoryPath); + + /** + * Get the datamap for segmentid + */ + List<T> getDataMaps(String segmentId) throws IOException; + + /** + * Get datamaps for distributable object. + */ + List<T> getDataMaps(DataMapDistributable distributable) throws IOException; + + /** + * Get all distributable objects of a segmentid + * @return + */ + List<DataMapDistributable> toDistributable(String segmentId); + + /** + * + * @param event + */ + void fireEvent(Event event); + + /** + * Clears datamap of the segment + */ + void clear(String segmentId); + + /** + * Clear all datamaps from memory + */ + void clear(); + + /** + * Return metadata of this datamap + */ + DataMapMeta getMeta(); + + /** + * Type of datamap whether it is FG or CG + */ + DataMapLevel getDataMapType(); +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java new file mode 100644 index 0000000..18252b6 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapWriter.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev; + +import java.io.IOException; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.path.CarbonTablePath; + +/** + * Writer interface for datamap. + * Developer should implement this interface to write index files. + * Writer will be called for every new block/blocklet/page is created when data load is executing. + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public abstract class DataMapWriter { + + protected AbsoluteTableIdentifier identifier; + + protected String segmentId; + + private String writeDirectoryPath; + + public DataMapWriter(AbsoluteTableIdentifier identifier, String segmentId, + String writeDirectoryPath) { + this.identifier = identifier; + this.segmentId = segmentId; + this.writeDirectoryPath = writeDirectoryPath; + } + + /** + * Start of new block notification. 
+ * + * @param blockId file name of the carbondata file + */ + public abstract void onBlockStart(String blockId); + + /** + * End of block notification + */ + public abstract void onBlockEnd(String blockId); + + /** + * Start of new blocklet notification. + * + * @param blockletId sequence number of blocklet in the block + */ + public abstract void onBlockletStart(int blockletId); + + /** + * End of blocklet notification + * + * @param blockletId sequence number of blocklet in the block + */ + public abstract void onBlockletEnd(int blockletId); + + /** + * Add the column pages row to the datamap, order of pages is same as `indexColumns` in + * DataMapMeta returned in DataMapFactory. + * Implementation should copy the content of `pages` as needed, because `pages` memory + * may be freed after this method returns, if using unsafe column page. + */ + public abstract void onPageAdded(int blockletId, int pageId, ColumnPage[] pages); + + /** + * This is called during closing of writer. So after this call no more data will be sent to this + * class. 
+ */ + public abstract void finish() throws IOException; + + /** + * It commits the index file by copying the file from temp folder to actual folder + * + * @param dataMapFile file path of index file + * @throws IOException if IO fails + */ + protected void commitFile(String dataMapFile) throws IOException { + if (!dataMapFile.startsWith(writeDirectoryPath)) { + throw new UnsupportedOperationException( + "Datamap file " + dataMapFile + " is not written in provided directory path " + + writeDirectoryPath); + } + String dataMapFileName = + dataMapFile.substring(writeDirectoryPath.length(), dataMapFile.length()); + String carbonFilePath = dataMapFileName.substring(0, dataMapFileName.lastIndexOf("/")); + String segmentPath = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segmentId); + if (carbonFilePath.length() > 0) { + carbonFilePath = segmentPath + carbonFilePath; + FileFactory.mkdirs(carbonFilePath, FileFactory.getFileType(carbonFilePath)); + } else { + carbonFilePath = segmentPath; + } + CarbonUtil.copyCarbonDataFileToCarbonStorePath(dataMapFile, carbonFilePath, 0); + } + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMap.java deleted file mode 100644 index f86bc0a..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMap.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev; - -import java.io.IOException; -import java.util.List; - -import org.apache.carbondata.core.datastore.block.SegmentProperties; -import org.apache.carbondata.core.indexstore.Blocklet; -import org.apache.carbondata.core.memory.MemoryException; -import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; - -/** - * Datamap is an entity which can store and retrieve index data. - */ -public interface IndexDataMap<T extends Blocklet> { - - /** - * It is called to load the data map to memory or to initialize it. - */ - void init(DataMapModel dataMapModel) throws MemoryException, IOException; - - /** - * Prune the datamap with filter expression and partition information. It returns the list of - * blocklets where these filters can exist. - * - * @param filterExp - * @return - */ - List<T> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, - List<String> partitions); - - // TODO Move this method to Abstract class - /** - * Validate whether the current segment needs to be fetching the required data - * - * @param filterExp - * @return - */ - boolean isScanRequired(FilterResolverIntf filterExp); - - /** - * Clear complete index table and release memory. 
- */ - void clear(); - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMapFactory.java deleted file mode 100644 index 0bab104..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/IndexDataMapFactory.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev; - -import java.io.IOException; -import java.util.List; - -import org.apache.carbondata.core.datamap.DataMapDistributable; -import org.apache.carbondata.core.datamap.DataMapMeta; -import org.apache.carbondata.core.datamap.DataMapType; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.metadata.schema.table.DataMapSchema; -import org.apache.carbondata.events.Event; - -/** - * Interface for datamap factory, it is responsible for creating the datamap. 
- */ -public interface IndexDataMapFactory<T extends IndexDataMap> { - - /** - * Initialization of Datamap factory with the identifier and datamap name - */ - void init(AbsoluteTableIdentifier identifier, DataMapSchema dataMapSchema); - - /** - * Return a new write for this datamap - */ - AbstractDataMapWriter createWriter(String segmentId, String writeDirectoryPath); - - /** - * Get the datamap for segmentid - */ - List<T> getDataMaps(String segmentId) throws IOException; - - /** - * Get datamaps for distributable object. - */ - List<T> getDataMaps(DataMapDistributable distributable) throws IOException; - - /** - * Get all distributable objects of a segmentid - * @return - */ - List<DataMapDistributable> toDistributable(String segmentId); - - /** - * - * @param event - */ - void fireEvent(Event event); - - /** - * Clears datamap of the segment - */ - void clear(String segmentId); - - /** - * Clear all datamaps from memory - */ - void clear(); - - /** - * Return metadata of this datamap - */ - DataMapMeta getMeta(); - - /** - * Type of datamap whether it is FG or CG - */ - DataMapType getDataMapType(); -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMap.java deleted file mode 100644 index df9d4e8..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMap.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev.cgdatamap; - -import org.apache.carbondata.core.datamap.dev.IndexDataMap; -import org.apache.carbondata.core.indexstore.Blocklet; - -public abstract class AbstractCoarseGrainIndexDataMap implements IndexDataMap<Blocklet> { - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMapFactory.java deleted file mode 100644 index 037c32e..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/AbstractCoarseGrainIndexDataMapFactory.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev.cgdatamap; - -import org.apache.carbondata.core.datamap.DataMapType; -import org.apache.carbondata.core.datamap.dev.IndexDataMapFactory; - -/** - * 1. Any filter query which hits the table with datamap will call prune method of CGdatamap. - * 2. The prune method of CGDatamap return list Blocklet , these blocklets contain the - * information of block and blocklet. - * 3. Based on the splits scanrdd schedule the tasks. - */ -public abstract class AbstractCoarseGrainIndexDataMapFactory - implements IndexDataMapFactory<AbstractCoarseGrainIndexDataMap> { - - @Override public DataMapType getDataMapType() { - return DataMapType.CG; - } -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMap.java new file mode 100644 index 0000000..62a1d1b --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMap.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev.cgdatamap; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.indexstore.Blocklet; + +/** + * DataMap for Coarse Grain level, see {@link org.apache.carbondata.core.datamap.DataMapLevel#CG} + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public abstract class CoarseGrainDataMap implements DataMap<Blocklet> { + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMapFactory.java new file mode 100644 index 0000000..4d20cdb --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/cgdatamap/CoarseGrainDataMapFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev.cgdatamap; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.datamap.DataMapLevel; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; + +/** + * Factory for {@link CoarseGrainDataMap} + * 1. Any filter query which hits the table with datamap will call prune method of CGdatamap. + * 2. The prune method of CGDatamap return list Blocklet , these blocklets contain the + * information of block and blocklet. + * 3. Based on the splits scanrdd schedule the tasks. 
+ */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public abstract class CoarseGrainDataMapFactory implements DataMapFactory<CoarseGrainDataMap> { + + @Override public DataMapLevel getDataMapType() { + return DataMapLevel.CG; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java index 458772f..d6abd81 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/AndDataMapExprWrapper.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import org.apache.carbondata.core.datamap.DataMapType; +import org.apache.carbondata.core.datamap.DataMapLevel; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; @@ -91,7 +91,7 @@ public class AndDataMapExprWrapper implements DataMapExprWrapper { return wrappers; } - @Override public DataMapType getDataMapType() { + @Override public DataMapLevel getDataMapType() { return left.getDataMapType(); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java index 36c2472..7068d80 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapper.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.List; -import org.apache.carbondata.core.datamap.DataMapType; +import org.apache.carbondata.core.datamap.DataMapLevel; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; @@ -72,6 +72,6 @@ public interface DataMapExprWrapper extends Serializable { * Get the datamap type. * @return */ - DataMapType getDataMapType(); + DataMapLevel getDataMapType(); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java index 695f653..08589b0 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/DataMapExprWrapperImpl.java @@ -22,7 +22,7 @@ import java.util.List; import java.util.UUID; import org.apache.carbondata.core.datamap.DataMapDistributable; -import org.apache.carbondata.core.datamap.DataMapType; +import org.apache.carbondata.core.datamap.DataMapLevel; import org.apache.carbondata.core.datamap.TableDataMap; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; @@ -80,7 +80,7 @@ public class DataMapExprWrapperImpl implements DataMapExprWrapper { return wrappers; } - @Override public DataMapType getDataMapType() { - return 
dataMap.getIndexDataMapFactory().getDataMapType(); + @Override public DataMapLevel getDataMapType() { + return dataMap.getDataMapFactory().getDataMapType(); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java index 1d90f0d..e660e36 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/expr/OrDataMapExprWrapper.java @@ -22,7 +22,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.carbondata.core.datamap.DataMapType; +import org.apache.carbondata.core.datamap.DataMapLevel; import org.apache.carbondata.core.indexstore.ExtendedBlocklet; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; @@ -88,7 +88,7 @@ public class OrDataMapExprWrapper implements DataMapExprWrapper { } - @Override public DataMapType getDataMapType() { + @Override public DataMapLevel getDataMapType() { return left.getDataMapType(); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMap.java deleted file mode 100644 index ea536b9..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMap.java +++ /dev/null @@ -1,24 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev.fgdatamap; - -import org.apache.carbondata.core.datamap.dev.IndexDataMap; -import org.apache.carbondata.core.indexstore.FineGrainBlocklet; - -public abstract class AbstractFineGrainIndexDataMap implements IndexDataMap<FineGrainBlocklet> { - -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMapFactory.java deleted file mode 100644 index 762c233..0000000 --- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/AbstractFineGrainIndexDataMapFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.carbondata.core.datamap.dev.fgdatamap; - -import org.apache.carbondata.core.datamap.DataMapType; -import org.apache.carbondata.core.datamap.dev.IndexDataMapFactory; - -/** - * 1. Any filter query which hits the table with datamap will call prune method of FGdatamap. - * 2. The prune method of FGDatamap return list FineGrainBlocklet , these blocklets contain the - * information of block, blocklet, page and rowids information as well. - * 3. The pruned blocklets are internally wriitten to file and returns only the block , - * blocklet and filepath information as part of Splits. - * 4. Based on the splits scanrdd schedule the tasks. - * 5. In filterscanner we check the datamapwriterpath from split and reNoteads the - * bitset if exists. And pass this bitset as input to it. 
- */ -public abstract class AbstractFineGrainIndexDataMapFactory - implements IndexDataMapFactory<AbstractFineGrainIndexDataMap> { - - @Override public DataMapType getDataMapType() { - return DataMapType.FG; - } -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainBlocklet.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainBlocklet.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainBlocklet.java new file mode 100644 index 0000000..b42500f --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainBlocklet.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.datamap.dev.fgdatamap; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; + +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.metadata.schema.table.Writable; +import org.apache.carbondata.core.util.BitSetGroup; + +/** + * Used for returning matched rows after pruned by {@link FineGrainDataMap} + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public class FineGrainBlocklet extends Blocklet implements Serializable { + + private List<Page> pages; + + public FineGrainBlocklet(String blockId, String blockletId, List<Page> pages) { + super(blockId, blockletId); + this.pages = pages; + } + + // For serialization purpose + public FineGrainBlocklet() { + + } + + public List<Page> getPages() { + return pages; + } + + public static class Page implements Writable,Serializable { + + private int pageId; + + private int[] rowId; + + public BitSet getBitSet() { + BitSet bitSet = + new BitSet(CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT); + for (int row : rowId) { + bitSet.set(row); + } + return bitSet; + } + + @Override public void write(DataOutput out) throws IOException { + out.writeInt(pageId); + out.writeInt(rowId.length); + for (int i = 0; i < rowId.length; i++) { + out.writeInt(rowId[i]); + } + } + + @Override public void readFields(DataInput in) throws IOException { + pageId = in.readInt(); + int length = in.readInt(); + rowId = new int[length]; + for (int i = 0; i < length; i++) { + rowId[i] = in.readInt(); + } + } + + public void setPageId(int pageId) { + this.pageId = pageId; + } + 
+ public void setRowId(int[] rowId) { + this.rowId = rowId; + } + } + + public BitSetGroup getBitSetGroup(int numberOfPages) { + BitSetGroup bitSetGroup = new BitSetGroup(numberOfPages); + for (int i = 0; i < pages.size(); i++) { + bitSetGroup.setBitSet(pages.get(i).getBitSet(), pages.get(i).pageId); + } + return bitSetGroup; + } + + @Override public void write(DataOutput out) throws IOException { + super.write(out); + int size = pages.size(); + out.writeInt(size); + for (Page page : pages) { + page.write(out); + } + } + + @Override public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + pages = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + Page page = new Page(); + page.readFields(in); + pages.add(page); + } + } + + @Override public boolean equals(Object o) { + return super.equals(o); + } + + @Override public int hashCode() { + return super.hashCode(); + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMap.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMap.java new file mode 100644 index 0000000..18389b2 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMap.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev.fgdatamap; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.datamap.dev.DataMap; + +/** + * DataMap for Fine Grain level, see {@link org.apache.carbondata.core.datamap.DataMapLevel#FG} + */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public abstract class FineGrainDataMap implements DataMap<FineGrainBlocklet> { + +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMapFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMapFactory.java new file mode 100644 index 0000000..0c9aaed --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/fgdatamap/FineGrainDataMapFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.datamap.dev.fgdatamap; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.datamap.DataMapLevel; +import org.apache.carbondata.core.datamap.dev.DataMapFactory; + +/** + * Factory for {@link FineGrainDataMap} + * + * 1. Any filter query which hits the table with datamap will call prune method of FGdatamap. + * 2. The prune method of FGDatamap returns a list of FineGrainBlocklet, these blocklets contain the + * information of block, blocklet, page and rowids information as well. + * 3. The pruned blocklets are internally written to file and returns only the block, + * blocklet and filepath information as part of Splits. + * 4. Based on the splits scanrdd schedule the tasks. + * 5. In filterscanner we check the datamapwriterpath from split and reads the + * bitset if exists. And pass this bitset as input to it. 
+ */ +@InterfaceAudience.Developer("DataMap") +@InterfaceStability.Evolving +public abstract class FineGrainDataMapFactory + implements DataMapFactory<FineGrainDataMap> { + + @Override public DataMapLevel getDataMapType() { + return DataMapLevel.FG; + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef3031d0/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java index c175378..b4e7d1b 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java @@ -29,8 +29,8 @@ import org.apache.carbondata.core.cache.Cache; import org.apache.carbondata.core.cache.CarbonLRUCache; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapModel; -import org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexDataMap; import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.PartitionMapFileStore; @@ -41,7 +41,7 @@ import org.apache.carbondata.core.util.path.CarbonTablePath; * blocks */ public class BlockletDataMapIndexStore - implements Cache<TableBlockIndexUniqueIdentifier, BlockletIndexDataMap> { + implements Cache<TableBlockIndexUniqueIdentifier, BlockletDataMap> { private static final LogService LOGGER = LogServiceFactory.getLogService(BlockletDataMapIndexStore.class.getName()); /** @@ -68,10 +68,10 
@@ public class BlockletDataMapIndexStore } @Override - public BlockletIndexDataMap get(TableBlockIndexUniqueIdentifier identifier) + public BlockletDataMap get(TableBlockIndexUniqueIdentifier identifier) throws IOException { String lruCacheKey = identifier.getUniqueTableSegmentIdentifier(); - BlockletIndexDataMap dataMap = (BlockletIndexDataMap) lruCache.get(lruCacheKey); + BlockletDataMap dataMap = (BlockletDataMap) lruCache.get(lruCacheKey); if (dataMap == null) { try { String segmentPath = CarbonTablePath.getSegmentPath( @@ -99,15 +99,15 @@ public class BlockletDataMapIndexStore } @Override - public List<BlockletIndexDataMap> getAll( + public List<BlockletDataMap> getAll( List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) throws IOException { - List<BlockletIndexDataMap> blockletDataMaps = + List<BlockletDataMap> blockletDataMaps = new ArrayList<>(tableSegmentUniqueIdentifiers.size()); List<TableBlockIndexUniqueIdentifier> missedIdentifiers = new ArrayList<>(); // Get the datamaps for each indexfile from cache. 
try { for (TableBlockIndexUniqueIdentifier identifier : tableSegmentUniqueIdentifiers) { - BlockletIndexDataMap ifPresent = getIfPresent(identifier); + BlockletDataMap ifPresent = getIfPresent(identifier); if (ifPresent != null) { blockletDataMaps.add(ifPresent); } else { @@ -146,7 +146,7 @@ public class BlockletDataMapIndexStore } } } catch (Throwable e) { - for (BlockletIndexDataMap dataMap : blockletDataMaps) { + for (BlockletDataMap dataMap : blockletDataMaps) { dataMap.clear(); } throw new IOException("Problem in loading segment blocks.", e); @@ -161,9 +161,9 @@ public class BlockletDataMapIndexStore * @return */ @Override - public BlockletIndexDataMap getIfPresent( + public BlockletDataMap getIfPresent( TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) { - return (BlockletIndexDataMap) lruCache.get( + return (BlockletDataMap) lruCache.get( tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier()); } @@ -186,7 +186,7 @@ public class BlockletDataMapIndexStore * @return map of task id to segment mapping * @throws IOException */ - private BlockletIndexDataMap loadAndGetDataMap( + private BlockletDataMap loadAndGetDataMap( TableBlockIndexUniqueIdentifier identifier, SegmentIndexFileStore indexFileStore, PartitionMapFileStore partitionFileStore, @@ -198,9 +198,9 @@ public class BlockletDataMapIndexStore if (lock == null) { lock = addAndGetSegmentLock(uniqueTableSegmentIdentifier); } - BlockletIndexDataMap dataMap; + BlockletDataMap dataMap; synchronized (lock) { - dataMap = new BlockletIndexDataMap(); + dataMap = new BlockletDataMap(); dataMap.init(new BlockletDataMapModel(identifier.getFilePath(), indexFileStore.getFileData(identifier.getCarbonIndexFileName()), partitionFileStore.getPartitions(identifier.getCarbonIndexFileName()), @@ -237,8 +237,8 @@ public class BlockletDataMapIndexStore public void clearAccessCount( List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) { for (TableBlockIndexUniqueIdentifier identifier : 
tableSegmentUniqueIdentifiers) { - BlockletIndexDataMap cacheable = - (BlockletIndexDataMap) lruCache.get(identifier.getUniqueTableSegmentIdentifier()); + BlockletDataMap cacheable = + (BlockletDataMap) lruCache.get(identifier.getUniqueTableSegmentIdentifier()); cacheable.clear(); } }