Repository: incubator-carbondata Updated Branches: refs/heads/master 5fb1ec088 -> 1f8e83f49
Adding Header And Making Footer Optional Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/b41e48f1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/b41e48f1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/b41e48f1 Branch: refs/heads/master Commit: b41e48f1a41394eb0307ecf7308b3422029e7720 Parents: 5fb1ec0 Author: kumarvishal <kumarvishal.1...@gmail.com> Authored: Tue Mar 7 20:54:13 2017 +0800 Committer: jackylk <jacky.li...@huawei.com> Committed: Fri Mar 10 20:27:54 2017 +0800 ---------------------------------------------------------------------- .../carbondata/core/datastore/DataRefNode.java | 6 ++ ...mpressedDimensionChunkFileBasedReaderV3.java | 2 + ...CompressedMeasureChunkFileBasedReaderV3.java | 2 + .../datastore/impl/btree/BTreeNonLeafNode.java | 10 +++ .../impl/btree/BlockBTreeLeafNode.java | 9 +++ .../impl/btree/BlockletBTreeLeafNode.java | 13 ++++ .../core/metadata/blocklet/BlockletInfo.java | 20 ++++++ .../core/reader/CarbonFooterReaderV3.java | 75 ++++++++++++++++++++ .../core/reader/CarbonHeaderReader.java | 68 ++++++++++++++++++ .../core/util/CarbonMetadataUtil.java | 41 +++++++---- .../core/util/DataFileFooterConverterV3.java | 29 ++++---- format/src/main/thrift/carbondata.thrift | 18 ++++- format/src/main/thrift/carbondata_index.thrift | 2 +- pom.xml | 2 +- .../writer/v3/CarbonFactDataWriterImplV3.java | 40 ++++++----- 15 files changed, 291 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java index 456710a..8914196 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/DataRefNode.java @@ -122,4 +122,10 @@ public interface DataRefNode { * @return */ BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache(); + + /** + * number of pages in blocklet + * @return + */ + int numberOfPages(); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java index acaa2fa..79b75e4 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java @@ -40,10 +40,12 @@ import org.apache.commons.lang.ArrayUtils; * V3 format data * data format * Data Format + * <FileHeader> * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>> * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>> * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>> * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>> + * <File Footer> */ public class CompressedDimensionChunkFileBasedReaderV3 extends AbstractChunkReaderV2V3Format { 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java index 307af41..58b2937 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java @@ -41,10 +41,12 @@ import org.apache.commons.lang.ArrayUtils; * V3 format data * data format * Data Format + * <FileHeader> * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>> * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>> * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>> * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>> + * <File Footer> */ public class CompressedMeasureChunkFileBasedReaderV3 extends AbstractMeasureChunkReaderV2V3Format { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java index 8e5976d..01c0177 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BTreeNonLeafNode.java @@ -244,4 +244,14 @@ public 
class BTreeNonLeafNode implements BTreeNode { public BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache() { return deleteDeltaDataCache; } + + /** + * number of pages in blocklet + * @return + */ + @Override + public int numberOfPages() { + // TODO Auto-generated method stub + throw new UnsupportedOperationException("Unsupported operation"); + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockBTreeLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockBTreeLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockBTreeLeafNode.java index 0a2a343..f248ce0 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockBTreeLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockBTreeLeafNode.java @@ -59,4 +59,13 @@ public class BlockBTreeLeafNode extends AbstractBTreeLeafNode { return blockInfo.getTableBlockInfo(); } + /** + * number of pages in blocklet + * @return + */ + @Override + public int numberOfPages() { + throw new UnsupportedOperationException("Unsupported operation"); + } + } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java index 00a0e01..9a123d1 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/btree/BlockletBTreeLeafNode.java @@ -43,6 +43,11 
@@ public class BlockletBTreeLeafNode extends AbstractBTreeLeafNode { private MeasureColumnChunkReader measureColumnChunkReader; /** + * number of pages in blocklet + */ + private int numberOfPages; + + /** * Create a leaf node * * @param builderInfos builder infos which have required metadata to create a leaf node @@ -124,4 +129,12 @@ public class BlockletBTreeLeafNode extends AbstractBTreeLeafNode { throws IOException { return measureColumnChunkReader.readRawMeasureChunk(fileReader, blockIndex); } + + /** + * @return the number of pages in blocklet + */ + @Override + public int numberOfPages() { + return numberOfPages; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java index 97f3822..bfa9d7e 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java @@ -61,11 +61,23 @@ public class BlockletInfo implements Serializable { */ private BlockletIndex blockletIndex; + /** + * last dimension end offset + */ private long dimensionOffset; + /** + * last measure end offsets + */ private long measureOffsets; /** + * number of pages in blocklet + * default value is one for V1 and V2 version + */ + private int numberOfPages = 1; + + /** * @return the numberOfRows */ public int getNumberOfRows() { @@ -169,4 +181,12 @@ public class BlockletInfo implements Serializable { this.measureOffsets = measureOffsets; } + public int getNumberOfPages() { + return numberOfPages; + } + + public void setNumberOfPages(int numberOfPages) { + this.numberOfPages = numberOfPages; + } + } 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/reader/CarbonFooterReaderV3.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonFooterReaderV3.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonFooterReaderV3.java new file mode 100644 index 0000000..8171e12 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonFooterReaderV3.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.reader; + +import java.io.IOException; + +import org.apache.carbondata.format.FileFooter3; + +import org.apache.thrift.TBase; + +/** + * Below class to read file footer of version3 + * carbon data file + */ +public class CarbonFooterReaderV3 { + + //Fact file path + private String filePath; + + //start offset of the file footer + private long footerOffset; + + public CarbonFooterReaderV3(String filePath, long offset) { + this.filePath = filePath; + this.footerOffset = offset; + } + + /** + * It reads the metadata in FileFooter thrift object format. 
+ * + * @return + * @throws IOException + */ + public FileFooter3 readFooterVersion3() throws IOException { + ThriftReader thriftReader = openThriftReader(filePath); + thriftReader.open(); + //Set the offset from where it should read + thriftReader.setReadOffset(footerOffset); + FileFooter3 footer = (FileFooter3) thriftReader.read(); + thriftReader.close(); + return footer; + } + + /** + * Open the thrift reader + * + * @param filePath + * @return + * @throws IOException + */ + private ThriftReader openThriftReader(String filePath) { + + return new ThriftReader(filePath, new ThriftReader.TBaseCreator() { + @Override public TBase create() { + return new FileFooter3(); + } + }); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java new file mode 100644 index 0000000..ab557cc --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonHeaderReader.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.reader; + +import java.io.IOException; + +import org.apache.carbondata.format.FileHeader; + +import org.apache.thrift.TBase; + +/** + * Below class to read file header of version3 + * carbon data file + */ +public class CarbonHeaderReader { + + //Fact file path + private String filePath; + + public CarbonHeaderReader(String filePath) { + this.filePath = filePath; + } + + /** + * It reads the metadata in FileHeader thrift object format. + * + * @return + * @throws IOException + */ + public FileHeader readHeader() throws IOException { + ThriftReader thriftReader = openThriftReader(filePath); + thriftReader.open(); + FileHeader header = (FileHeader) thriftReader.read(); + thriftReader.close(); + return header; + } + + /** + * Open the thrift reader + * + * @param filePath + * @return + * @throws IOException + */ + private ThriftReader openThriftReader(String filePath) { + + return new ThriftReader(filePath, new ThriftReader.TBaseCreator() { + @Override public TBase create() { + return new FileHeader(); + } + }); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java index 371855b..5e227fb 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java @@ -54,6 +54,8 @@ import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.DataChunk3; import org.apache.carbondata.format.Encoding; import org.apache.carbondata.format.FileFooter; +import 
org.apache.carbondata.format.FileFooter3; +import org.apache.carbondata.format.FileHeader; import org.apache.carbondata.format.IndexHeader; import org.apache.carbondata.format.PresenceMeta; import org.apache.carbondata.format.SegmentInfo; @@ -114,17 +116,18 @@ public class CarbonMetadataUtil { } /** - * It converts list of BlockletInfoColumnar to FileFooter thrift objects + * Below method prepares the file footer object for carbon data file version 3 * * @param infoList * @param numCols * @param cardinalities * @return FileFooter */ - public static FileFooter convertFileFooter3(List<BlockletInfo3> infoList, - List<BlockletIndex> blockletIndexs, int[] cardinalities, List<ColumnSchema> columnSchemaList, + public static FileFooter3 convertFileFooterVersion3(List<BlockletInfo3> infoList, + List<BlockletIndex> blockletIndexs, int[] cardinalities, int numberOfColumns, SegmentProperties segmentProperties) throws IOException { - FileFooter footer = getFileFooter3(infoList, blockletIndexs, cardinalities, columnSchemaList); + FileFooter3 footer = + getFileFooter3(infoList, blockletIndexs, cardinalities, numberOfColumns); for (BlockletInfo3 info : infoList) { footer.addToBlocklet_info_list3(info); } @@ -139,18 +142,14 @@ public class CarbonMetadataUtil { * @param columnSchemaList column schema list * @return file footer */ - private static FileFooter getFileFooter3(List<BlockletInfo3> infoList, - List<BlockletIndex> blockletIndexs, int[] cardinalities, - List<ColumnSchema> columnSchemaList) { + private static FileFooter3 getFileFooter3(List<BlockletInfo3> infoList, + List<BlockletIndex> blockletIndexs, int[] cardinalities, int numberOfColumns) { SegmentInfo segmentInfo = new SegmentInfo(); - segmentInfo.setNum_cols(columnSchemaList.size()); + segmentInfo.setNum_cols(numberOfColumns); segmentInfo.setColumn_cardinalities(CarbonUtil.convertToIntegerList(cardinalities)); - ColumnarFormatVersion version = CarbonProperties.getInstance().getFormatVersion(); - FileFooter footer = new 
FileFooter(); - footer.setVersion(version.number()); + FileFooter3 footer = new FileFooter3(); footer.setNum_rows(getNumberOfRowForFooter(infoList)); footer.setSegment_info(segmentInfo); - footer.setTable_columns(columnSchemaList); for (BlockletIndex info : blockletIndexs) { footer.addToBlocklet_index_list(info); } @@ -851,7 +850,7 @@ public class CarbonMetadataUtil { b.flip(); return b.array(); case DECIMAL: - return DataTypeUtil.bigDecimalToByte((BigDecimal)data); + return DataTypeUtil.bigDecimalToByte((BigDecimal) data); default: throw new IllegalArgumentException("Invalid data type"); } @@ -887,4 +886,20 @@ public class CarbonMetadataUtil { } } + /** + * Below method will be used to prepare the file header object for carbondata file + * + * @param isFooterPresent is footer present in carbon data file + * @param columnSchemaList list of column schema + * @return file header thrift object + */ + public static FileHeader getFileHeader(boolean isFooterPresent, + List<ColumnSchema> columnSchemaList) { + FileHeader fileHeader = new FileHeader(); + ColumnarFormatVersion version = CarbonProperties.getInstance().getFormatVersion(); + fileHeader.setIs_footer_present(isFooterPresent); + fileHeader.setColumn_schema(columnSchemaList); + fileHeader.setVersion(version.number()); + return fileHeader; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java index 1ab3133..3fcf427 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java @@ -26,18 +26,21 @@ import 
org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; -import org.apache.carbondata.core.reader.CarbonFooterReader; -import org.apache.carbondata.format.FileFooter; +import org.apache.carbondata.core.reader.CarbonFooterReaderV3; +import org.apache.carbondata.core.reader.CarbonHeaderReader; +import org.apache.carbondata.format.FileFooter3; +import org.apache.carbondata.format.FileHeader; public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter { /** * Below method will be used to convert thrift file meta to wrapper file meta * This method will read the footer from footer offset present in the data file - * 1. It will set the stream offset - * 2. It will read the footer data from file - * 3. parse the footer to thrift object - * 4. convert to wrapper object + * 1. It will read the header from carbon data file, header starts from 0 offset + * 2. It will set the stream offset + * 3. It will read the footer data from file + * 4. parse the footer to thrift object + * 5. 
convert to wrapper object * * @param tableBlockInfo * table block info @@ -46,19 +49,20 @@ public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter { @Override public DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException { DataFileFooter dataFileFooter = new DataFileFooter(); - CarbonFooterReader reader = - new CarbonFooterReader(tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset()); - FileFooter footer = reader.readFooter(); - dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) footer.getVersion())); + CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(tableBlockInfo.getFilePath()); + FileHeader fileHeader = carbonHeaderReader.readHeader(); + CarbonFooterReaderV3 reader = + new CarbonFooterReaderV3(tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset()); + FileFooter3 footer = reader.readFooterVersion3(); + dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) fileHeader.getVersion())); dataFileFooter.setNumberOfRows(footer.getNum_rows()); dataFileFooter.setSegmentInfo(getSegmentInfo(footer.getSegment_info())); List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>(); - List<org.apache.carbondata.format.ColumnSchema> table_columns = footer.getTable_columns(); + List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema(); for (int i = 0; i < table_columns.size(); i++) { columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i))); } dataFileFooter.setColumnInTable(columnSchemaList); - List<org.apache.carbondata.format.BlockletIndex> leaf_node_indices_Thrift = footer.getBlocklet_index_list(); List<BlockletIndex> blockletIndexList = new ArrayList<BlockletIndex>(); @@ -107,6 +111,7 @@ public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter { blockletInfo.setNumberOfRows(blockletInfoThrift.getNum_rows()); 
blockletInfo.setDimensionOffset(blockletInfoThrift.getDimension_offsets()); blockletInfo.setMeasureOffsets(blockletInfoThrift.getMeasure_offsets()); + blockletInfo.setNumberOfPages(blockletInfoThrift.getNumber_number_of_pages()); return blockletInfo; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/format/src/main/thrift/carbondata.thrift ---------------------------------------------------------------------- diff --git a/format/src/main/thrift/carbondata.thrift b/format/src/main/thrift/carbondata.thrift index 3114ee1..198afde 100644 --- a/format/src/main/thrift/carbondata.thrift +++ b/format/src/main/thrift/carbondata.thrift @@ -166,6 +166,7 @@ struct BlockletInfo3{ 3: required list<i32> column_data_chunks_length; // Information about length all column chunks in this blocklet 4: required i64 dimension_offsets; 5: required i64 measure_offsets; + 6: required i32 number_number_of_pages; // this is required for alter table, in case of alter table when filter is only selected on new added column this will help } /** * Footer for indexed carbon file @@ -178,8 +179,18 @@ struct FileFooter{ 5: required list<BlockletIndex> blocklet_index_list; // blocklet index of all blocklets in this file 6: optional list<BlockletInfo> blocklet_info_list; // Information about blocklets of all columns in this file 7: optional list<BlockletInfo2> blocklet_info_list2; // Information about blocklets of all columns in this file - 8: optional list<BlockletInfo3> blocklet_info_list3; // Information about blocklets of all columns in this file - 9: optional dictionary.ColumnDictionaryChunk dictionary; // blocklet local dictionary + 8: optional dictionary.ColumnDictionaryChunk dictionary; // blocklet local dictionary +} + +/** +* Footer for indexed carbon file for V3 format +*/ +struct FileFooter3{ + 1: required i64 num_rows; // Total number of rows in this file + 2: required SegmentInfo segment_info; // Segment info (will be same/repeated for all files in this segment) + 
3: required list<BlockletIndex> blocklet_index_list; // blocklet index of all blocklets in this file + 4: optional list<BlockletInfo3> blocklet_info_list3; // Information about blocklets of all columns in this file + 5: optional dictionary.ColumnDictionaryChunk dictionary; // blocklet local dictionary } /** @@ -187,7 +198,8 @@ struct FileFooter{ */ struct FileHeader{ 1: required i32 version; // version used for data compatibility - 2: required list<schema.ColumnSchema> table_columns; // Description of columns in this file + 2: required list<schema.ColumnSchema> column_schema; // Description of columns in this file + 3: optional bool is_footer_present; // to check whether footer is present or not } /** http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/format/src/main/thrift/carbondata_index.thrift ---------------------------------------------------------------------- diff --git a/format/src/main/thrift/carbondata_index.thrift b/format/src/main/thrift/carbondata_index.thrift index 364a7e5..ef6fb18 100644 --- a/format/src/main/thrift/carbondata_index.thrift +++ b/format/src/main/thrift/carbondata_index.thrift @@ -41,6 +41,6 @@ struct IndexHeader{ struct BlockIndex{ 1: required i64 num_rows; // Total number of rows in this file 2: required string file_name; // Block file name - 3: required i64 offset; // Offset of block + 3: required i64 offset; // Offset of the footer 4: required carbondata.BlockletIndex block_index; // Block index } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 95a3ff4..cf37b50 100644 --- a/pom.xml +++ b/pom.xml @@ -93,7 +93,7 @@ <modules> <module>common</module> - <module>core</module> + <module>core</module> <module>processing</module> <module>hadoop</module> <module>integration/spark-common</module> 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b41e48f1/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java index fa7bf27..49d7ab2 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java @@ -26,7 +26,6 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; import org.apache.carbondata.core.datastore.columnar.IndexStorage; import org.apache.carbondata.core.datastore.compression.WriterCompressModel; -import org.apache.carbondata.core.metadata.ColumnarFormatVersion; import org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex; import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex; import org.apache.carbondata.core.metadata.index.BlockIndexInfo; @@ -35,9 +34,8 @@ import org.apache.carbondata.core.util.CarbonMetadataUtil; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.NodeHolder; -import org.apache.carbondata.core.writer.CarbonFooterWriter; import org.apache.carbondata.format.BlockletInfo3; -import org.apache.carbondata.format.FileFooter; +import org.apache.carbondata.format.FileFooter3; import org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage; import org.apache.carbondata.processing.store.writer.AbstractFactDataWriter; import org.apache.carbondata.processing.store.writer.CarbonDataWriterVo; 
@@ -45,6 +43,10 @@ import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterE /** * Below class will be used to write the data in V3 format + * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>> + * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>> + * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>> + * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>> */ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]> { @@ -61,8 +63,8 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]> public CarbonFactDataWriterImplV3(CarbonDataWriterVo dataWriterVo) { super(dataWriterVo); this.numberOfChunksInBlocklet = Integer.parseInt(CarbonProperties.getInstance() - .getProperty(CarbonV3DataFormatConstants.NUMBER_OF_PAGE_IN_BLOCKLET_COLUMN, - CarbonV3DataFormatConstants.NUMBER_OF_PAGE_IN_BLOCKLET_COLUMN_DEFAULT_VALUE)); + .getProperty(CarbonV3DataFormatConstants.NUMBER_OF_PAGE_IN_BLOCKLET_COLUMN, + CarbonV3DataFormatConstants.NUMBER_OF_PAGE_IN_BLOCKLET_COLUMN_DEFAULT_VALUE)); dataWriterHolder = new DataWriterHolder(); } @@ -239,15 +241,20 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]> try { // get the current file position long currentPosition = channel.size(); - CarbonFooterWriter writer = new CarbonFooterWriter(filePath); // get thrift file footer instance - FileFooter convertFileMeta = CarbonMetadataUtil - .convertFileFooter3(blockletMetadata, blockletIndex, localCardinality, - thriftColumnSchemaList, dataWriterVo.getSegmentProperties()); + FileFooter3 convertFileMeta = CarbonMetadataUtil + .convertFileFooterVersion3(blockletMetadata, blockletIndex, localCardinality, + thriftColumnSchemaList.size(), dataWriterVo.getSegmentProperties()); // fill the carbon index details fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), filePath, currentPosition); // write the footer - writer.writeFooter(convertFileMeta, currentPosition); + 
byte[] byteArray = CarbonUtil.getByteArray(convertFileMeta); + ByteBuffer buffer = + ByteBuffer.allocate(byteArray.length + CarbonCommonConstants.LONG_SIZE_IN_BYTE); + buffer.put(byteArray); + buffer.putLong(currentPosition); + buffer.flip(); + channel.write(buffer); } catch (IOException e) { throw new CarbonDataWriterException("Problem while writing the carbon file: ", e); } @@ -319,11 +326,12 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]> // write the header try { if (fileChannel.size() == 0) { - ColumnarFormatVersion version = CarbonProperties.getInstance().getFormatVersion(); - byte[] header = (CarbonCommonConstants.CARBON_DATA_VERSION_HEADER + version).getBytes(); - ByteBuffer buffer = ByteBuffer.allocate(header.length); - buffer.put(header); - buffer.rewind(); + // below code is to write the file header + byte[] fileHeader = + CarbonUtil.getByteArray(CarbonMetadataUtil.getFileHeader(true, thriftColumnSchemaList)); + ByteBuffer buffer = ByteBuffer.allocate(fileHeader.length); + buffer.put(fileHeader); + buffer.flip(); fileChannel.write(buffer); } offset = channel.size(); @@ -411,7 +419,7 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter<short[]> .getBlockletIndex(nodeHolderList, dataWriterVo.getSegmentProperties().getMeasures())); BlockletInfo3 blockletInfo3 = new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength, - dimensionOffset, measureOffset); + dimensionOffset, measureOffset, dataWriterHolder.getNumberOfPagesAdded()); blockletMetadata.add(blockletInfo3); }