This is an automated email from the ASF dual-hosted git repository. ravipesala pushed a commit to branch branch-1.6 in repository https://gitbox.apache.org/repos/asf/carbondata.git
commit 57309d70d08675c31975d2a60692835e7a6c22cf Author: Manhua <kevin...@qq.com> AuthorDate: Wed Jul 17 17:39:29 2019 +0800 [CARBONDATA-3473] Fix data size calculation of the last column in CarbonCli When updating the chunk data size of the last column, the current code uses columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset) for every blocklet. This leads to a wrong result when calculating the data size of the last column, especially when a carbon data file has multiple blocklets. In this PR, we fix this problem and modify the calculation by recording the end offset of each blocklet. This closes #3330 --- .../java/org/apache/carbondata/tool/DataFile.java | 32 +++++++++++----------- .../org/apache/carbondata/tool/CarbonCliTest.java | 6 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java index e553a78..4ed3945 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java @@ -121,16 +121,21 @@ class DataFile { this.partNo = CarbonTablePath.DataFileUtil.getPartNo(fileName); // calculate blocklet size and column size - // first calculate the header size, it equals the offset of first - // column chunk in first blocklet - long headerSizeInBytes = footer.blocklet_info_list3.get(0).column_data_chunks_offsets.get(0); - long previousOffset = headerSizeInBytes; - for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) { + for (int j = 0; j < footer.getBlocklet_info_list3().size(); j++) { + // remark start and end offset of current blocklet for computing blocklet size + // and chunk data size of the last column + BlockletInfo3 blockletInfo3 = footer.blocklet_info_list3.get(j); + long blockletEndOffset; + if (j != footer.getBlocklet_info_list3().size() - 1) { + // use start offset of next blocklet as end offset of current blocklet + 
blockletEndOffset = footer.blocklet_info_list3.get(j + 1).column_data_chunks_offsets.get(0); + } else { + // use start offset of footer as end offset of current blocklet if it is the last blocklet + blockletEndOffset = fileSizeInBytes - footerSizeInBytes; + } // calculate blocklet size in bytes - long blockletOffset = blockletInfo3.column_data_chunks_offsets.get(0); - blockletSizeInBytes.add(blockletOffset - previousOffset); - previousOffset = blockletOffset; - + this.blockletSizeInBytes.add( + blockletEndOffset - blockletInfo3.column_data_chunks_offsets.get(0)); // calculate column size in bytes for each column LinkedList<Long> columnDataSize = new LinkedList<>(); LinkedList<Long> columnMetaSize = new LinkedList<>(); @@ -140,17 +145,12 @@ class DataFile { columnMetaSize.add(blockletInfo3.column_data_chunks_length.get(i).longValue()); previousChunkOffset = blockletInfo3.column_data_chunks_offsets.get(i); } - // last column chunk data size - columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset); + // update chunk data size of the last column + columnDataSize.add(blockletEndOffset - previousChunkOffset); columnDataSize.removeFirst(); this.columnDataSizeInBytes.add(columnDataSize); this.columnMetaSizeInBytes.add(columnMetaSize); - } - // last blocklet size - blockletSizeInBytes.add( - fileSizeInBytes - footerSizeInBytes - headerSizeInBytes - previousOffset); - this.blockletSizeInBytes.removeFirst(); assert (blockletSizeInBytes.size() == getNumBlocklets()); } diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java index af8d51d..4d89777 100644 --- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java +++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java @@ -234,11 +234,11 @@ public class CarbonCliTest { expectedOutput = buildLines( "BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize Min% 
Max% Min Max " , - "0 0 3.36KB 5.14MB false 0 0.0B 93.76KB 0.0 100.0 0 2999990 " , + "0 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 0 2999990 " , "0 1 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 1 2999992 " , - "1 0 3.36KB 5.14MB false 0 0.0B 93.76KB 0.0 100.0 3 2999994 " , + "1 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 3 2999994 " , "1 1 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 5 2999996 " , - "2 0 3.36KB 4.06MB false 0 0.0B 93.76KB 0.0 100.0 7 2999998 " , + "2 0 3.36KB 2.57MB false 0 0.0B 93.76KB 0.0 100.0 7 2999998 " , "2 1 2.04KB 1.49MB false 0 0.0B 89.62KB 0.0 100.0 9 2999999 "); Assert.assertTrue(output.contains(expectedOutput)); Assert.assertTrue(output.contains("## version Details"));