[CARBONDATA-2769] Fix bug when getting shard name from data before version 1.4
Datamap creation needs the shard name. Carbon has included the segment id in the carbondata file name since version 1.4. We should return the proper shard name when building a datamap on data written before version 1.4, or it would cause problems when querying. This closes #2538 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/edce8a8c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/edce8a8c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/edce8a8c Branch: refs/heads/branch-1.4 Commit: edce8a8c1a7226133bb57484f66594df6f1d8463 Parents: 700ad4b Author: Manhua <kevin...@qq.com> Authored: Mon Jul 23 12:01:34 2018 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Tue Jul 31 00:11:26 2018 +0530 ---------------------------------------------------------------------- .../core/util/path/CarbonTablePath.java | 22 +++++++++++++++++--- .../CarbonFormatDirectoryStructureTest.java | 8 +++++++ 2 files changed, 27 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/edce8a8c/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java index c88e248..275d3d6 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java +++ b/core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java @@ -661,13 +661,29 @@ public class CarbonTablePath { /** * Unique task name + * + * A shard name is composed by `TaskNo-BucketNo-SegmentNo-Timestamp` + * As for data before version 1.4, shard name was `TaskNo-BucketNo-Timestamp` + * * @param actualBlockName * @return */ public static String getShardName(String 
actualBlockName) { - return DataFileUtil.getTaskNo(actualBlockName) + "-" + DataFileUtil.getBucketNo(actualBlockName) - + "-" + DataFileUtil.getSegmentNo(actualBlockName) + "-" + DataFileUtil - .getTimeStampFromFileName(actualBlockName); + String segmentNoStr = DataFileUtil.getSegmentNo(actualBlockName); + StringBuilder shardName = new StringBuilder(); + if (null != segmentNoStr) { + shardName.append(DataFileUtil.getTaskNo(actualBlockName)).append("-"); + shardName.append(DataFileUtil.getBucketNo(actualBlockName)).append("-"); + shardName.append(segmentNoStr).append("-"); + shardName.append(DataFileUtil.getTimeStampFromFileName(actualBlockName)); + return shardName.toString(); + } else { + // data before version 1.4 does not have SegmentNo in carbondata filename + shardName.append(DataFileUtil.getTaskNo(actualBlockName)).append("-"); + shardName.append(DataFileUtil.getBucketNo(actualBlockName)).append("-"); + shardName.append(DataFileUtil.getTimeStampFromFileName(actualBlockName)); + return shardName.toString(); + } } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/edce8a8c/core/src/test/java/org/apache/carbondata/core/util/path/CarbonFormatDirectoryStructureTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/util/path/CarbonFormatDirectoryStructureTest.java b/core/src/test/java/org/apache/carbondata/core/util/path/CarbonFormatDirectoryStructureTest.java index e52c737..340ac02 100644 --- a/core/src/test/java/org/apache/carbondata/core/util/path/CarbonFormatDirectoryStructureTest.java +++ b/core/src/test/java/org/apache/carbondata/core/util/path/CarbonFormatDirectoryStructureTest.java @@ -50,4 +50,12 @@ public class CarbonFormatDirectoryStructureTest { assertTrue(CarbonTablePath.DataFileUtil.getSegmentNo("part-3-4-0-999.carbondata") == null); assertTrue(CarbonTablePath.DataFileUtil.getSegmentNo("part-3-4-0-0-999.carbondata").equals("0")); } + + @Test public void 
testGetShardName() throws IOException { + assertTrue(CarbonTablePath.getShardName("part-1-2_batchno3-4-5-999.carbondata").equals("2_batchno3-4-5-999")); + + // check compatible for data generated before carbon version 1.4 which does not have segment id in filename + assertTrue(CarbonTablePath.getShardName("part-1-2_batchno3-4-999.carbondata").equals("2_batchno3-4-999")); + + } }