KYLIN-2330 fix CubeDesc returning redundant DeriveInfo
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4d35edd7 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4d35edd7 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4d35edd7 Branch: refs/heads/yang22-cdh5.7 Commit: 4d35edd7b47af65e706465a7fe7c9f7ddd83fd9d Parents: 17e323e Author: Yang Li <liy...@apache.org> Authored: Thu Dec 29 07:13:12 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Thu Dec 29 07:13:24 2016 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/cube/model/CubeDesc.java | 46 +++++++++++++---- .../org/apache/kylin/cube/CubeDescTest.java | 53 ++++++++++++++++++-- .../test_case_data/localmeta/cube_desc/ssb.json | 12 ++++- 3 files changed, 95 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index 853571c..219435e 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -789,25 +789,49 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { for (int i = 0; i < derivedCols.length; i++) { if (ArrayUtils.contains(hostCols, derivedCols[i])) { derivedCols = (TblColRef[]) ArrayUtils.remove(derivedCols, i); - extra = (String[]) ArrayUtils.remove(extra, i); + if (extra != null) + extra = (String[]) ArrayUtils.remove(extra, i); i--; } } + + if (derivedCols.length == 0) + return; - Map<TblColRef, DeriveInfo> toHostMap = derivedToHostMap; - Map<Array<TblColRef>, List<DeriveInfo>> hostToMap = hostToDerivedMap; + for (int i = 0; i < derivedCols.length; i++) { + TblColRef derivedCol = derivedCols[i]; + boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1")); + derivedToHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne)); + } Array<TblColRef> hostColArray = new Array<TblColRef>(hostCols); - List<DeriveInfo> infoList = hostToMap.get(hostColArray); + List<DeriveInfo> infoList = hostToDerivedMap.get(hostColArray); if (infoList == null) { - hostToMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>()); + hostToDerivedMap.put(hostColArray, infoList = new ArrayList<DeriveInfo>()); } - infoList.add(new DeriveInfo(type, dimension, derivedCols, false)); - - for (int i = 0; i < derivedCols.length; i++) { - TblColRef derivedCol = derivedCols[i]; - boolean isOneToOne = type == DeriveType.PK_FK || ArrayUtils.contains(hostCols, derivedCol) || (extra != null && extra[i].contains("1-1")); - toHostMap.put(derivedCol, new DeriveInfo(type, dimension, hostCols, isOneToOne)); + + // Merged duplicated derived column + List<TblColRef> whatsLeft = new ArrayList<>(); + for (TblColRef derCol : derivedCols) { + boolean merged = false; + for (DeriveInfo existing : infoList) { + if (existing.type == type && existing.dimension.getTableRef().equals(dimension.getTableRef())) { + if (ArrayUtils.contains(existing.columns, derCol)) { + merged = true; + break; + } + if (type == DeriveType.LOOKUP) { + existing.columns = (TblColRef[]) ArrayUtils.add(existing.columns, derCol); + merged = true; + break; + } + } + } + if (!merged) + whatsLeft.add(derCol); + } + if (whatsLeft.size() > 0) { + infoList.add(new DeriveInfo(type, dimension, (TblColRef[]) whatsLeft.toArray(new TblColRef[whatsLeft.size()]), false)); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java index 27c154b..c192da0 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java @@ -18,19 +18,29 @@ package org.apache.kylin.cube; +import static org.junit.Assert.assertEquals; + import java.io.File; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; +import org.apache.kylin.common.util.Array; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.model.AggregationGroup; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.cube.model.CubeDesc.DeriveInfo; +import org.apache.kylin.cube.model.CubeDesc.DeriveType; import org.apache.kylin.cube.model.SelectRule; +import org.apache.kylin.metadata.model.TblColRef; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -46,7 +56,7 @@ import com.google.common.collect.Maps; public class CubeDescTest extends LocalFileMetadataTestCase { private static final String CUBE_WITH_SLR_DESC = "test_kylin_cube_with_slr_desc"; - + private String SELLER_ID; private String SLR_SEGMENT_CD; private String LSTG_FORMAT_NAME; @@ -55,14 +65,14 @@ public class CubeDescTest extends LocalFileMetadataTestCase { private String CATEG_LVL2_NAME; private String CATEG_LVL3_NAME; private String LEAF_CATEG_ID; - + @Rule public ExpectedException thrown = ExpectedException.none(); @Before public void setUp() throws Exception { this.createTestMetadata(); - + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc(CUBE_WITH_SLR_DESC); AggregationGroup g = cubeDesc.getAggregationGroups().get(0); SELLER_ID = getColInAggrGroup(g, "SELLER_ID"); @@ -230,7 +240,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase { } @Test - public void testCombinationIntOverflow() throws Exception { + public void testCombinationIntOverflow() throws Exception { for (File f : new File(LocalFileMetadataTestCase.LOCALMETA_TEMP_DATA, "cube_desc").listFiles()) { if (f.getName().endsWith(".bad")) { String path = f.getPath(); @@ -273,7 +283,42 @@ public class CubeDescTest extends LocalFileMetadataTestCase { Map<?, ?> map2 = JsonUtil.readValue(mapStr, HashMap.class); Assert.assertEquals(map, map2); + } + @Test + public void testDerivedInfo() { + { + CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc(CUBE_WITH_SLR_DESC); + List<TblColRef> givenCols = new ArrayList<>(); + givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LSTG_SITE_ID")); + givenCols.add(cube.findColumnRef("TEST_KYLIN_FACT", "LEAF_CATEG_ID")); + Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null); + assertEquals(3, hostToDerivedInfo.size()); + assertEquals(Pair.newPair(3, 2), countDerivedInfo(hostToDerivedInfo)); + } + + { + CubeDesc cube = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("ssb"); + List<TblColRef> givenCols = new ArrayList<>(); + givenCols.add(cube.findColumnRef("V_LINEORDER", "LO_PARTKEY")); + Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo = cube.getHostToDerivedInfo(givenCols, null); + assertEquals(1, hostToDerivedInfo.size()); + assertEquals(Pair.newPair(1, 1), countDerivedInfo(hostToDerivedInfo)); + } + } + + private Pair<Integer, Integer> countDerivedInfo(Map<Array<TblColRef>, List<DeriveInfo>> hostToDerivedInfo) { + int pkfkCount = 0; + int lookupCount = 0; + for (Entry<Array<TblColRef>, List<DeriveInfo>> entry : hostToDerivedInfo.entrySet()) { + for (DeriveInfo deriveInfo : entry.getValue()) { + if (deriveInfo.type == DeriveType.PK_FK) + pkfkCount++; + if (deriveInfo.type == DeriveType.LOOKUP) + lookupCount++; + } + } + return Pair.newPair(pkfkCount, lookupCount); } private Collection<String> sortStrs(String[] strs) { http://git-wip-us.apache.org/repos/asf/kylin/blob/4d35edd7/examples/test_case_data/localmeta/cube_desc/ssb.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/ssb.json b/examples/test_case_data/localmeta/cube_desc/ssb.json index d3ea10b..a13ac53 100644 --- a/examples/test_case_data/localmeta/cube_desc/ssb.json +++ b/examples/test_case_data/localmeta/cube_desc/ssb.json @@ -6,7 +6,17 @@ "name" : "SSB.PART_DERIVED", "table" : "SSB.PART", "column" : null, - "derived" : [ "P_MFGR", "P_CATEGORY", "P_BRAND" ] + "derived" : [ "P_MFGR" ] + }, { + "name" : "SSB.PART_DERIVED", + "table" : "SSB.PART", + "column" : null, + "derived" : [ "P_CATEGORY" ] + }, { + "name" : "SSB.PART_DERIVED", + "table" : "SSB.PART", + "column" : null, + "derived" : [ "P_BRAND" ] }, { "name" : "C_CITY", "table" : "SSB.CUSTOMER",