KYLIN-1405 add validation when init cube desc Signed-off-by: Li, Yang <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/64800a10 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/64800a10 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/64800a10 Branch: refs/heads/2.x-staging Commit: 64800a107b7c24bffe8e8803e85c0bdfd2b407b4 Parents: f621290 Author: Chen Luwei <[email protected]> Authored: Tue Jan 26 11:03:44 2016 +0800 Committer: Li, Yang <[email protected]> Committed: Sun Feb 14 15:28:44 2016 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/cube/model/CubeDesc.java | 161 +++++++++++++++-- .../org/apache/kylin/cube/CubeDescTest.java | 172 ++++++++++++++++++- 2 files changed, 315 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/64800a10/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index a1a0531..be5e4ee 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -20,21 +20,13 @@ package org.apache.kylin.cube.model; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; -import java.util.Set; import javax.annotation.Nullable; import org.apache.commons.codec.binary.Base64; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; @@ -43,6 +35,9 @@ import org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.Array; import org.apache.kylin.common.util.CaseInsensitiveStringMap; import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.cube.model.validation.IValidatorRule; +import org.apache.kylin.cube.model.validation.ResultLevel; +import org.apache.kylin.cube.model.validation.ValidateContext; import org.apache.kylin.measure.MeasureType; import org.apache.kylin.measure.extendedcolumn.ExtendedColumnMeasureType; import org.apache.kylin.metadata.MetadataConstants; @@ -65,12 +60,15 @@ import com.google.common.base.Function; import com.google.common.collect.Collections2; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** */ @SuppressWarnings("serial") @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class CubeDesc extends RootPersistentEntity { + private static final Logger logger = LoggerFactory.getLogger(CubeDesc.class); public static class CannotFilterExtendedColumnException extends RuntimeException { public CannotFilterExtendedColumnException(TblColRef tblColRef) { @@ -479,6 +477,9 @@ public class CubeDesc extends RootPersistentEntity { this.addError("The cubeDesc '" + this.getName() + "' doesn't have data model specified."); } + // check if aggregation group is valid + validate(this); + this.model = MetadataManager.getInstance(config).getDataModelDesc(this.modelName); if (this.model == null) { @@ -510,6 +511,146 @@ public class CubeDesc extends RootPersistentEntity { } } + public void validate(CubeDesc cube) { + inner(cube); + } + + private void inner(CubeDesc cube) { + int maxSize = getMaxAgrGroupSize(); + int index = 0; + + for (AggregationGroup agg : cube.getAggregationGroups()) { + if (agg.getIncludes() == null) { + logger.error("Aggregation group " + index + " includes field not set"); + throw new IllegalStateException("Aggregation group " + index + " includes field not set"); + } + + if (agg.getSelectRule() == null) { + logger.error("Aggregation group " + index + " includes field not set"); + throw new IllegalStateException("Aggregation group " + index + " select rule field not set"); + } + + Set<String> includeDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + getDims(includeDims, agg.getIncludes()); + + Set<String> mandatoryDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + getDims(mandatoryDims, agg.getSelectRule().mandatory_dims); + + ArrayList<Set<String>> hierarchyDimsList = new ArrayList (); + Set<String> hierarchyDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + getDims(hierarchyDimsList, hierarchyDims, agg.getSelectRule().hierarchy_dims); + + ArrayList<Set<String>> jointDimsList = new ArrayList (); + Set<String> jointDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + getDims(jointDimsList, jointDims, agg.getSelectRule().joint_dims); + + if (!includeDims.containsAll(mandatoryDims) || !containsAll(includeDims, hierarchyDimsList) || !containsAll(includeDims, jointDimsList)) { + logger.error("Aggregation group " + index + " Include dims not containing all the used dims"); + throw new IllegalStateException("Aggregation group " + index + " Include dims not containing all the used dims"); + } + + Set<String> normalDims = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + normalDims.addAll(includeDims); + normalDims.removeAll(mandatoryDims); + normalDims.removeAll(hierarchyDims); + normalDims.removeAll(jointDims); + + int normalDimSize = normalDims.size(); + int hierarchyDimSize = hierarchyDimsList.size(); + int jointDimSize = jointDimsList.size(); + + if (normalDimSize + hierarchyDimSize + jointDimSize > maxSize) { + logger.error("Aggregation group " + index + " has too many dimensions"); + throw new IllegalStateException("Aggregation group " + index + " has too many dimensions"); + } + + if (CollectionUtils.containsAny(mandatoryDims, hierarchyDims)) { + logger.warn("Aggregation group " + index + " mandatory dims overlap with hierarchy dims"); + } + if (CollectionUtils.containsAny(mandatoryDims, jointDims)) { + logger.warn("Aggregation group " + index + " mandatory dims overlap with joint dims"); + } + if (CollectionUtils.containsAny(hierarchyDims, jointDims)) { + logger.error("Aggregation group " + index + " hierarchy dims overlap with joint dims"); + throw new IllegalStateException("Aggregation group " + index + " hierarchy dims overlap with joint dims"); + } + if (hasSingle(hierarchyDimsList)) { + logger.error("Aggregation group " + index + " require at least 2 dims in a hierarchy"); + throw new IllegalStateException("Aggregation group " + index + " require at least 2 dims in a hierarchy"); + } + if (hasSingle(jointDimsList)) { + logger.error("Aggregation group " + index + " require at least 2 dims in a joint"); + throw new IllegalStateException("Aggregation group " + index + " require at least 2 dims in a joint"); + } + if (hasOverlap(hierarchyDimsList, hierarchyDims)) { + logger.error("Aggregation group " + index + " a dim exist in more than one hierarchy"); + throw new IllegalStateException("Aggregation group " + index + " a dim exist in more than one hierarchy"); + } + if (hasOverlap(jointDimsList, jointDims)) { + logger.error("Aggregation group " + index + " a dim exist in more than one joint"); + throw new IllegalStateException("Aggregation group " + index + " a dim exist in more than one joint"); + } + + index++; + } + } + + private void getDims (Set<String> dims, String [] stringSet) { + if (stringSet != null) { + for (String str : stringSet) { + dims.add(str); + } + } + } + + private void getDims (ArrayList<Set<String>> dimsList, Set<String> dims, String [][] stringSets) { + Set<String> temp = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + if (stringSets != null) { + for (String[] ss : stringSets) { + temp.clear(); + for (String s : ss) { + temp.add(s); + dims.add(s); + } + dimsList.add(temp); + } + } + } + + private boolean containsAll(Set<String> includeDims, ArrayList<Set<String>> dimsList) { + boolean b = true; + for (Set<String> dims : dimsList) { + if (!includeDims.containsAll(dims)) + b = false; + } + return b; + } + + private boolean hasSingle (ArrayList<Set<String>> dimsList) { + boolean hasSingle = false; + for (Set<String> dims : dimsList) { + if (dims.size() < 2) + hasSingle = true; + } + return hasSingle; + } + + private boolean hasOverlap (ArrayList<Set<String>> dimsList, Set<String> Dims) { + boolean hasOverlap = false; + int dimSize = 0; + for (Set<String> dims : dimsList) { + dimSize += dims.size(); + } + if (dimSize != Dims.size()) + hasOverlap = true; + return hasOverlap; + } + + protected int getMaxAgrGroupSize() { + String size = KylinConfig.getInstanceFromEnv().getOptional(IValidatorRule.KEY_MAX_AGR_GROUP_SIZE, String.valueOf(IValidatorRule.DEFAULT_MAX_AGR_GROUP_SIZE)); + return Integer.parseInt(size); + } + private void initDimensionColumns() { for (DimensionDesc dim : dimensions) { JoinDesc join = dim.getJoin(); http://git-wip-us.apache.org/repos/asf/kylin/blob/64800a10/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java index 13fcd4a..512f99f 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java @@ -18,24 +18,28 @@ package org.apache.kylin.cube; -import java.util.HashMap; -import java.util.Map; - +import com.google.common.collect.Maps; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.model.CubeDesc; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.apache.kylin.cube.model.SelectRule; +import org.apache.kylin.metadata.MetadataManager; +import org.junit.*; +import org.junit.rules.ExpectedException; -import com.google.common.collect.Maps; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; /** * @author yangli9 */ public class CubeDescTest extends LocalFileMetadataTestCase { + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Before public void setUp() throws Exception { this.createTestMetadata(); @@ -47,6 +51,158 @@ public class CubeDescTest extends LocalFileMetadataTestCase { } @Test + public void testGoodInit() throws Exception { + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit1() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 includes field not set"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + String[] temp = null; + cubeDesc.getAggregationGroups().get(0).setIncludes(temp); + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit2() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 select rule field not set"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + SelectRule temp = null; + cubeDesc.getAggregationGroups().get(0).setSelectRule(temp); + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit3() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 Include dims not containing all the used dims"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + String[] temp = Arrays.asList(cubeDesc.getAggregationGroups().get(0).getIncludes()).subList(0, 3).toArray(new String[3]); + cubeDesc.getAggregationGroups().get(0).setIncludes(temp); + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit4() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 has too many dimensions"); + + CubeDesc desc = new CubeDesc() { + @Override + protected int getMaxAgrGroupSize() { + return 3; + } + }; + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + desc.validate(cubeDesc); + } + + @Test + public void testBadInit5() throws Exception { + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { + "seller_id", "META_CATEG_NAME"}; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit6() throws Exception { + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { + "seller_id", "lstg_format_name"}; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit7() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 require at least 2 dims in a joint"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { + new String[] { "lstg_format_name" } }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit8() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 hierarchy dims overlap with joint dims"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { + new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit9() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 hierarchy dims overlap with joint dims"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { + new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, + new String[] { "lstg_format_name", "lstg_site_id" } }; + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { + new String[] { "META_CATEG_NAME", "lstg_format_name" } }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit10() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 a dim exist in more than one joint"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { + new String[] { "lstg_format_name", "lstg_site_id", "slr_segment_cd" }, + new String[] { "lstg_format_name", "lstg_site_id", "leaf_categ_id"} }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit11() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 require at least 2 dims in a hierarchy"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { + new String[] { "META_CATEG_NAME" } }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test + public void testBadInit12() throws Exception { + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Aggregation group 0 a dim exist in more than one hierarchy"); + + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { + new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, + new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } }; + + cubeDesc.init(getTestConfig(), MetadataManager.getInstance(getTestConfig()).getAllTablesMap()); + } + + @Test public void testSerialize() throws Exception { CubeDesc desc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); String str = JsonUtil.writeValueAsIndentString(desc);
