This is an automated email from the ASF dual-hosted git repository. nic pushed a commit to branch 3.0.x in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 4300fa51a048a4033c54fa775e4e7dff58413c07 Author: zhangbushi5 <841643...@qq.com> AuthorDate: Fri Dec 6 17:04:22 2019 +0800 KYLIN-4280 SegmenPruner support 'OR' and 'NOT' --- .../apache/kylin/cube/common/SegmentPruner.java | 48 ++----- .../apache/kylin/cube/common/TupleFilterNode.java | 76 +++++++++++ .../kylin/cube/common/SegmentPrunerTest.java | 151 +++++++++++++++++++++ 3 files changed, 241 insertions(+), 34 deletions(-) diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java index f3f2052..ae21a4d 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java @@ -19,9 +19,7 @@ package org.apache.kylin.cube.common; import java.util.ArrayList; -import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.commons.lang.StringUtils; @@ -45,11 +43,11 @@ import org.slf4j.LoggerFactory; public class SegmentPruner { private static final Logger logger = LoggerFactory.getLogger(SegmentPruner.class); - final private Set<CompareTupleFilter> mustTrueCompares; + private TupleFilterNode node; public SegmentPruner(TupleFilter filter) { - this.mustTrueCompares = filter == null ? Collections.<CompareTupleFilter> emptySet() - : filter.findMustTrueCompareFilters(); + this.node = new TupleFilterNode(filter); + } public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) { @@ -62,7 +60,7 @@ public class SegmentPruner { } public boolean check(CubeSegment seg) { - + if (seg.getInputRecords() == 0) { if (seg.getConfig().isSkippingEmptySegments()) { logger.debug("Prune segment {} due to 0 input record", seg); @@ -72,53 +70,35 @@ public class SegmentPruner { } } - Map<String, DimensionRangeInfo> segDimRangInfoMap = seg.getDimensionRangeInfoMap(); - for (CompareTupleFilter comp : mustTrueCompares) { - TblColRef col = comp.getColumn(); - - if (!col.getType().needCompare()) { - continue; - } - - DimensionRangeInfo dimRangeInfo = segDimRangInfoMap.get(col.getIdentity()); - if (dimRangeInfo == null) - dimRangeInfo = tryDeduceRangeFromPartitionCol(seg, col); - if (dimRangeInfo == null) - continue; - - String minVal = dimRangeInfo.getMin(); - String maxVal = dimRangeInfo.getMax(); - - if (!satisfy(comp, minVal, maxVal)) { - logger.debug("Prune segment {} due to given filter", seg); - return false; - } + if (!node.checkSeg(seg)) { + logger.debug("Prune segment {} due to given filter", seg); + return false; } logger.debug("Pruner passed on segment {}", seg); return true; } - private DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) { + public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) { DataModelDesc model = seg.getModel(); PartitionDesc part = model.getPartitionDesc(); - + if (!part.isPartitioned()) return null; if (!col.equals(part.getPartitionDateColumnRef())) return null; - + // deduce the dim range from TSRange TSRange tsRange = seg.getTSRange(); if (tsRange.start.isMin || tsRange.end.isMax) return null; // DimensionRangeInfo cannot express infinite - + String min = tsRangeToStr(tsRange.start.v, part); String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive return new DimensionRangeInfo(min, max); } - private String tsRangeToStr(long ts, PartitionDesc part) { + private static String tsRangeToStr(long ts, PartitionDesc part) { String value; DataType partitionColType = part.getPartitionDateColumnRef().getType(); if (partitionColType.isDate()) { @@ -138,7 +118,7 @@ public class SegmentPruner { return value; } - private boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) { + public static boolean satisfy(CompareTupleFilter comp, String minVal, String maxVal) { // When both min and max are null, it means all cells of the column are null. // In such case, return true to let query engine scan the segment, since the @@ -177,7 +157,7 @@ public class SegmentPruner { } } - private String toString(Object v) { + private static String toString(Object v) { return v == null ? null : v.toString(); } } diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java new file mode 100644 index 0000000..8a84f67 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/common/TupleFilterNode.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.common; + +import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.cube.DimensionRangeInfo; +import org.apache.kylin.metadata.filter.CompareTupleFilter; +import org.apache.kylin.metadata.filter.LogicalTupleFilter; +import org.apache.kylin.metadata.filter.TupleFilter; +import org.apache.kylin.metadata.model.TblColRef; + +public class TupleFilterNode { + private TupleFilter filter; + + public TupleFilterNode(TupleFilter filter) { + this.filter = filter; + } + + public boolean checkSeg(CubeSegment seg) { + if (filter == null) + return true; + if (filter instanceof CompareTupleFilter) { + CompareTupleFilter compareTupleFilter = (CompareTupleFilter) filter; + TblColRef col = compareTupleFilter.getColumn(); + if (col == null) { + return true; + } + DimensionRangeInfo dimRangeInfo = seg.getDimensionRangeInfoMap().get(col.getIdentity()); + if (dimRangeInfo == null) + dimRangeInfo = SegmentPruner.tryDeduceRangeFromPartitionCol(seg, col); + if (dimRangeInfo == null) + return true; + String minVal = dimRangeInfo.getMin(); + String maxVal = dimRangeInfo.getMax(); + return SegmentPruner.satisfy(compareTupleFilter, minVal, maxVal); + } + + if (filter instanceof LogicalTupleFilter) { + if (filter.getOperator() == TupleFilter.FilterOperatorEnum.AND) { + for (TupleFilter filter : filter.getChildren()) { + if (!new TupleFilterNode(filter).checkSeg(seg)) + return false; + } + return true; + + } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.OR) { + for (TupleFilter filter : filter.getChildren()) { + if (new TupleFilterNode(filter).checkSeg(seg)) + return true; + } + return false; + } else if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT) { + for (TupleFilter filter : filter.getChildren()) { + return !(new TupleFilterNode(filter).checkSeg(seg)); + } + } + } + return true; + } +} diff --git a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java index 603db97..69f9b10 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/common/SegmentPrunerTest.java @@ -18,15 +18,21 @@ package org.apache.kylin.cube.common; +import static org.apache.kylin.metadata.filter.TupleFilter.and; import static org.apache.kylin.metadata.filter.TupleFilter.compare; +import static org.apache.kylin.metadata.filter.TupleFilter.or; +import com.google.common.collect.Maps; import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.DateFormat; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.common.util.SetAndUnsetSystemProp; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.cube.DimensionRangeInfo; import org.apache.kylin.metadata.filter.ConstantTupleFilter; +import org.apache.kylin.metadata.filter.LogicalTupleFilter; import org.apache.kylin.metadata.filter.TupleFilter; import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum; import org.apache.kylin.metadata.model.SegmentRange.TSRange; @@ -39,6 +45,8 @@ import org.junit.Test; import com.google.common.collect.Sets; +import java.util.Map; + public class SegmentPrunerTest extends LocalFileMetadataTestCase { private CubeInstance cube; @@ -192,4 +200,147 @@ public class SegmentPrunerTest extends LocalFileMetadataTestCase { } } } + + @Test + public void testLegacyCubeSegWithOrFilter() { + // legacy cube segments does not have DimensionRangeInfo, but with TSRange can do some pruning + CubeInstance cube = CubeManager.getInstance(getTestConfig()) + .getCube("test_kylin_cube_without_slr_left_join_ready_2_segments"); + + TblColRef col = cube.getModel().findColumn("TEST_KYLIN_FACT.CAL_DT"); + TblColRef col2 = cube.getModel().findColumn("TEST_KYLIN_FACT.LSTG_SITE_ID"); + CubeSegment seg = cube.getSegments(SegmentStatusEnum.READY).get(0); + Map<String, DimensionRangeInfo> dimensionRangeInfoMap = Maps.newHashMap(); + dimensionRangeInfoMap.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("10", "20")); + seg.setDimensionRangeInfoMap(dimensionRangeInfoMap); + TSRange tsRange = seg.getTSRange(); + String start = DateFormat.formatToTimeStr(tsRange.start.v, "yyyy-MM-dd"); + + CubeSegment seg2 = cube.getSegments(SegmentStatusEnum.READY).get(1); + Map<String, DimensionRangeInfo> dimensionRangeInfoMap2 = Maps.newHashMap(); + dimensionRangeInfoMap2.put("TEST_KYLIN_FACT.LSTG_SITE_ID", new DimensionRangeInfo("20", "30")); + seg2.setDimensionRangeInfoMap(dimensionRangeInfoMap2); + TSRange tsRange2 = seg2.getTSRange(); + String start2 = DateFormat.formatToTimeStr(tsRange2.start.v, "yyyy-MM-dd"); + + try (SetAndUnsetSystemProp sns = new SetAndUnsetSystemProp("kylin.query.skip-empty-segments", "false")) { + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15"); + LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "25"); + LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertTrue(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15"); + LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35"); + LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertFalse(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15"); + LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.EQ, "35"); + LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertFalse(segmentPruner.check(seg)); + Assert.assertFalse(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "15"); + LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.LT, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35"); + LogicalTupleFilter logicalAndFilter2 = and(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = and(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertFalse(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35"); + LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35"); + LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertTrue(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.IN, "15"); + LogicalTupleFilter logicalAndFilter = and(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35"); + LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter = or(logicalAndFilter, logicalAndFilter2); + SegmentPruner segmentPruner = new SegmentPruner(finalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertTrue(segmentPruner.check(seg2)); + } + + { + TupleFilter andFilter1 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter2 = compare(col2, FilterOperatorEnum.EQ, "35"); + LogicalTupleFilter logicalAndFilter = or(andFilter1, andFilter2); + + TupleFilter andFilter3 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter4 = compare(col2, FilterOperatorEnum.LT, "35"); + LogicalTupleFilter logicalAndFilter2 = or(andFilter3, andFilter4); + + LogicalTupleFilter finalFilter1 = or(logicalAndFilter, logicalAndFilter2); + + TupleFilter andFilter5 = compare(col, FilterOperatorEnum.EQ, start); + TupleFilter andFilter6 = compare(col2, FilterOperatorEnum.IN, "15"); + LogicalTupleFilter logicalAndFilter3 = and(andFilter5, andFilter6); + + TupleFilter andFilter7 = compare(col, FilterOperatorEnum.EQ, start2); + TupleFilter andFilter8 = compare(col2, FilterOperatorEnum.LT, "35"); + LogicalTupleFilter logicalAndFilter4 = or(andFilter7, andFilter8); + + LogicalTupleFilter finalFilter2 = or(logicalAndFilter3, logicalAndFilter4); + + LogicalTupleFilter finalFinalFilter = or(finalFilter1, finalFilter2); + + SegmentPruner segmentPruner = new SegmentPruner(finalFinalFilter); + Assert.assertTrue(segmentPruner.check(seg)); + Assert.assertTrue(segmentPruner.check(seg2)); + } + } + } }