This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch 2.6.x in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/2.6.x by this push: new ae0f31e KYLIN-4314 extend intersect_count and add intersect_value UDAF ae0f31e is described below commit ae0f31e16eda8d1ebc188e8935f603080b77a54d Author: shaofengshi <shaofeng...@apache.org> AuthorDate: Mon Jul 29 08:58:25 2019 +0800 KYLIN-4314 extend intersect_count and add intersect_value UDAF --- .../org/apache/kylin/common/KylinConfigBase.java | 4 + .../BitmapIntersectDistinctCountAggFunc.java | 48 +-------- ...gFunc.java => BitmapIntersectValueAggFunc.java} | 56 +--------- .../kylin/measure/bitmap/BitmapMeasureType.java | 7 +- .../measure/bitmap/RetentionPartialResult.java | 120 +++++++++++++++++++++ .../storage/gtrecord/GTCubeStorageQueryBase.java | 5 +- .../query/sql_intersect_count/query04.sql | 33 ++++++ .../query/sql_intersect_count/query05.sql | 23 ++++ 8 files changed, 194 insertions(+), 102 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 92f3cb5..3e6bd5f 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -1974,4 +1974,8 @@ abstract public class KylinConfigBase implements Serializable { public boolean isLimitPushDownEnabled() { return Boolean.parseBoolean(getOptional("kylin.storage.limit-push-down-enabled", TRUE)); } + + public String getIntersectFilterOrSeparator() { + return getOptional("kylin.query.intersect.separator", "|"); + } } diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java index 9771352..f1968e4 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java @@ -19,9 +19,7 @@ package org.apache.kylin.measure.bitmap; import org.apache.kylin.measure.ParamAsMeasureCount; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; /** * BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps @@ -30,56 +28,12 @@ import java.util.Map; * requires an bitmap count distinct measure of uuid, and an dimension of event */ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount { - private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE; @Override public int getParamAsMeasureCount() { return -2; } - public static class RetentionPartialResult { - Map<Object, BitmapCounter> map; - List keyList; - - public RetentionPartialResult() { - map = new LinkedHashMap<>(); - } - - public void add(Object key, List keyList, Object value) { - if (this.keyList == null) { - this.keyList = keyList; - } - if (this.keyList != null && this.keyList.contains(key)) { - BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap()); - - counter.orWith((BitmapCounter) value); - } - } - - public long result() { - if (keyList == null || keyList.isEmpty()) { - return 0; - } - // if any specified key not in map, the intersection must be 0 - for (Object key : keyList) { - if (!map.containsKey(key)) { - return 0; - } - } - BitmapCounter counter = null; - for (Object key : keyList) { - BitmapCounter c = map.get(key); - if (counter == null) { - counter = factory.newBitmap(); - counter.orWith(c); - } else { - counter.andWith(c); - } - } - return counter != null ? counter.getCount() : 0; - } - } - public static RetentionPartialResult init() { return new RetentionPartialResult(); } @@ -94,7 +48,7 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount } public static long result(RetentionPartialResult result) { - return result.result(); + return result.countResult(); } } diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java similarity index 52% copy from core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java copy to core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java index 9771352..7ec21b5 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectDistinctCountAggFunc.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapIntersectValueAggFunc.java @@ -17,11 +17,9 @@ */ package org.apache.kylin.measure.bitmap; -import org.apache.kylin.measure.ParamAsMeasureCount; - -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; + +import org.apache.kylin.measure.ParamAsMeasureCount; /** * BitmapIntersectDistinctCountAggFunc is an UDAF used for calculating the intersection of two or more bitmaps @@ -29,57 +27,13 @@ import java.util.Map; * Example: intersect_count(uuid, event, array['A', 'B', 'C']), meaning find the count of uuid in all A/B/C 3 bitmaps * requires an bitmap count distinct measure of uuid, and an dimension of event */ -public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount { - private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE; +public class BitmapIntersectValueAggFunc implements ParamAsMeasureCount { @Override public int getParamAsMeasureCount() { return -2; } - public static class RetentionPartialResult { - Map<Object, BitmapCounter> map; - List keyList; - - public RetentionPartialResult() { - map = new LinkedHashMap<>(); - } - - public void add(Object key, List keyList, Object value) { - if (this.keyList == null) { - this.keyList = keyList; - } - if (this.keyList != null && this.keyList.contains(key)) { - BitmapCounter counter = map.computeIfAbsent(key, o -> factory.newBitmap()); - - counter.orWith((BitmapCounter) value); - } - } - - public long result() { - if (keyList == null || keyList.isEmpty()) { - return 0; - } - // if any specified key not in map, the intersection must be 0 - for (Object key : keyList) { - if (!map.containsKey(key)) { - return 0; - } - } - BitmapCounter counter = null; - for (Object key : keyList) { - BitmapCounter c = map.get(key); - if (counter == null) { - counter = factory.newBitmap(); - counter.orWith(c); - } else { - counter.andWith(c); - } - } - return counter != null ? counter.getCount() : 0; - } - } - public static RetentionPartialResult init() { return new RetentionPartialResult(); } @@ -93,8 +47,8 @@ public class BitmapIntersectDistinctCountAggFunc implements ParamAsMeasureCount return add(result, value, key, keyList); } - public static long result(RetentionPartialResult result) { - return result.result(); + public static String result(RetentionPartialResult result) { + return result.valueResult(); } } diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java index f724257..9b36bef 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java @@ -46,6 +46,7 @@ import com.google.common.collect.ImmutableMap; public class BitmapMeasureType extends MeasureType<BitmapCounter> { public static final String FUNC_COUNT_DISTINCT = FunctionDesc.FUNC_COUNT_DISTINCT; public static final String FUNC_INTERSECT_COUNT_DISTINCT = "INTERSECT_COUNT"; + public static final String FUNC_INTERSECT_VALUE = "INTERSECT_VALUE"; public static final String DATATYPE_BITMAP = "bitmap"; public static class Factory extends MeasureTypeFactory<BitmapCounter> { @@ -164,7 +165,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> { static final Map<String, Class<?>> UDAF_MAP = ImmutableMap.of( FUNC_COUNT_DISTINCT, BitmapDistinctCountAggFunc.class, - FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class); + FUNC_INTERSECT_COUNT_DISTINCT, BitmapIntersectDistinctCountAggFunc.class, + FUNC_INTERSECT_VALUE, BitmapIntersectValueAggFunc.class); @Override public Map<String, Class<?>> getRewriteCalciteAggrFunctions() { @@ -174,7 +176,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> { @Override public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { for (SQLCall call : sqlDigest.aggrSqlCalls) { - if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function)) { + if (FUNC_INTERSECT_COUNT_DISTINCT.equals(call.function) + || FUNC_INTERSECT_VALUE.equals(call.function)) { TblColRef col = (TblColRef) call.args.get(1); if (!sqlDigest.groupbyColumns.contains(col)) sqlDigest.groupbyColumns.add(col); diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java new file mode 100644 index 0000000..6366b3c --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/RetentionPartialResult.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kylin.measure.bitmap; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.commons.lang3.StringUtils; +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.StringUtil; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public class RetentionPartialResult { + + private static final BitmapCounterFactory factory = RoaringBitmapCounterFactory.INSTANCE; + public static final String FILTER_DELIMETER = KylinConfig.getInstanceFromEnv().getIntersectFilterOrSeparator(); + Map<String, BitmapCounter> map; + List<String> keyList; + Map<String, List<String>> childKeyToParentKey; + + public RetentionPartialResult() { + map = new LinkedHashMap<>(); + } + + public void add(Object key, List keyList, Object value) { + Preconditions.checkArgument(key != null); + Preconditions.checkArgument(keyList != null && keyList.size() >= 0); + if (this.keyList == null) { + this.keyList = Lists.transform(keyList, i -> i.toString()); + childKeyToParentKey = new HashMap<>(5); + + for (String sKey : this.keyList) { + String[] elements = StringUtil.splitAndTrim(sKey, FILTER_DELIMETER); + for (String s : elements) { + if (s != null && s.trim().length() > 0) { + List<String> parent = childKeyToParentKey.computeIfAbsent(s.trim(), o -> new ArrayList()); + parent.add(sKey); + } + } + } + + } + + if (this.keyList != null) { + if (this.keyList.contains(key.toString())) { + BitmapCounter counter = map.computeIfAbsent(key.toString(), o -> factory.newBitmap()); + counter.orWith((BitmapCounter) value); + } + + if (childKeyToParentKey.size() > 0) { + String sKey = key.toString(); + if (childKeyToParentKey.containsKey(sKey)) { + List<String> parents = childKeyToParentKey.get(sKey); + for (String parent : parents) { + BitmapCounter counter = map.computeIfAbsent(parent, o -> factory.newBitmap()); + counter.orWith((BitmapCounter) value); + } + } + } + } + } + + private BitmapCounter result() { + if (keyList == null || keyList.isEmpty()) { + return null; + } + // if any specified key not in map, the intersection must be 0 + for (String key : keyList) { + if (!map.containsKey(key)) { + return null; + } + } + BitmapCounter counter = null; + for (String key : keyList) { + BitmapCounter c = map.get(key); + if (counter == null) { + counter = factory.newBitmap(); + counter.orWith(c); + } else { + counter.andWith(c); + } + } + + return counter; + } + + public String valueResult() { + BitmapCounter counter = result(); + String result = ""; + if (counter != null && counter.getCount() > 0) { + result = "[" + StringUtils.join(counter.iterator(), ",") + "]"; + } + return result; + } + + public long countResult() { + BitmapCounter counter = result(); + return counter != null ? counter.getCount() : 0; + } + +} \ No newline at end of file diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java index 61d5b7f..59eda73 100644 --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java @@ -598,8 +598,9 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { } } for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) { - if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)) { - logger.info("exactAggregation is false because has INTERSECT_COUNT"); + if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT) + || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) { + logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE"); return false; } } diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql new file mode 100644 index 0000000..e5de89b --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql @@ -0,0 +1,33 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select +week_beg_dt as week, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC']) as a, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Auction']) as b, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['Others']) as c, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction']) as ab, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Others']) as ac, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC', 'Auction', 'Others']) as abc, +intersect_count( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC|Auction', 'Others']) as a_or_b_and_c, +count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers, +count(*) as cnt +from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt = edw.test_cal_dt.CAL_DT +where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23') +group by week_beg_dt + diff --git a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql new file mode 100644 index 0000000..6de4eac --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql @@ -0,0 +1,23 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +select +intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as first_day, +intersect_count(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01', '2012-01-02']) as first_and_second_day +from test_kylin_fact +where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03') +