KYLIN-2826 add basic support classes for cube planner algorithms

Signed-off-by: Li Yang <liy...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e2029dd9
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e2029dd9
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e2029dd9

Branch: refs/heads/pr70
Commit: e2029dd9a6de405d5ab32f05f0a92e9f228abef3
Parents: 2edee49
Author: Zhong <nju_y...@apache.org>
Authored: Tue Aug 22 14:57:29 2017 +0800
Committer: Li Yang <liy...@apache.org>
Committed: Sat Sep 23 18:16:48 2017 +0800

----------------------------------------------------------------------
 .../kylin/cube/cuboid/TreeCuboidScheduler.java  |    3 +-
 .../algorithm/AbstractRecommendAlgorithm.java   |   82 +
 .../cube/cuboid/algorithm/BPUSCalculator.java   |  165 +
 .../cube/cuboid/algorithm/BenefitPolicy.java    |   41 +
 .../cuboid/algorithm/CuboidBenefitModel.java    |  210 +
 .../algorithm/CuboidRecommendAlgorithm.java     |   49 +
 .../cube/cuboid/algorithm/CuboidStats.java      |  272 ++
 .../cube/cuboid/algorithm/CuboidStatsUtil.java  |  166 +
 .../cube/cuboid/algorithm/PBPUSCalculator.java  |   53 +
 .../cube/cuboid/algorithm/SPBPUSCalculator.java |   41 +
 .../cuboid/algorithm/AlgorithmTestBase.java     |  310 ++
 .../cuboid/algorithm/CuboidStatsUtilTest.java   |  161 +
 core-cube/src/test/resources/statistics.txt     | 4092 ++++++++++++++++++
 13 files changed, 5643 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java
index 8e3b4dc..903e358 100644
--- 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java
@@ -89,8 +89,7 @@ public class TreeCuboidScheduler extends CuboidScheduler {
             return createFromCuboids(allCuboidIds, 
Cuboid.cuboidSelectComparator);
         }
 
-        @VisibleForTesting
-        static CuboidTree createFromCuboids(List<Long> allCuboidIds, 
Comparator<Long> cuboidComparator) {
+        public static CuboidTree createFromCuboids(List<Long> allCuboidIds, 
Comparator<Long> cuboidComparator) {
             // sort the cuboid ids in descending order, so that don't need to 
adjust
             // the cuboid tree when adding cuboid id to the tree.
             Collections.sort(allCuboidIds, new Comparator<Long>() {

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java
new file mode 100755
index 0000000..b35c738
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class AbstractRecommendAlgorithm implements 
CuboidRecommendAlgorithm {
+    private static final Logger logger = 
LoggerFactory.getLogger(AbstractRecommendAlgorithm.class);
+
+    private CuboidStats cuboidStats;
+    private BenefitPolicy benefitPolicy;
+
+    private AtomicBoolean cancelRequested = new AtomicBoolean(false);
+    private AtomicBoolean canceled = new AtomicBoolean(false);
+
+    private long timeoutMillis;
+
+    public AbstractRecommendAlgorithm(final long timeout, BenefitPolicy 
benefitPolicy, CuboidStats cuboidStats) {
+        if (timeout <= 0) {
+            this.timeoutMillis = Long.MAX_VALUE;
+        } else {
+            this.timeoutMillis = timeout;
+        }
+        this.cuboidStats = cuboidStats;
+        this.benefitPolicy = benefitPolicy;
+    }
+
+    @Override
+    public void cancel() {
+        cancelRequested.set(true);
+    }
+
+    /**
+     * Checks whether the algorithm has been canceled or timed out.
+     * 
+     */
+    protected boolean shouldCancel() {
+        if (canceled.get()) {
+            return true;
+        }
+        if (cancelRequested.get()) {
+            canceled.set(true);
+            cancelRequested.set(false);
+            logger.warn("Algorithm is canceled.");
+            return true;
+        }
+        final long currentTimeMillis = System.currentTimeMillis();
+        if (currentTimeMillis > timeoutMillis) {
+            canceled.set(true);
+            logger.warn("Algorithm exceeds time limit.");
+            return true;
+        }
+        return false;
+    }
+
+    public CuboidStats getCuboidStats() {
+        return cuboidStats;
+    }
+
+    public BenefitPolicy getBenefitPolicy() {
+        return benefitPolicy;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java
new file mode 100755
index 0000000..6d0b654
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * Calculate the benefit based on Benefit Per Unit Space.
+ */
+public class BPUSCalculator implements BenefitPolicy {
+
+    private static Logger logger = 
LoggerFactory.getLogger(BPUSCalculator.class);
+
+    protected CuboidStats cuboidStats;
+    protected Map<Long, Long> cuboidAggCostMap;
+
+    public BPUSCalculator(CuboidStats cuboidStats) {
+        this.cuboidStats = cuboidStats;
+        this.cuboidAggCostMap = Maps.newHashMap();
+    }
+
+    @Override
+    public void initBeforeStart() {
+        cuboidAggCostMap.clear();
+        //Initialize stats for mandatory cuboids
+        for (Long cuboid : cuboidStats.getAllCuboidsForMandatory()) {
+            if (getCuboidCost(cuboid) != null) {
+                cuboidAggCostMap.put(cuboid, getCuboidCost(cuboid));
+            }
+        }
+        Set<Long> mandatoryCuboidSetWithStats = cuboidAggCostMap.keySet();
+        //Initialize stats for selection cuboids
+        long baseCuboidCost = getCuboidCost(cuboidStats.getBaseCuboid());
+        for (Long cuboid : cuboidStats.getAllCuboidsForSelection()) {
+            long leastCost = baseCuboidCost;
+            for (Long cuboidTarget : mandatoryCuboidSetWithStats) {
+                if ((cuboid | cuboidTarget) == cuboidTarget) {
+                    if (leastCost > cuboidAggCostMap.get(cuboidTarget)) {
+                        leastCost = cuboidAggCostMap.get(cuboidTarget);
+                    }
+                }
+            }
+            cuboidAggCostMap.put(cuboid, leastCost);
+        }
+    }
+
+    @Override
+    public CuboidBenefitModel.BenefitModel calculateBenefit(long cuboid, 
Set<Long> selected) {
+        double totalCostSaving = 0;
+        int benefitCount = 0;
+        for (Long descendant : cuboidStats.getAllDescendants(cuboid)) {
+            if (!selected.contains(descendant)) {
+                double costSaving = getCostSaving(descendant, cuboid);
+                if (costSaving > 0) {
+                    totalCostSaving += costSaving;
+                    benefitCount++;
+                }
+            }
+        }
+
+        double spaceCost = calculateSpaceCost(cuboid);
+        double benefitPerUnitSpace = totalCostSaving / spaceCost;
+        return new CuboidBenefitModel.BenefitModel(benefitPerUnitSpace, 
benefitCount);
+    }
+
+    @Override
+    public CuboidBenefitModel.BenefitModel calculateBenefitTotal(List<Long> 
cuboidsToAdd, Set<Long> selected) {
+        Set<Long> selectedInner = Sets.newHashSet(selected);
+        Map<Long, Long> cuboidAggCostMapSnapshot = 
Maps.newHashMap(cuboidAggCostMap);
+        for (Long cuboid : cuboidsToAdd) {
+            selectedInner.add(cuboid);
+            propagateAggregationCost(cuboid, selectedInner);
+        }
+        double totalCostSaving = 0;
+        int benefitCount = 0;
+        for (Long cuboid : cuboidAggCostMap.keySet()) {
+            if (cuboidAggCostMap.get(cuboid) < 
cuboidAggCostMapSnapshot.get(cuboid)) {
+                totalCostSaving += cuboidAggCostMapSnapshot.get(cuboid) - 
cuboidAggCostMap.get(cuboid);
+                benefitCount++;
+            }
+        }
+        cuboidAggCostMap = cuboidAggCostMapSnapshot;
+
+        double benefitPerUnitSpace = totalCostSaving;
+        return new CuboidBenefitModel.BenefitModel(benefitPerUnitSpace, 
benefitCount);
+    }
+
+    protected double getCostSaving(long descendant, long cuboid) {
+        long cuboidCost = getCuboidCost(cuboid);
+        long descendantAggCost = getCuboidAggregationCost(descendant);
+        return descendantAggCost - cuboidCost;
+    }
+
+    protected Long getCuboidCost(long cuboid) {
+        return cuboidStats.getCuboidCount(cuboid);
+    }
+
+    private long getCuboidAggregationCost(long cuboid) {
+        return cuboidAggCostMap.get(cuboid);
+    }
+
+    @Override
+    public boolean ifEfficient(CuboidBenefitModel best) {
+        if (best.getBenefit() < getMinBenefitRatio()) {
+            logger.info(String.format("The recommended cuboid %s doesn't meet 
minimum benifit ratio %f", best,
+                    getMinBenefitRatio()));
+            return false;
+        }
+        return true;
+    }
+
+    public double getMinBenefitRatio() {
+        return 0.01;
+    }
+
+    @Override
+    public void propagateAggregationCost(long cuboid, Set<Long> selected) {
+        long aggregationCost = getCuboidCost(cuboid);
+        Set<Long> childrenCuboids = cuboidStats.getAllDescendants(cuboid);
+        for (Long child : childrenCuboids) {
+            if (!selected.contains(child) && (aggregationCost < 
getCuboidAggregationCost(child))) {
+                cuboidAggCostMap.put(child, aggregationCost);
+            }
+        }
+    }
+
+    /**
+     * Return the space cost of building a cuboid.
+     *
+     */
+    public double calculateSpaceCost(long cuboid) {
+        return cuboidStats.getCuboidCount(cuboid);
+    }
+
+    @Override
+    public BenefitPolicy getInstance() {
+        BPUSCalculator bpusCalculator = new BPUSCalculator(this.cuboidStats);
+        bpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap);
+        return bpusCalculator;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java
new file mode 100755
index 0000000..43bd3af
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+* Calculate the benefit of building the cuboid, the already selected cuboids 
will affect the benefits of the given cuboid
+* 
+*/
+public interface BenefitPolicy {
+
+    public BenefitPolicy getInstance();
+
+    public void initBeforeStart();
+
+    public CuboidBenefitModel.BenefitModel calculateBenefit(long cuboid, 
Set<Long> selected);
+
+    public CuboidBenefitModel.BenefitModel calculateBenefitTotal(List<Long> 
cuboidsToAdd, Set<Long> selected);
+
+    public boolean ifEfficient(CuboidBenefitModel best);
+
+    public void propagateAggregationCost(long cuboid, Set<Long> selected);
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java
new file mode 100755
index 0000000..42f1ecf
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+public class CuboidBenefitModel {
+    private CuboidModel cuboidModel;
+    private BenefitModel benefitModel;
+
+    public CuboidBenefitModel(CuboidModel cuboidModel, BenefitModel 
benefitModel) {
+        this.cuboidModel = cuboidModel;
+        this.benefitModel = benefitModel;
+    }
+
+    public void reset(CuboidModel cuboidModel, BenefitModel benefitModel) {
+        this.cuboidModel = cuboidModel;
+        this.benefitModel = benefitModel;
+    }
+
+    public Long getCuboidId() {
+        return cuboidModel == null ? null : cuboidModel.getCuboidId();
+    }
+
+    public Double getBenefit() {
+        return benefitModel == null ? null : benefitModel.getBenefit();
+    }
+
+    @Override
+    public String toString() {
+        return "CuboidBenefitModel [cuboidModel=" + cuboidModel + ", 
benefitModel=" + benefitModel + "]";
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((cuboidModel == null) ? 0 : 
cuboidModel.hashCode());
+        result = prime * result + ((benefitModel == null) ? 0 : 
benefitModel.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        CuboidBenefitModel other = (CuboidBenefitModel) obj;
+        if (cuboidModel == null) {
+            if (other.cuboidModel != null)
+                return false;
+        } else if (!cuboidModel.equals(other.cuboidModel))
+            return false;
+        if (benefitModel == null) {
+            if (other.benefitModel != null)
+                return false;
+        } else if (!benefitModel.equals(other.benefitModel))
+            return false;
+        return true;
+    }
+
+    public static class CuboidModel {
+        private long cuboidId;
+
+        private long recordCount;
+        private double spaceSize;
+
+        private double hitProbability;
+        private long scanCount;
+
+        public CuboidModel(long cuboId, long recordCount, double spaceSize, 
double hitProbability, long scanCount) {
+            this.cuboidId = cuboId;
+            this.recordCount = recordCount;
+            this.spaceSize = spaceSize;
+            this.hitProbability = hitProbability;
+            this.scanCount = scanCount;
+        }
+
+        public long getCuboidId() {
+            return cuboidId;
+        }
+
+        public long getRecordCount() {
+            return recordCount;
+        }
+
+        public double getSpaceSize() {
+            return spaceSize;
+        }
+
+        public double getHitProbability() {
+            return hitProbability;
+        }
+
+        public long getScanCount() {
+            return scanCount;
+        }
+
+        @Override
+        public String toString() {
+            return "CuboidModel [cuboidId=" + cuboidId + ", recordCount=" + 
recordCount + ", spaceSize=" + spaceSize
+                    + ", hitProbability=" + hitProbability + ", scanCount=" + 
scanCount + "]";
+        }
+
+        @Override
+        public int hashCode() {
+            final int prime = 31;
+            int result = 1;
+            result = prime * result + (int) (cuboidId ^ (cuboidId >>> 32));
+            result = prime * result + (int) (recordCount ^ (recordCount >>> 
32));
+            long temp;
+            temp = Double.doubleToLongBits(spaceSize);
+            result = prime * result + (int) (temp ^ (temp >>> 32));
+            temp = Double.doubleToLongBits(hitProbability);
+            result = prime * result + (int) (temp ^ (temp >>> 32));
+            result = prime * result + (int) (scanCount ^ (scanCount >>> 32));
+            return result;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj)
+                return true;
+            if (obj == null)
+                return false;
+            if (getClass() != obj.getClass())
+                return false;
+
+            CuboidModel other = (CuboidModel) obj;
+            if (cuboidId != other.cuboidId)
+                return false;
+            if (recordCount != other.recordCount)
+                return false;
+            if (Double.doubleToLongBits(spaceSize) != 
Double.doubleToLongBits(other.spaceSize))
+                return false;
+            if (hitProbability != other.hitProbability)
+                return false;
+            if (scanCount != other.scanCount)
+                return false;
+            return true;
+        }
+    }
+
+    public static class BenefitModel {
+        private double benefit;
+        private int benefitCount;
+
+        public BenefitModel(double benefit, int benefitCount) {
+            this.benefit = benefit;
+            this.benefitCount = benefitCount;
+        }
+
+        public double getBenefit() {
+            return benefit;
+        }
+
+        public int getBenefitCount() {
+            return benefitCount;
+        }
+
+        @Override
+        public String toString() {
+            return "BenefitModel [benefit=" + benefit + ", benefitCount=" + 
benefitCount + "]";
+        }
+
+        @Override
+        public int hashCode() {
+            final int prime = 31;
+            int result = 1;
+            long temp;
+            temp = Double.doubleToLongBits(benefit);
+            result = prime * result + (int) (temp ^ (temp >>> 32));
+            result = prime * result + benefitCount;
+            return result;
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj)
+                return true;
+            if (obj == null)
+                return false;
+            if (getClass() != obj.getClass())
+                return false;
+            BenefitModel other = (BenefitModel) obj;
+            if (Double.doubleToLongBits(benefit) != 
Double.doubleToLongBits(other.benefit))
+                return false;
+            if (benefitCount != other.benefitCount)
+                return false;
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java
new file mode 100755
index 0000000..b9f39ec
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.List;
+
+/**
+ * Algorithm to calculate the cuboid benifit and recommend cost-effective 
cuboid list based on the cube statistics.
+ */
+public interface CuboidRecommendAlgorithm {
+
+    /**
+     * Return a list of recommended cuboids for the building segment based on 
expansionRate
+     *
+     */
+    List<Long> recommend(double expansionRate);
+
+    /**
+     * Start the Algorithm running
+     * 
+     * @param maxSpaceLimit
+     * @return
+     */
+    List<Long> start(double maxSpaceLimit);
+
+    /**
+     * Cancel the Algorithm running
+     *
+     *  Users can call this method from another thread to can the Algorithm 
and return the result earlier
+     *
+     */
+    void cancel();
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
new file mode 100755
index 0000000..1775d5a
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class CuboidStats {
+    private static final Logger logger = 
LoggerFactory.getLogger(CuboidStats.class);
+
+    public static class Builder {
+
+        private static final long THRESHOLD_ROLL_UP_FOR_MANDATORY = 1000L;
+
+        // Required parameters
+        private String key;
+        private Long baseCuboid;
+        private Map<Long, Long> statistics;
+        private Map<Long, Double> size;
+
+        // Optional parameters - initialized to default values
+        private Set<Long> mandatoryCuboids = null;
+        //// These two properties are for generating mandatory cuboids
+        private Map<Long, Map<Long, Long>> rollingUpCountSourceMap = null;
+        private Long rollUpThresholdForMandatory = null;
+
+        private Map<Long, Long> hitFrequencyMap = null;
+        private Map<Long, Map<Long, Long>> scanCountSourceMap = null;
+
+        public Builder(String key, Long baseCuboid, Map<Long, Long> 
statistics, Map<Long, Double> size) {
+            this.key = key;
+            this.baseCuboid = baseCuboid;
+            this.statistics = statistics;
+            this.size = size;
+        }
+
+        public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> 
rollingUpCountSourceMap) {
+            this.rollingUpCountSourceMap = rollingUpCountSourceMap;
+            this.rollUpThresholdForMandatory = THRESHOLD_ROLL_UP_FOR_MANDATORY;
+            return this;
+        }
+
+        public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> 
rollingUpCountSourceMap,
+                long rollUpThresholdForMandatory) {
+            this.rollingUpCountSourceMap = rollingUpCountSourceMap;
+            this.rollUpThresholdForMandatory = rollUpThresholdForMandatory;
+            return this;
+        }
+
+        public Builder setMandatoryCuboids(Set<Long> mandatoryCuboids) {
+            this.mandatoryCuboids = mandatoryCuboids;
+            return this;
+        }
+
+        public Builder setHitFrequencyMap(Map<Long, Long> hitFrequencyMap) {
+            this.hitFrequencyMap = hitFrequencyMap;
+            return this;
+        }
+
+        public Builder setScanCountSourceMap(Map<Long, Map<Long, Long>> 
scanCountSourceMap) {
+            this.scanCountSourceMap = scanCountSourceMap;
+            return this;
+        }
+
+        public CuboidStats build() {
+            Preconditions.checkNotNull(key, "key should not be null");
+            Preconditions.checkNotNull(baseCuboid, "baseCuboid should not be 
null");
+            Preconditions.checkNotNull(statistics, "statistics should not be 
null");
+            Preconditions.checkNotNull(size, "size should not be null");
+            Preconditions.checkNotNull(statistics.get(baseCuboid),
+                    "row count should exist for base cuboid " + baseCuboid);
+            Preconditions.checkState(statistics.keySet().equals(size.keySet()),
+                    "statistics & size should own the same key set");
+            if (mandatoryCuboids == null) {
+                mandatoryCuboids = Sets.newHashSet();
+            }
+            if (rollingUpCountSourceMap != null) {
+                
mandatoryCuboids.addAll(CuboidStatsUtil.generateMandatoryCuboidSet(statistics, 
hitFrequencyMap,
+                        rollingUpCountSourceMap, rollUpThresholdForMandatory));
+            }
+
+            return new CuboidStats(key, baseCuboid, mandatoryCuboids, 
statistics, size, hitFrequencyMap,
+                    scanCountSourceMap);
+        }
+    }
+
+    private static final double WEIGHT_FOR_UN_QUERY = 0.2;
+
+    private String key;
+    private long baseCuboid;
+    private ImmutableSet<Long> mandatoryCuboidSet;
+    private ImmutableSet<Long> selectionCuboidSet;
+    private ImmutableMap<Long, Long> cuboidCountMap;
+    private ImmutableMap<Long, Double> cuboidSizeMap;
+    private ImmutableMap<Long, Double> cuboidHitProbabilityMap;
+    private ImmutableMap<Long, Long> cuboidScanCountMap;
+
+    private ImmutableMap<Long, Set<Long>> allDescendantsCache;
+
+    private CuboidStats(String key, long baseCuboidId, Set<Long> 
mandatoryCuboids, Map<Long, Long> statistics,
+            Map<Long, Double> size, Map<Long, Long> hitFrequencyMap, Map<Long, 
Map<Long, Long>> scanCountSourceMap) {
+
+        this.key = key;
+        this.baseCuboid = baseCuboidId;
+        /** Initial mandatory cuboids */
+        Set<Long> cuboidsForMandatory = Sets.newHashSet(mandatoryCuboids);
+        //Always add base cuboid.
+        if (!cuboidsForMandatory.contains(baseCuboid)) {
+            cuboidsForMandatory.add(baseCuboid);
+        }
+        logger.info("Mandatory cuboids: " + cuboidsForMandatory);
+
+        /** Initial selection cuboids */
+        Set<Long> cuboidsForSelection = Sets.newHashSet(statistics.keySet());
+        cuboidsForSelection.removeAll(cuboidsForMandatory);
+
+        //There's no overlap between mandatoryCuboidSet and selectionCuboidSet
+        this.mandatoryCuboidSet = ImmutableSet.<Long> 
builder().addAll(cuboidsForMandatory).build();
+        this.selectionCuboidSet = ImmutableSet.<Long> 
builder().addAll(cuboidsForSelection).build();
+        if (selectionCuboidSet.isEmpty()) {
+            logger.warn("The selection set should not be empty!!!");
+        }
+
+        /** Initialize row count for mandatory cuboids */
+        CuboidStatsUtil.complementRowCountForMandatoryCuboids(statistics, 
baseCuboid, mandatoryCuboidSet);
+
+        this.cuboidCountMap = ImmutableMap.<Long, Long> 
builder().putAll(statistics).build();
+        this.cuboidSizeMap = ImmutableMap.<Long, Double> 
builder().putAll(size).build();
+
+        /** Initialize the hit probability for each selection cuboid */
+        Map<Long, Double> tmpCuboidHitProbabilityMap = 
Maps.newHashMapWithExpectedSize(selectionCuboidSet.size());
+        if (hitFrequencyMap != null) {
+            long totalHitFrequency = 0L;
+            for (Map.Entry<Long, Long> hitFrequency : 
hitFrequencyMap.entrySet()) {
+                if (selectionCuboidSet.contains(hitFrequency.getKey())) {
+                    totalHitFrequency += hitFrequency.getValue();
+                }
+            }
+
+            final double unitUncertainProb = WEIGHT_FOR_UN_QUERY / 
selectionCuboidSet.size();
+            for (Long cuboid : selectionCuboidSet) {
+                //Calculate hit probability for each cuboid
+                if (hitFrequencyMap.get(cuboid) != null) {
+                    tmpCuboidHitProbabilityMap.put(cuboid, unitUncertainProb
+                            + (1 - WEIGHT_FOR_UN_QUERY) * 
hitFrequencyMap.get(cuboid) / totalHitFrequency);
+                } else {
+                    tmpCuboidHitProbabilityMap.put(cuboid, unitUncertainProb);
+                }
+            }
+        } else {
+            for (Long cuboid : selectionCuboidSet) {
+                tmpCuboidHitProbabilityMap.put(cuboid, 1.0 / 
selectionCuboidSet.size());
+            }
+        }
+        this.cuboidHitProbabilityMap = ImmutableMap.<Long, Double> 
builder().putAll(tmpCuboidHitProbabilityMap).build();
+
+        /** Initialize the scan count when query for each selection cuboid + 
one base cuboid */
+        Map<Long, Long> tmpCuboidScanCountMap = 
Maps.newHashMapWithExpectedSize(1 + selectionCuboidSet.size());
+        tmpCuboidScanCountMap.put(baseCuboid, getExpScanCount(baseCuboid, 
statistics, scanCountSourceMap));
+        for (Long cuboid : selectionCuboidSet) {
+            tmpCuboidScanCountMap.put(cuboid, getExpScanCount(cuboid, 
statistics, scanCountSourceMap));
+        }
+        this.cuboidScanCountMap = ImmutableMap.<Long, Long> 
builder().putAll(tmpCuboidScanCountMap).build();
+
+        this.allDescendantsCache = ImmutableMap.<Long, Set<Long>> builder()
+                
.putAll(CuboidStatsUtil.createAllDescendantsCache(statistics.keySet())).build();
+    }
+
+    private long getExpScanCount(long sourceCuboid, Map<Long, Long> statistics,
+            Map<Long, Map<Long, Long>> scanCountSourceMap) {
+        Preconditions.checkNotNull(statistics.get(sourceCuboid),
+                "The statistics for source cuboid " + sourceCuboid + " does 
not exist!!!");
+        if (scanCountSourceMap == null || scanCountSourceMap.get(sourceCuboid) 
== null
+                || scanCountSourceMap.get(sourceCuboid).size() <= 0) {
+            return statistics.get(sourceCuboid);
+        } else {
+            //TODO some improvement can be done by assigning weights based on 
distance between source cuboid and target cuboid
+            Map<Long, Long> scanCountTargetMap = 
scanCountSourceMap.get(sourceCuboid);
+            long totalEstScanCount = 0L;
+            for (Map.Entry<Long, Long> subEntry : 
scanCountTargetMap.entrySet()) {
+                long targetCuboid = subEntry.getKey();
+                Preconditions.checkNotNull(statistics.get(targetCuboid),
+                        "The statistics for target cuboid " + targetCuboid + " 
does not exist!!!");
+                // Consider the ratio of row count between source cuboid and 
target cuboid
+                totalEstScanCount += subEntry.getValue() * 
statistics.get(sourceCuboid) / statistics.get(targetCuboid);
+            }
+            return totalEstScanCount / scanCountTargetMap.size();
+        }
+    }
+
+    public Set<Long> getAllDescendants(long cuboid) {
+        Set<Long> allDescendants = Sets.newLinkedHashSet();
+        if (selectionCuboidSet.contains(cuboid)) {
+            return allDescendantsCache.get(cuboid);
+        }
+        return allDescendants;
+    }
+
+    public Set<Long> getAllCuboidsForSelection() {
+        return selectionCuboidSet;
+    }
+
+    public Set<Long> getAllCuboidsForMandatory() {
+        return mandatoryCuboidSet;
+    }
+
+    public Long getCuboidQueryCost(long cuboid) {
+        return cuboidScanCountMap.get(cuboid);
+    }
+
+    public Long getCuboidCount(long cuboid) {
+        return cuboidCountMap.get(cuboid);
+    }
+
+    public Double getCuboidSize(long cuboid) {
+        return cuboidSizeMap.get(cuboid);
+    }
+
+    public double getCuboidHitProbability(long cuboid) {
+        if (mandatoryCuboidSet.contains(cuboid)) {
+            return 1;
+        } else {
+            return cuboidHitProbabilityMap.get(cuboid) == null ? 0 : 
cuboidHitProbabilityMap.get(cuboid);
+        }
+    }
+
+    public Map<Long, Long> getStatistics() {
+        return cuboidCountMap;
+    }
+
+    public double getBaseCuboidSize() {
+        return getCuboidSize(baseCuboid);
+    }
+
+    public long getBaseCuboid() {
+        return baseCuboid;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    public CuboidBenefitModel.CuboidModel getCuboidModel(long cuboid) {
+        return new CuboidBenefitModel.CuboidModel(cuboid, 
getCuboidCount(cuboid), getCuboidSize(cuboid),
+                getCuboidHitProbability(cuboid), getCuboidQueryCost(cuboid));
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
new file mode 100644
index 0000000..6d5bbe5
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class CuboidStatsUtil {
+
+    /**
+     * For generating mandatory cuboids,
+     * a cuboid is mandatory if the expectation of rolling up count exceeds a 
threshold
+     * */
+    public static Set<Long> generateMandatoryCuboidSet(Map<Long, Long> 
statistics, Map<Long, Long> hitFrequencyMap,
+            Map<Long, Map<Long, Long>> rollingUpCountSourceMap, final long 
rollUpThresholdForMandatory) {
+        Set<Long> mandatoryCuboidSet = Sets.newHashSet();
+        if (hitFrequencyMap == null || hitFrequencyMap.isEmpty() || 
rollingUpCountSourceMap == null
+                || rollingUpCountSourceMap.isEmpty()) {
+            return mandatoryCuboidSet;
+        }
+        long totalHitFrequency = 0L;
+        for (long hitFrequency : hitFrequencyMap.values()) {
+            totalHitFrequency += hitFrequency;
+        }
+
+        if (totalHitFrequency == 0) {
+            return mandatoryCuboidSet;
+        }
+
+        for (Map.Entry<Long, Long> hitFrequency : hitFrequencyMap.entrySet()) {
+            long cuboid = hitFrequency.getKey();
+            if (statistics.get(cuboid) != null) {
+                continue;
+            }
+            if (rollingUpCountSourceMap.get(cuboid) == null || 
rollingUpCountSourceMap.get(cuboid).isEmpty()) {
+                continue;
+            }
+            long totalEstScanCount = 0L;
+            for (long estScanCount : 
rollingUpCountSourceMap.get(cuboid).values()) {
+                totalEstScanCount += estScanCount;
+            }
+            totalEstScanCount /= rollingUpCountSourceMap.get(cuboid).size();
+            if ((hitFrequency.getValue() * 1.0 / totalHitFrequency)
+                    * totalEstScanCount >= rollUpThresholdForMandatory) {
+                mandatoryCuboidSet.add(cuboid);
+            }
+        }
+        return mandatoryCuboidSet;
+    }
+
+    /**
+     * Complement row count for mandatory cuboids
+     * with its best parent's row count
+     * */
+    public static void complementRowCountForMandatoryCuboids(Map<Long, Long> 
statistics, long baseCuboid,
+            Set<Long> mandatoryCuboidSet) {
+        // Sort entries order by row count asc
+        SortedSet<Map.Entry<Long, Long>> sortedStatsSet = new 
TreeSet<Map.Entry<Long, Long>>(
+                new Comparator<Map.Entry<Long, Long>>() {
+                    public int compare(Map.Entry<Long, Long> o1, 
Map.Entry<Long, Long> o2) {
+                        return o1.getValue().compareTo(o2.getValue());
+                    }
+                });
+        sortedStatsSet.addAll(statistics.entrySet());
+        for (Long cuboid : mandatoryCuboidSet) {
+            if (statistics.get(cuboid) == null) {
+                // Get estimate row count for mandatory cuboid
+                long tmpRowCount = -1;
+                for (Map.Entry<Long, Long> entry : sortedStatsSet) {
+                    if (isDescendant(cuboid, entry.getKey())) {
+                        tmpRowCount = entry.getValue();
+                    }
+                }
+                statistics.put(cuboid, tmpRowCount < 0 ? 
statistics.get(baseCuboid) : tmpRowCount);
+            }
+        }
+    }
+
+    /** Using dynamic programming to use extra space to reduce repetitive 
computation*/
+    public static Map<Long, Set<Long>> createAllDescendantsCache(final 
Set<Long> cuboidSet) {
+        List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet);
+        Collections.sort(latticeCuboidList);
+
+        Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap();
+        Set<Long> preNoneDescendants = Sets.newHashSet();
+        for (int i = 0; i < latticeCuboidList.size(); i++) {
+            Long currentCuboid = latticeCuboidList.get(i);
+            Set<Long> descendants = Sets.newHashSet(currentCuboid);
+            Set<Long> curNoneDescendants = Sets.newHashSet();
+            if (i > 0) {
+                long preCuboid = latticeCuboidList.get(i - 1);
+                if (isDescendant(preCuboid, currentCuboid)) {
+                    descendants.addAll(allDescendantsCache.get(preCuboid));
+                } else {
+                    curNoneDescendants.add(preCuboid);
+                    for (long cuboidToCheck : 
allDescendantsCache.get(preCuboid)) {
+                        if (isDescendant(cuboidToCheck, currentCuboid)) {
+                            
descendants.addAll(allDescendantsCache.get(cuboidToCheck));
+                        }
+                    }
+                }
+            }
+            for (long cuboidToCheck : preNoneDescendants) {
+                if (isDescendant(cuboidToCheck, currentCuboid)) {
+                    descendants.addAll(allDescendantsCache.get(cuboidToCheck));
+                } else {
+                    curNoneDescendants.add(cuboidToCheck);
+                }
+            }
+
+            allDescendantsCache.put(currentCuboid, descendants);
+            preNoneDescendants = curNoneDescendants;
+        }
+
+        return allDescendantsCache;
+    }
+
+    @VisibleForTesting
+    static Map<Long, Set<Long>> createAllDescendantsCache2(final Set<Long> 
cuboidSet) {
+        List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet);
+
+        Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap();
+        for (int i = 0; i < latticeCuboidList.size(); i++) {
+            Long currentCuboid = latticeCuboidList.get(i);
+            Set<Long> descendantSet = Sets.newHashSet(currentCuboid);
+            for (int j = 0; j < i; j++) {
+                Long checkCuboid = latticeCuboidList.get(j);
+                if (isDescendant(checkCuboid, currentCuboid)) {
+                    descendantSet.add(checkCuboid);
+                }
+            }
+            allDescendantsCache.put(currentCuboid, descendantSet);
+        }
+        return allDescendantsCache;
+    }
+
+    public static boolean isDescendant(long cuboidToCheck, long parentCuboid) {
+        return (cuboidToCheck & parentCuboid) == cuboidToCheck;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java
new file mode 100755
index 0000000..6d3ddc7
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Calculate the benefit based on Benefit Per Unit Space with query 
probability distribution.
+ */
+public class PBPUSCalculator extends BPUSCalculator {
+
+    public PBPUSCalculator(final CuboidStats cuboidStats) {
+        super(cuboidStats);
+    }
+
+    @Override
+    protected double getCostSaving(long descendant, long cuboid) {
+        return getCuboidHitProbability(descendant) * 
super.getCostSaving(descendant, cuboid);
+    }
+
+    protected double getCuboidHitProbability(long cuboid) {
+        return cuboidStats.getCuboidHitProbability(cuboid);
+    }
+
+    public double getMinBenefitRatio() {
+        
Preconditions.checkArgument(cuboidStats.getAllCuboidsForSelection().size() > 0,
+                "The set of cuboids for selection is empty!!!");
+        return super.getMinBenefitRatio() / 
cuboidStats.getAllCuboidsForSelection().size();
+    }
+
+    @Override
+    public BenefitPolicy getInstance() {
+        PBPUSCalculator pbpusCalculator = new PBPUSCalculator(cuboidStats);
+        pbpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap);
+        return pbpusCalculator;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java
new file mode 100755
index 0000000..c89bf49
--- /dev/null
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+/**
+ * Calculate the benefit based on Benefit Per Unit Space with query 
probability distribution and updated cost.
+ */
+public class SPBPUSCalculator extends PBPUSCalculator {
+
+    public SPBPUSCalculator(final CuboidStats cuboidStats) {
+        super(cuboidStats);
+    }
+
+    @Override
+    protected Long getCuboidCost(long cuboid) {
+        return cuboidStats.getCuboidQueryCost(cuboid);
+    }
+
+    @Override
+    public BenefitPolicy getInstance() {
+        SPBPUSCalculator spbpusCalculator = new SPBPUSCalculator(cuboidStats);
+        spbpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap);
+        return spbpusCalculator;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java
 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java
new file mode 100755
index 0000000..02927c3
--- /dev/null
+++ 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidCostComparator;
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidTree;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class AlgorithmTestBase {
+
+    public CuboidStats cuboidStats;
+
+    private Set<Long> mandatoryCuboids;
+
+    @Before
+    public void setUp() throws Exception {
+
+        mandatoryCuboids = Sets.newHashSet();
+        mandatoryCuboids.add(3000L);
+        mandatoryCuboids.add(1888L);
+        mandatoryCuboids.add(88L);
+        cuboidStats = new CuboidStats.Builder("test", 4095L, simulateCount(), 
simulateSpaceSize())
+                
.setMandatoryCuboids(mandatoryCuboids).setHitFrequencyMap(simulateHitFrequency())
+                .setScanCountSourceMap(simulateScanCount()).build();
+    }
+
+    @After
+    public void after() throws Exception {
+    }
+
+    @Test
+    public void testMandatoryRowCountEstimation() {
+        for (long mandatoryCuboid : mandatoryCuboids) {
+            System.out.println("Cuboid id: " + mandatoryCuboid + "; Row Count: 
"
+                    + cuboidStats.getStatistics().get(mandatoryCuboid));
+        }
+    }
+
+    /** better if closer to 1, worse if closer to 0*/
+    public double getQueryCostRatio(CuboidStats cuboidStats, List<Long> 
recommendList) {
+        CuboidTree cuboidTree = CuboidTree.createFromCuboids(recommendList,
+                new CuboidCostComparator(cuboidStats.getStatistics()));
+        double queryCostBest = 0;
+        for (Long cuboidId : cuboidStats.getAllCuboidsForSelection()) {
+            if (cuboidStats.getCuboidQueryCost(cuboidId) != null) {
+                queryCostBest += cuboidStats.getCuboidHitProbability(cuboidId) 
* cuboidStats.getCuboidCount(cuboidId);
+                //                queryCostBest += 
cuboidStats.getCuboidHitProbability(cuboidId) * 
cuboidStats.getCuboidQueryCost(cuboidId);
+            }
+        }
+
+        double queryCost = 0;
+        for (Long cuboidId : cuboidStats.getAllCuboidsForSelection()) {
+            long matchCuboidId = cuboidTree.findBestMatchCuboid(cuboidId);
+            if (cuboidStats.getCuboidQueryCost(matchCuboidId) != null) {
+                queryCost += cuboidStats.getCuboidHitProbability(cuboidId) * 
cuboidStats.getCuboidCount(matchCuboidId);
+                //                queryCost += 
cuboidStats.getCuboidHitProbability(cuboidId) * 
cuboidStats.getCuboidQueryCost(matchCuboidId);
+            }
+        }
+
+        return queryCostBest / queryCost;
+    }
+
+    protected Map<Long, Long> simulateCount() {
+        Map<Long, Long> countMap = Maps.newHashMap();
+        BufferedReader br = null;
+
+        try {
+
+            String sCurrentLine;
+
+            br = new BufferedReader(new 
FileReader("src/test/resources/statistics.txt"));
+
+            while ((sCurrentLine = br.readLine()) != null) {
+                String[] statPair = sCurrentLine.split(" ");
+                countMap.put(Long.valueOf(statPair[0]), 
Long.valueOf(statPair[1]));
+            }
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            try {
+                if (br != null)
+                    br.close();
+            } catch (IOException ex) {
+                ex.printStackTrace();
+            }
+        }
+
+        return countMap;
+    }
+
+    protected Map<Long, Double> simulateSpaceSize() {
+        Map<Long, Double> sizeMap = Maps.newHashMap();
+        Map<Long, Long> countMap = simulateCount();
+        for (Map.Entry<Long, Long> entry : countMap.entrySet()) {
+            sizeMap.put(entry.getKey(), entry.getValue() * 1.0);
+        }
+        return sizeMap;
+    }
+
+    protected Map<Long, Long> simulateHitFrequency() {
+        Map<Long, Long> hitFrequencyMap = Maps.newHashMap();
+
+        hitFrequencyMap.put(4095L, 10L);
+        hitFrequencyMap.put(3849L, 15L);
+        hitFrequencyMap.put(3780L, 31L);
+
+        hitFrequencyMap.put(3459L, 16L);
+        hitFrequencyMap.put(3145L, 29L);
+
+        hitFrequencyMap.put(2861L, 21L);
+        hitFrequencyMap.put(2768L, 40L);
+
+        hitFrequencyMap.put(1528L, 10L);
+        hitFrequencyMap.put(1440L, 9L);
+        hitFrequencyMap.put(1152L, 21L);
+
+        hitFrequencyMap.put(256L, 23L);
+
+        hitFrequencyMap.put(128L, 7L);
+        hitFrequencyMap.put(272L, 8L);
+        hitFrequencyMap.put(288L, 10L);
+        hitFrequencyMap.put(384L, 2L);
+        hitFrequencyMap.put(320L, 3L);
+        hitFrequencyMap.put(432L, 5L);
+        hitFrequencyMap.put(258L, 8L);
+        hitFrequencyMap.put(336L, 10L);
+        hitFrequencyMap.put(274L, 22L);
+        hitFrequencyMap.put(488L, 41L);
+        hitFrequencyMap.put(352L, 10L);
+
+        hitFrequencyMap.put(16L, 1L);
+        hitFrequencyMap.put(32L, 5L);
+        hitFrequencyMap.put(34L, 1L);
+
+        hitFrequencyMap.put(2L, 21L);
+
+        return hitFrequencyMap;
+    }
+
+    protected Map<Long, Map<Long, Long>> simulateScanCount() {
+        Map<Long, Map<Long, Long>> scanCountMap = Maps.newLinkedHashMap();
+        scanCountMap.put(4094L, new HashMap<Long, Long>() {
+            {
+                put(4095L, 1833041L);
+            }
+        });
+        scanCountMap.put(3849L, new HashMap<Long, Long>() {
+            {
+                put(3849L, 276711L);
+            }
+        });
+        scanCountMap.put(3780L, new HashMap<Long, Long>() {
+            {
+                put(3780L, 129199L);
+            }
+        });
+        scanCountMap.put(3459L, new HashMap<Long, Long>() {
+            {
+                put(3459L, 168109L);
+            }
+        });
+        scanCountMap.put(3145L, new HashMap<Long, Long>() {
+            {
+                put(3145L, 299991L);
+            }
+        });
+        scanCountMap.put(2861L, new HashMap<Long, Long>() {
+            {
+                put(2861L, 2121L);
+            }
+        });
+        scanCountMap.put(2768L, new HashMap<Long, Long>() {
+            {
+                put(2768L, 40231L);
+            }
+        });
+        scanCountMap.put(256L, new HashMap<Long, Long>() {
+            {
+                put(256L, 1L);
+            }
+        });
+        scanCountMap.put(16L, new HashMap<Long, Long>() {
+            {
+                put(16L, 1L);
+            }
+        });
+        scanCountMap.put(32L, new HashMap<Long, Long>() {
+            {
+                put(32L, 2L);
+            }
+        });
+        scanCountMap.put(128L, new HashMap<Long, Long>() {
+            {
+                put(128L, 3L);
+            }
+        });
+        scanCountMap.put(272L, new HashMap<Long, Long>() {
+            {
+                put(272L, 2L);
+            }
+        });
+        scanCountMap.put(288L, new HashMap<Long, Long>() {
+            {
+                put(288L, 3L);
+            }
+        });
+        scanCountMap.put(2L, new HashMap<Long, Long>() {
+            {
+                put(2L, 1L);
+            }
+        });
+        scanCountMap.put(384L, new HashMap<Long, Long>() {
+            {
+                put(384L, 2L);
+            }
+        });
+        scanCountMap.put(320L, new HashMap<Long, Long>() {
+            {
+                put(320L, 3L);
+            }
+        });
+        scanCountMap.put(432L, new HashMap<Long, Long>() {
+            {
+                put(432L, 5L);
+            }
+        });
+        scanCountMap.put(1152L, new HashMap<Long, Long>() {
+            {
+                put(1152L, 21L);
+            }
+        });
+        scanCountMap.put(258L, new HashMap<Long, Long>() {
+            {
+                put(258L, 2L);
+            }
+        });
+        scanCountMap.put(1440L, new HashMap<Long, Long>() {
+            {
+                put(1440L, 9L);
+            }
+        });
+        scanCountMap.put(336L, new HashMap<Long, Long>() {
+            {
+                put(336L, 2L);
+            }
+        });
+        scanCountMap.put(336L, new HashMap<Long, Long>() {
+            {
+                put(336L, 2L);
+            }
+        });
+        scanCountMap.put(274L, new HashMap<Long, Long>() {
+            {
+                put(274L, 1L);
+            }
+        });
+        scanCountMap.put(488L, new HashMap<Long, Long>() {
+            {
+                put(488L, 16L);
+            }
+        });
+        scanCountMap.put(352L, new HashMap<Long, Long>() {
+            {
+                put(352L, 3L);
+            }
+        });
+        scanCountMap.put(1528L, new HashMap<Long, Long>() {
+            {
+                put(1528L, 21L);
+            }
+        });
+        scanCountMap.put(34L, new HashMap<Long, Long>() {
+            {
+                put(34L, 1L);
+            }
+        });
+
+        return scanCountMap;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
new file mode 100644
index 0000000..ca70820
--- /dev/null
+++ 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid.algorithm;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class CuboidStatsUtilTest {
+
+    /**
+     *                 (11111111)
+     *               /     |      \
+     *      10011111   (11101111)  00110010
+     *           \         |         /
+     *             \   11000111     /
+     *               \     |       /
+     *                 00000110   /
+     *              /      |     /
+     *         00000100  00000010
+     * */
+    private Set<Long> generateCuboidSet() {
+        return Sets.newHashSet(255L, 159L, 239L, 50L, 199L, 6L, 4L, 2L);
+    }
+
+    private Map<Long, Long> simulateCount() {
+        Map<Long, Long> countMap = Maps.newHashMap();
+
+        countMap.put(255L, 10000L);
+        countMap.put(159L, 10000L);
+        countMap.put(50L, 10000L);
+        countMap.put(199L, 10000L);
+        countMap.put(6L, 10000L);
+        countMap.put(4L, 10000L);
+        countMap.put(2L, 10000L);
+
+        return countMap;
+    }
+
+    private Map<Long, Long> simulateHitFrequency() {
+        Map<Long, Long> hitFrequencyMap = Maps.newHashMap();
+
+        long totalHitFrequency = 10000L;
+
+        hitFrequencyMap.put(239L, (long) (totalHitFrequency * 0.5));
+        hitFrequencyMap.put(50L, (long) (totalHitFrequency * 0.2));
+        hitFrequencyMap.put(2L, (long) (totalHitFrequency * 0.25));
+        hitFrequencyMap.put(178L, (long) (totalHitFrequency * 0.05));
+
+        return hitFrequencyMap;
+    }
+
+    private Map<Long, Map<Long, Long>> simulateRollingUpCount() {
+        Map<Long, Map<Long, Long>> rollingUpCountMap = Maps.newLinkedHashMap();
+
+        rollingUpCountMap.put(239L, new HashMap<Long, Long>() {
+            {
+                put(255L, 4000L);
+            }
+        });
+
+        rollingUpCountMap.put(178L, new HashMap<Long, Long>() {
+            {
+                put(255L, 5000L);
+            }
+        });
+
+        return rollingUpCountMap;
+    }
+
+    @Test
+    public void isDescendantTest() {
+        Assert.assertTrue(CuboidStatsUtil.isDescendant(6L, 239L));
+        Assert.assertTrue(!CuboidStatsUtil.isDescendant(4L, 50L));
+    }
+
+    @Test
+    public void generateMandatoryCuboidSetTest() {
+        Set<Long> mandatoryCuboidSet = 
CuboidStatsUtil.generateMandatoryCuboidSet(simulateCount(),
+                simulateHitFrequency(), simulateRollingUpCount(), 1000L);
+        Assert.assertTrue(mandatoryCuboidSet.contains(239L));
+        Assert.assertTrue(!mandatoryCuboidSet.contains(178L));
+    }
+
+    @Test
+    public void complementRowCountForMandatoryCuboidsTest() {
+        Map<Long, Long> countMap = simulateCount();
+        Set<Long> mandatoryCuboidSet = 
CuboidStatsUtil.generateMandatoryCuboidSet(countMap, simulateHitFrequency(),
+                simulateRollingUpCount(), 1000L);
+        for (long mandatoryCuboid : mandatoryCuboidSet) {
+            Assert.assertNull(countMap.get(mandatoryCuboid));
+        }
+        CuboidStatsUtil.complementRowCountForMandatoryCuboids(countMap, 255L, 
mandatoryCuboidSet);
+        for (long mandatoryCuboid : mandatoryCuboidSet) {
+            Assert.assertNotNull(countMap.get(mandatoryCuboid));
+        }
+        Assert.assertTrue(countMap.get(239L) == 10000L);
+    }
+
+    @Test
+    public void createAllDescendantsCacheTest() {
+        Set<Long> cuboidSet = generateCuboidSet();
+        Map<Long, Set<Long>> allDescendantsCache = 
CuboidStatsUtil.createAllDescendantsCache(cuboidSet);
+
+        
Assert.assertTrue(allDescendantsCache.get(255L).containsAll(cuboidSet));
+
+        Assert.assertTrue(allDescendantsCache.get(239L).size() == 5);
+
+        
Assert.assertTrue(allDescendantsCache.get(50L).containsAll(Sets.newHashSet(50L, 
2L)));
+        Assert.assertTrue(!allDescendantsCache.get(50L).contains(4L));
+    }
+
+    private Set<Long> generateMassCuboidSet() {
+        Set<Long> cuboidSet = Sets.newHashSet();
+        long maxCuboid = (1L << 16);
+        for (long i = 1; i < maxCuboid; i++) {
+            cuboidSet.add(i);
+        }
+        return cuboidSet;
+    }
+
+    @Test
+    public void createAllDescendantsCacheStressTest() {
+        Stopwatch sw = new Stopwatch();
+        sw.start();
+        Set<Long> cuboidSet = generateMassCuboidSet();
+        System.out.println("Time elapsed for creating sorted cuboid list: " + 
sw.elapsed(TimeUnit.MILLISECONDS));
+        sw.reset();
+        sw.start();
+        CuboidStatsUtil.createAllDescendantsCache(cuboidSet);
+        System.out.println("Time elapsed for creating descendants cache: " + 
sw.elapsed(TimeUnit.MILLISECONDS));
+        sw.reset();
+        sw.start();
+        CuboidStatsUtil.createAllDescendantsCache2(cuboidSet);
+        System.out.println("Time elapsed for creating descendants cache2: " + 
sw.elapsed(TimeUnit.MILLISECONDS));
+    }
+}

Reply via email to