Repository: phoenix Updated Branches: refs/heads/4.x-HBase-1.1 0ea9f3cc5 -> bce276f58
PHOENIX-2965 Use DistinctPrefixFilter logic for COUNT(DISTINCT ...) and COUNT(...) GROUP BY; recommit. Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/bce276f5 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/bce276f5 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/bce276f5 Branch: refs/heads/4.x-HBase-1.1 Commit: bce276f58427a6d10b0db2ce2e231eab15d99f15 Parents: 0ea9f3c Author: Lars Hofhansl <la...@apache.org> Authored: Sun Jun 12 11:19:53 2016 -0700 Committer: Lars Hofhansl <la...@apache.org> Committed: Sun Jun 12 11:20:49 2016 -0700 ---------------------------------------------------------------------- .../phoenix/end2end/DistinctPrefixFilterIT.java | 18 +++-- .../apache/phoenix/compile/GroupByCompiler.java | 75 ++++++++++++++++---- .../phoenix/iterate/BaseResultIterators.java | 3 +- .../phoenix/compile/QueryCompilerTest.java | 25 +++++++ 4 files changed, 102 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/bce276f5/phoenix-core/src/it/java/org/apache/phoenix/end2end/DistinctPrefixFilterIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/DistinctPrefixFilterIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/DistinctPrefixFilterIT.java index 4050314..8229243 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/DistinctPrefixFilterIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/DistinctPrefixFilterIT.java @@ -135,10 +135,9 @@ public class DistinctPrefixFilterIT extends BaseHBaseManagedTimeTableReuseIT { private void testCommonPlans(String testTable, String contains) throws Exception { testPlan("SELECT DISTINCT prefix1 FROM "+testTable, true); - // COUNT(DISTINCT) is not yet optimized - testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable, false); - testPlan("SELECT COUNT(DISTINCT prefix1), COUNT(DISTINCT prefix2) FROM "+testTable, false); - testPlan("SELECT COUNT(DISTINCT prefix1), COUNT(DISTINCT (prefix1,prefix2)) FROM "+testTable, false); + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable, true); + testPlan("SELECT COUNT(DISTINCT prefix1), COUNT(DISTINCT prefix2) FROM "+testTable, true); + testPlan("SELECT COUNT(DISTINCT prefix1), COUNT(DISTINCT (prefix1,prefix2)) FROM "+testTable, true); // a plain aggregate, cannot optimize testPlan("SELECT COUNT(prefix1), COUNT(DISTINCT prefix1) FROM "+testTable, false); testPlan("SELECT COUNT(*) FROM (SELECT DISTINCT(prefix1) FROM "+testTable+")", true); @@ -163,6 +162,15 @@ public class DistinctPrefixFilterIT extends BaseHBaseManagedTimeTableReuseIT { testPlan("SELECT prefix1, 1, 2 FROM "+testTable+" GROUP BY prefix1", true); testPlan("SELECT prefix1 FROM "+testTable+" GROUP BY prefix1, col1", false); testPlan("SELECT DISTINCT prefix1, prefix2 FROM "+testTable+" WHERE col1 > 0.5", true); + + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable+" HAVING COUNT(col1) > 10", false); + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable+" ORDER BY COUNT(col1)", false); + + // can't optimize the following, yet, even though it would be possible + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable+" HAVING COUNT(DISTINCT prefix2) > 10", false); + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable+" HAVING COUNT(DISTINCT prefix1) > 10", false); + testPlan("SELECT COUNT(DISTINCT prefix1) / 10 FROM "+testTable, false); + testPlan("SELECT COUNT(DISTINCT prefix1) FROM "+testTable+" ORDER BY COUNT(prefix1)", false); } private void testPlan(String query, boolean optimizable) throws Exception { @@ -245,6 +253,8 @@ public class DistinctPrefixFilterIT extends BaseHBaseManagedTimeTableReuseIT { testCount("SELECT %s COUNT(DISTINCT prefix1), COUNT(DISTINCT prefix2) FROM " + testTable, 4, 4); testCount("SELECT %s COUNT(DISTINCT prefix1), COUNT(DISTINCT (prefix1, prefix2)) FROM " + testTable, 4, 11); testCount("SELECT %s COUNT(DISTINCT prefix1), COUNT(DISTINCT (prefix1, prefix2)) FROM " + testTable + " WHERE col1 > 0.99", -1, -1); + + testCount("SELECT %s COUNT(DISTINCT col1) FROM " + testTable, -1); } @Test http://git-wip-us.apache.org/repos/asf/phoenix/blob/bce276f5/phoenix-core/src/main/java/org/apache/phoenix/compile/GroupByCompiler.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/GroupByCompiler.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/GroupByCompiler.java index fd6d6e9..948b1c9 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/compile/GroupByCompiler.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/compile/GroupByCompiler.java @@ -33,6 +33,8 @@ import org.apache.phoenix.execute.TupleProjector; import org.apache.phoenix.expression.CoerceExpression; import org.apache.phoenix.expression.Expression; import org.apache.phoenix.parse.AliasedNode; +import org.apache.phoenix.parse.DistinctCountParseNode; +import org.apache.phoenix.parse.HintNode.Hint; import org.apache.phoenix.parse.ParseNode; import org.apache.phoenix.parse.SelectStatement; import org.apache.phoenix.schema.AmbiguousColumnException; @@ -59,6 +61,7 @@ public class GroupByCompiler { private final List<Expression> keyExpressions; private final boolean isOrderPreserving; private final int orderPreservingColumnCount; + private final boolean isUngroupedAggregate; public static final GroupByCompiler.GroupBy EMPTY_GROUP_BY = new GroupBy(new GroupByBuilder()) { @Override public GroupBy compile(StatementContext context, TupleProjector tupleProjector) throws SQLException { @@ -98,6 +101,7 @@ public class GroupByCompiler { ImmutableList.copyOf(builder.keyExpressions); this.isOrderPreserving = builder.isOrderPreserving; this.orderPreservingColumnCount = builder.orderPreservingColumnCount; + this.isUngroupedAggregate = builder.isUngroupedAggregate; } public List<Expression> getExpressions() { @@ -109,9 +113,13 @@ public class GroupByCompiler { } public String getScanAttribName() { - return isOrderPreserving ? - BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS : - BaseScannerRegionObserver.UNORDERED_GROUP_BY_EXPRESSIONS; + if (isUngroupedAggregate) { + return BaseScannerRegionObserver.UNGROUPED_AGG; + } else if (isOrderPreserving) { + return BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS; + } else { + return BaseScannerRegionObserver.UNORDERED_GROUP_BY_EXPRESSIONS; + } } public boolean isEmpty() { @@ -122,6 +130,10 @@ public class GroupByCompiler { return isOrderPreserving; } + public boolean isUngroupedAggregate() { + return isUngroupedAggregate; + } + public int getOrderPreservingColumnCount() { return orderPreservingColumnCount; } @@ -145,7 +157,9 @@ public class GroupByCompiler { if (isOrderPreserving) { return new GroupBy.GroupByBuilder(this).setOrderPreservingColumnCount(orderPreservingColumnCount).build(); } - + if (isUngroupedAggregate) { + return UNGROUPED_GROUP_BY; + } List<Expression> expressions = Lists.newArrayListWithExpectedSize(this.expressions.size()); List<Expression> keyExpressions = expressions; List<Pair<Integer,Expression>> groupBys = Lists.newArrayListWithExpectedSize(this.expressions.size()); @@ -243,6 +257,7 @@ public class GroupByCompiler { private int orderPreservingColumnCount; private List<Expression> expressions = Collections.emptyList(); private List<Expression> keyExpressions = Collections.emptyList(); + private boolean isUngroupedAggregate; public GroupByBuilder() { } @@ -252,6 +267,7 @@ public class GroupByCompiler { this.orderPreservingColumnCount = groupBy.orderPreservingColumnCount; this.expressions = groupBy.expressions; this.keyExpressions = groupBy.keyExpressions; + this.isUngroupedAggregate = groupBy.isUngroupedAggregate; } public GroupByBuilder setExpressions(List<Expression> expressions) { @@ -269,6 +285,11 @@ public class GroupByCompiler { return this; } + public GroupByBuilder setIsUngroupedAggregate(boolean isUngroupedAggregate) { + this.isUngroupedAggregate = isUngroupedAggregate; + return this; + } + public GroupByBuilder setOrderPreservingColumnCount(int orderPreservingColumnCount) { this.orderPreservingColumnCount = orderPreservingColumnCount; return this; @@ -280,7 +301,9 @@ public class GroupByCompiler { } public void explain(List<String> planSteps, Integer limit) { - if (isOrderPreserving) { + if (isUngroupedAggregate) { + planSteps.add(" SERVER AGGREGATE INTO SINGLE ROW"); + } else if (isOrderPreserving) { planSteps.add(" SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY " + getExpressions() + (limit == null ? "" : " LIMIT " + limit + " GROUP" + (limit.intValue() == 1 ? "" : "S"))); } else { planSteps.add(" SERVER AGGREGATE INTO DISTINCT ROWS BY " + getExpressions() + (limit == null ? "" : " LIMIT " + limit + " GROUP" + (limit.intValue() == 1 ? "" : "S"))); @@ -303,18 +326,39 @@ public class GroupByCompiler { * Otherwise, we need to insert a step after the Merge that dedups. * Order by only allowed on columns in the select distinct */ + boolean isUngroupedAggregate = false; if (groupByNodes.isEmpty()) { if (statement.isAggregate()) { - return GroupBy.UNGROUPED_GROUP_BY; - } - if (!statement.isDistinct()) { + // do not optimize if + // 1. we were asked not to optimize + // 2. there's any HAVING clause + // 3. there's any ORDER BY clause + // TODO: PHOENIX-2989 suggests some ways to optimize the latter two cases + if (statement.getHint().hasHint(Hint.RANGE_SCAN) || + statement.getHaving() != null || + !statement.getOrderBy().isEmpty()) { + return GroupBy.UNGROUPED_GROUP_BY; + } + groupByNodes = Lists.newArrayListWithExpectedSize(statement.getSelect().size()); + for (AliasedNode aliasedNode : statement.getSelect()) { + if (aliasedNode.getNode() instanceof DistinctCountParseNode) { + // only add children of DistinctCount nodes + groupByNodes.addAll(aliasedNode.getNode().getChildren()); + } else { + // if we found anything else, do not attempt any further optimization + return GroupBy.UNGROUPED_GROUP_BY; + } + } + isUngroupedAggregate = true; + } else if (statement.isDistinct()) { + groupByNodes = Lists.newArrayListWithExpectedSize(statement.getSelect().size()); + for (AliasedNode aliasedNode : statement.getSelect()) { + // for distinct at all select expression as group by conditions + groupByNodes.add(aliasedNode.getNode()); + } + } else { return GroupBy.EMPTY_GROUP_BY; } - - groupByNodes = Lists.newArrayListWithExpectedSize(statement.getSelect().size()); - for (AliasedNode aliasedNode : statement.getSelect()) { - groupByNodes.add(aliasedNode.getNode()); - } } // Accumulate expressions in GROUP BY @@ -337,7 +381,10 @@ public class GroupByCompiler { if (expressions.isEmpty()) { return GroupBy.EMPTY_GROUP_BY; } - GroupBy groupBy = new GroupBy.GroupByBuilder().setIsOrderPreserving(isOrderPreserving).setExpressions(expressions).setKeyExpressions(expressions).build(); + GroupBy groupBy = new GroupBy.GroupByBuilder() + .setIsOrderPreserving(isOrderPreserving) + .setExpressions(expressions).setKeyExpressions(expressions) + .setIsUngroupedAggregate(isUngroupedAggregate).build(); return groupBy; } http://git-wip-us.apache.org/repos/asf/phoenix/blob/bce276f5/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java index 856cd7b..57458ce 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java @@ -229,7 +229,8 @@ public abstract class BaseResultIterators extends ExplainTable implements Result if (cols > 0 && !plan.getStatement().getHint().hasHint(HintNode.Hint.RANGE_SCAN) && cols < plan.getTableRef().getTable().getRowKeySchema().getFieldCount() && - plan.getGroupBy().isOrderPreserving() && context.getAggregationManager().isEmpty()) + plan.getGroupBy().isOrderPreserving() && + (context.getAggregationManager().isEmpty() || plan.getGroupBy().isUngroupedAggregate())) { ScanUtil.andFilterAtEnd(context.getScan(), new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), http://git-wip-us.apache.org/repos/asf/phoenix/blob/bce276f5/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java index 1db90a9..e67a0e3 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java @@ -834,6 +834,31 @@ public class QueryCompilerTest extends BaseConnectionlessQueryTest { } @Test + public void testSelectDistinctAndOrderBy() throws Exception { + long ts = nextTimestamp(); + String query = "select /*+ RANGE_SCAN */ count(distinct organization_id) from atable order by organization_id"; + String query1 = "select count(distinct organization_id) from atable order by organization_id"; + String url = getUrl() + ";" + PhoenixRuntime.CURRENT_SCN_ATTRIB + "=" + (ts + 5); // Run query at timestamp 5 + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + Connection conn = DriverManager.getConnection(url, props); + try { + PreparedStatement statement = conn.prepareStatement(query); + statement.executeQuery(); + fail(); + } catch (SQLException e) { // expected + assertEquals(SQLExceptionCode.AGGREGATE_WITH_NOT_GROUP_BY_COLUMN.getErrorCode(), e.getErrorCode()); + } + try { + PreparedStatement statement = conn.prepareStatement(query1); + statement.executeQuery(); + fail(); + } catch (SQLException e) { // expected + assertEquals(SQLExceptionCode.AGGREGATE_WITH_NOT_GROUP_BY_COLUMN.getErrorCode(), e.getErrorCode()); + } + conn.close(); + } + + @Test public void testOrderByNotInSelectDistinctAgg() throws Exception { long ts = nextTimestamp(); String query = "SELECT distinct count(1) from atable order by x_integer";