[
https://issues.apache.org/jira/browse/PHOENIX-5105?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
chenglei updated PHOENIX-5105:
------------------------------
Description:
Given two tables:
{code}
CREATE TABLE Merge1 (
aid INTEGER PRIMARY KEY,
age INTEGER)
CREATE TABLE Merge2 (
bid INTEGER PRIMARY KEY,
code INTEGER)
{code}
for following sql :
{code}
select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from
(select aid,age from merge1 where age >=11 and age<=33 order by age limit 3) a
inner join
(select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where
b.code > 50
{code}
For the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is
pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then {{order
by b.bid}} is appended to it , finally the RHS is rewritten to as
{{select bid,code from (select bid,code from merge2 order by code limit 1)
order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}.
The rewritten sql is then compiled to ClientScanPlan by following line 221
,and previously pushed down {{b.code > 50}} is compiled by
{{table.compilePostFilterExpression}} by following line 224 to filter the
result of
the ClientScanPlan. The problem here is that we execute the {{order by bid}}
first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we
can rewrite the RHS as
{{select bid,code from (select bid,code from merge2 order by code limit 1)
order by bid where code > 50}} to
first filter {{b.code > 50}} and then execute the {{order by bid}} .
{code}
208 protected QueryPlan compileJoinQuery(StatementContext context,
List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean
projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
209 if (joinTable.getJoinSpecs().isEmpty()) {
210 Table table = joinTable.getTable();
211 SelectStatement subquery = table.getAsSubquery(orderBy);
212 if (!table.isSubselect()) {
213 context.setCurrentTable(table.getTableRef());
214 PTable projectedTable =
table.createProjectedTable(!projectPKColumns, context);
215 TupleProjector projector = new
TupleProjector(projectedTable);
216
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
217
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
context.getConnection(), subquery.getUdfParseNodes()));
218 table.projectColumns(context.getScan());
219 return compileSingleFlatQuery(context, subquery, binds,
asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
220 }
221 QueryPlan plan = compileSubquery(subquery, false);
222 PTable projectedTable =
table.createProjectedTable(plan.getProjector());
223
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
context.getConnection(), subquery.getUdfParseNodes()));
224 return new TupleProjectionPlan(plan, new
TupleProjector(plan.getProjector()),
table.compilePostFilterExpression(context));
225 }
{code}
was:
Given two tables:
{code}
CREATE TABLE Merge1 (
aid INTEGER PRIMARY KEY,
age INTEGER)
CREATE TABLE Merge2 (
bid INTEGER PRIMARY KEY,
code INTEGER)
{code}
for following sql :
{code}
select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from
(select aid,age from merge1 where age >=11 and age<=33 order by age limit 3) a
inner join
(select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where
b.code > 50
{code}
As the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is
pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then {{order
by b.bid}} is appended to it , finally the RHS is rewritten to as
{{select bid,code from (select bid,code from merge2 order by code limit 1)
order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}.
The rewritten sql is then compiled to ClientScanPlan by following line 221
,and previously pushed down {{b.code > 50}} is compiled by
{{table.compilePostFilterExpression}} by following line 224 to filter the
result of
the ClientScanPlan. The problem here is that we execute the {{order by bid}}
first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we
can rewrite the RHS as
{{select bid,code from (select bid,code from merge2 order by code limit 1)
order by bid where code > 50}} to
first filter {{b.code > 50}} and then execute the {{order by bid}} .
{code}
208 protected QueryPlan compileJoinQuery(StatementContext context,
List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean
projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
209 if (joinTable.getJoinSpecs().isEmpty()) {
210 Table table = joinTable.getTable();
211 SelectStatement subquery = table.getAsSubquery(orderBy);
212 if (!table.isSubselect()) {
213 context.setCurrentTable(table.getTableRef());
214 PTable projectedTable =
table.createProjectedTable(!projectPKColumns, context);
215 TupleProjector projector = new
TupleProjector(projectedTable);
216
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
217
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
context.getConnection(), subquery.getUdfParseNodes()));
218 table.projectColumns(context.getScan());
219 return compileSingleFlatQuery(context, subquery, binds,
asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
220 }
221 QueryPlan plan = compileSubquery(subquery, false);
222 PTable projectedTable =
table.createProjectedTable(plan.getProjector());
223
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
context.getConnection(), subquery.getUdfParseNodes()));
224 return new TupleProjectionPlan(plan, new
TupleProjector(plan.getProjector()),
table.compilePostFilterExpression(context));
225 }
{code}
> Push Filter through Sort for SortMergeJoin
> ------------------------------------------
>
> Key: PHOENIX-5105
> URL: https://issues.apache.org/jira/browse/PHOENIX-5105
> Project: Phoenix
> Issue Type: Improvement
> Affects Versions: 4.14.1
> Reporter: chenglei
> Priority: Major
>
> Given two tables:
> {code}
> CREATE TABLE Merge1 (
> aid INTEGER PRIMARY KEY,
> age INTEGER)
>
> CREATE TABLE Merge2 (
> bid INTEGER PRIMARY KEY,
> code INTEGER)
> {code}
> for following sql :
> {code}
> select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from
> (select aid,age from merge1 where age >=11 and age<=33 order by age limit 3)
> a inner join
> (select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where
> b.code > 50
> {code}
> For the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is
> pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then
> {{order by b.bid}} is appended to it , finally the RHS is rewritten to as
> {{select bid,code from (select bid,code from merge2 order by code limit 1)
> order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}.
> The rewritten sql is then compiled to ClientScanPlan by following line 221
> ,and previously pushed down {{b.code > 50}} is compiled by
> {{table.compilePostFilterExpression}} by following line 224 to filter the
> result of
> the ClientScanPlan. The problem here is that we execute the {{order by bid}}
> first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we
> can rewrite the RHS as
> {{select bid,code from (select bid,code from merge2 order by code limit 1)
> order by bid where code > 50}} to
> first filter {{b.code > 50}} and then execute the {{order by bid}} .
> {code}
> 208 protected QueryPlan compileJoinQuery(StatementContext context,
> List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean
> projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
> 209 if (joinTable.getJoinSpecs().isEmpty()) {
> 210 Table table = joinTable.getTable();
> 211 SelectStatement subquery = table.getAsSubquery(orderBy);
> 212 if (!table.isSubselect()) {
> 213 context.setCurrentTable(table.getTableRef());
> 214 PTable projectedTable =
> table.createProjectedTable(!projectPKColumns, context);
> 215 TupleProjector projector = new
> TupleProjector(projectedTable);
> 216
> TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
> 217
> context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
> context.getConnection(), subquery.getUdfParseNodes()));
> 218 table.projectColumns(context.getScan());
> 219 return compileSingleFlatQuery(context, subquery, binds,
> asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
> 220 }
> 221 QueryPlan plan = compileSubquery(subquery, false);
> 222 PTable projectedTable =
> table.createProjectedTable(plan.getProjector());
> 223
> context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable,
> context.getConnection(), subquery.getUdfParseNodes()));
> 224 return new TupleProjectionPlan(plan, new
> TupleProjector(plan.getProjector()),
> table.compilePostFilterExpression(context));
> 225 }
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)