[ 
https://issues.apache.org/jira/browse/PHOENIX-5105?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

chenglei updated PHOENIX-5105:
------------------------------
    Description: 
Given two tables:
{code}
          CREATE TABLE Merge1 ( 
                    aid INTEGER PRIMARY KEY,
                    age INTEGER)
          

          CREATE TABLE Merge2  ( 
                    bid INTEGER PRIMARY KEY,
                    code INTEGER)
{code}

for following sql :
{code}
select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from 
(select aid,age from merge1 where age >=11 and age<=33 order by age limit 3) a 
inner join 
(select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where 
b.code > 50
{code}

For the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is 
pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then {{order 
by b.bid}} is appended to RHS , finally the RHS is rewritten to as 
 {{select bid,code from (select bid,code from merge2 order by code limit 1) 
order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}. 

The rewritten sql  is  then compiled to ClientScanPlan by following line 221 
,and previously pushed down {{b.code > 50}} is compiled by 
{{table.compilePostFilterExpression}} by following line 224 to filter the 
result of 
the ClientScanPlan. The problem here is that we execute the {{order by bid}} 
first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we 
can rewrite the RHS as 
{{select bid,code from (select bid,code from merge2 order by code limit 1) 
order by bid where code > 50}} to
first filter {{b.code > 50}} and then execute the {{order by bid}} .



{code}
208    protected QueryPlan compileJoinQuery(StatementContext context, 
List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean 
projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
209         if (joinTable.getJoinSpecs().isEmpty()) {
210              Table table = joinTable.getTable();
211               SelectStatement subquery = table.getAsSubquery(orderBy);
212              if (!table.isSubselect()) {
213                  context.setCurrentTable(table.getTableRef());
214                  PTable projectedTable = 
table.createProjectedTable(!projectPKColumns, context);
215                  TupleProjector projector = new 
TupleProjector(projectedTable);
216                  
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
217                  
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
context.getConnection(), subquery.getUdfParseNodes()));
218                  table.projectColumns(context.getScan());
219                  return compileSingleFlatQuery(context, subquery, binds, 
asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
220            }
221            QueryPlan plan = compileSubquery(subquery, false);
222            PTable projectedTable = 
table.createProjectedTable(plan.getProjector());
223            
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
context.getConnection(), subquery.getUdfParseNodes()));
224            return new TupleProjectionPlan(plan, new 
TupleProjector(plan.getProjector()), 
table.compilePostFilterExpression(context));
225        }
{code}

  was:
Given two tables:
{code}
          CREATE TABLE Merge1 ( 
                    aid INTEGER PRIMARY KEY,
                    age INTEGER)
          

          CREATE TABLE Merge2  ( 
                    bid INTEGER PRIMARY KEY,
                    code INTEGER)
{code}

for following sql :
{code}
select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from 
(select aid,age from merge1 where age >=11 and age<=33 order by age limit 3) a 
inner join 
(select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where 
b.code > 50
{code}

For the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is 
pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then {{order 
by b.bid}} is appended to it , finally the RHS is rewritten to as 
 {{select bid,code from (select bid,code from merge2 order by code limit 1) 
order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}. 

The rewritten sql  is  then compiled to ClientScanPlan by following line 221 
,and previously pushed down {{b.code > 50}} is compiled by 
{{table.compilePostFilterExpression}} by following line 224 to filter the 
result of 
the ClientScanPlan. The problem here is that we execute the {{order by bid}} 
first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we 
can rewrite the RHS as 
{{select bid,code from (select bid,code from merge2 order by code limit 1) 
order by bid where code > 50}} to
first filter {{b.code > 50}} and then execute the {{order by bid}} .



{code}
208    protected QueryPlan compileJoinQuery(StatementContext context, 
List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean 
projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
209         if (joinTable.getJoinSpecs().isEmpty()) {
210              Table table = joinTable.getTable();
211               SelectStatement subquery = table.getAsSubquery(orderBy);
212              if (!table.isSubselect()) {
213                  context.setCurrentTable(table.getTableRef());
214                  PTable projectedTable = 
table.createProjectedTable(!projectPKColumns, context);
215                  TupleProjector projector = new 
TupleProjector(projectedTable);
216                  
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
217                  
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
context.getConnection(), subquery.getUdfParseNodes()));
218                  table.projectColumns(context.getScan());
219                  return compileSingleFlatQuery(context, subquery, binds, 
asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
220            }
221            QueryPlan plan = compileSubquery(subquery, false);
222            PTable projectedTable = 
table.createProjectedTable(plan.getProjector());
223            
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
context.getConnection(), subquery.getUdfParseNodes()));
224            return new TupleProjectionPlan(plan, new 
TupleProjector(plan.getProjector()), 
table.compilePostFilterExpression(context));
225        }
{code}


> Push Filter through Sort for SortMergeJoin
> ------------------------------------------
>
>                 Key: PHOENIX-5105
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-5105
>             Project: Phoenix
>          Issue Type: Improvement
>    Affects Versions: 4.14.1
>            Reporter: chenglei
>            Priority: Major
>
> Given two tables:
> {code}
>           CREATE TABLE Merge1 ( 
>                     aid INTEGER PRIMARY KEY,
>                     age INTEGER)
>           
>           CREATE TABLE Merge2  ( 
>                     bid INTEGER PRIMARY KEY,
>                     code INTEGER)
> {code}
> for following sql :
> {code}
> select /*+ USE_SORT_MERGE_JOIN */ a.aid,b.code from 
> (select aid,age from merge1 where age >=11 and age<=33 order by age limit 3) 
> a inner join 
> (select bid,code from merge2 order by code limit 1) b on a.aid=b.bid where 
> b.code > 50
> {code}
> For the RHS of SortMergeJoin, at first the where condition {{b.code > 50}} is 
> pushed down to RHS as its {{JoinCompiler.Table.postFilters}}, and then 
> {{order by b.bid}} is appended to RHS , finally the RHS is rewritten to as 
>  {{select bid,code from (select bid,code from merge2 order by code limit 1) 
> order by bid}} by following line 211 in {{QueryCompiler.compileJoinQuery}}. 
> The rewritten sql  is  then compiled to ClientScanPlan by following line 221 
> ,and previously pushed down {{b.code > 50}} is compiled by 
> {{table.compilePostFilterExpression}} by following line 224 to filter the 
> result of 
> the ClientScanPlan. The problem here is that we execute the {{order by bid}} 
> first and then the postFilter {{b.code > 50}}, it is inefficient. In fact, we 
> can rewrite the RHS as 
> {{select bid,code from (select bid,code from merge2 order by code limit 1) 
> order by bid where code > 50}} to
> first filter {{b.code > 50}} and then execute the {{order by bid}} .
> {code}
> 208    protected QueryPlan compileJoinQuery(StatementContext context, 
> List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean 
> projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
> 209         if (joinTable.getJoinSpecs().isEmpty()) {
> 210              Table table = joinTable.getTable();
> 211               SelectStatement subquery = table.getAsSubquery(orderBy);
> 212              if (!table.isSubselect()) {
> 213                  context.setCurrentTable(table.getTableRef());
> 214                  PTable projectedTable = 
> table.createProjectedTable(!projectPKColumns, context);
> 215                  TupleProjector projector = new 
> TupleProjector(projectedTable);
> 216                  
> TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
> 217                  
> context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
> context.getConnection(), subquery.getUdfParseNodes()));
> 218                  table.projectColumns(context.getScan());
> 219                  return compileSingleFlatQuery(context, subquery, binds, 
> asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
> 220            }
> 221            QueryPlan plan = compileSubquery(subquery, false);
> 222            PTable projectedTable = 
> table.createProjectedTable(plan.getProjector());
> 223            
> context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, 
> context.getConnection(), subquery.getUdfParseNodes()));
> 224            return new TupleProjectionPlan(plan, new 
> TupleProjector(plan.getProjector()), 
> table.compilePostFilterExpression(context));
> 225        }
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to