Thanks for the feedback on my original patch, Robert. Here's an updated
patch that tunnels through multiple SubqueryScanStates.
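
To illustrate the multiple-subquery case, here's a sketch (not part of the
patch's regression test) reusing the sq_limit table created by the test
below. The random() calls are only an assumption to keep the planner from
flattening the nested subqueries; whether both Subquery Scan nodes actually
survive pullup and trivial-scan removal depends on the planner, so treat the
plan shape as illustrative rather than guaranteed:

    -- random() is volatile, which should prevent each subquery from being
    -- pulled up, leaving two SubqueryScanStates between the Limit and the Sort
    select * from
        (select pk, c2, random() as r1 from
            (select pk, c1, c2, random() as r0 from sq_limit
             order by c1, pk) as y) as x
    limit 3;

If both subquery scans are kept, neither has quals or SRFs in its target
list, so the new while loop tunnels through them and still passes the bound
down to the Sort.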

- Doug
Salesforce

On Thu, Aug 17, 2017 at 6:33 PM Robert Haas <robertmh...@gmail.com> wrote:

> On Thu, Aug 17, 2017 at 11:36 AM, Douglas Doole <dougdo...@gmail.com>
> wrote:
>
>> I completely agree. The further a limit can be pushed down, the better.
>>
>> The patch looks good to me.
>>
>
> It seems like a somewhat ad-hoc approach; it supposes that we can take any
> query produced by deparseSelectStmtForRel() and stick a LIMIT clause onto
> the very end and all will be well.  Maybe that's not a problematic
> assumption, not sure.  The grammar happens to allow both FOR UPDATE LIMIT n
> and LIMIT n FOR UPDATE even though only the latter syntax is documented.
>
> Regarding the other patch on this thread, you mentioned upthread that "If
> it is possible to get more than one SubqueryScanState and/or ResultState
> between the limit and sort, then the first block of code could be placed in
> a while loop."  I think that's not possible for a ResultState, but I think
> it *is* possible for a SubqueryScanState.
>
> --
> Robert Haas
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>
*** a/src/backend/executor/nodeLimit.c
--- b/src/backend/executor/nodeLimit.c
***************
*** 308,313 **** recompute_limits(LimitState *node)
--- 308,315 ----
   * since the MergeAppend surely need read no more than that many tuples from
   * any one input.  We also have to be prepared to look through a Result,
   * since the planner might stick one atop MergeAppend for projection purposes.
+  * We can also accept a subquery that has no quals or SRFs (that is, the
+  * subquery is just projecting columns) between the LIMIT and any of the above.
   *
   * This is a bit of a kluge, but we don't have any more-abstract way of
   * communicating between the two nodes; and it doesn't seem worth trying
***************
*** 320,325 **** recompute_limits(LimitState *node)
--- 322,348 ----
  static void
  pass_down_bound(LimitState *node, PlanState *child_node)
  {
+ 	/*
+ 	 * If the child is a subquery that does no filtering (no predicates)
+ 	 * and has no SRFs in its target list, then we can potentially push the
+ 	 * limit through the subquery.  It is possible that we could have
+ 	 * multiple such subqueries, so tunnel through them all.
+ 	 */
+ 	while (IsA(child_node, SubqueryScanState))
+ 	{
+ 		SubqueryScanState *subqueryScanState = (SubqueryScanState *) child_node;
+ 
+ 		/*
+ 		 * A non-empty qual list or an SRF means we cannot push down the limit.
+ 		 */
+ 		if (subqueryScanState->ss.ps.qual != NULL ||
+ 			expression_returns_set((Node *) child_node->plan->targetlist))
+ 			return;
+ 
+ 		/* Use the child in the following checks */
+ 		child_node = subqueryScanState->subplan;
+ 	}
+ 
  	if (IsA(child_node, SortState))
  	{
  		SortState  *sortState = (SortState *) child_node;
*** a/src/test/regress/expected/subselect.out
--- b/src/test/regress/expected/subselect.out
***************
*** 1041,1043 **** NOTICE:  x = 9, y = 13
--- 1041,1077 ----
  (3 rows)
  
  drop function tattle(x int, y int);
+ -------------------------------------------------
+ -- Test LIMIT pushdown through subquery scan node
+ -------------------------------------------------
+ create table sq_limit (pk int primary key, c1 int, c2 int);
+ insert into sq_limit values
+     (1, 1, 1),
+     (2, 2, 2),
+     (3, 3, 3),
+     (4, 4, 4),
+     (5, 1, 1),
+     (6, 2, 2),
+     (7, 3, 3),
+     (8, 4, 4);
+ explain (analyze, summary off, timing off, costs off)
+ select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
+                            QUERY PLAN                           
+ ----------------------------------------------------------------
+  Limit (actual rows=3 loops=1)
+    ->  Subquery Scan on x (actual rows=3 loops=1)
+          ->  Sort (actual rows=3 loops=1)
+                Sort Key: sq_limit.c1, sq_limit.pk
+                Sort Method: top-N heapsort  Memory: 25kB
+                ->  Seq Scan on sq_limit (actual rows=8 loops=1)
+ (6 rows)
+ 
+ select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
+  pk | c2 
+ ----+----
+   1 |  1
+   5 |  1
+   2 |  2
+ (3 rows)
+ 
+ drop table sq_limit;
*** a/src/test/regress/sql/subselect.sql
--- b/src/test/regress/sql/subselect.sql
***************
*** 540,542 **** select * from
--- 540,563 ----
    where tattle(x, u);
  
  drop function tattle(x int, y int);
+ 
+ -------------------------------------------------
+ -- Test LIMIT pushdown through subquery scan node
+ -------------------------------------------------
+ create table sq_limit (pk int primary key, c1 int, c2 int);
+ insert into sq_limit values
+     (1, 1, 1),
+     (2, 2, 2),
+     (3, 3, 3),
+     (4, 4, 4),
+     (5, 1, 1),
+     (6, 2, 2),
+     (7, 3, 3),
+     (8, 4, 4);
+ 
+ explain (analyze, summary off, timing off, costs off)
+ select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
+ 
+ select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3;
+ 
+ drop table sq_limit;