Re: [HACKERS] Optimizer questions

Konstantin Knizhnik Fri, 08 Jan 2016 00:59:35 -0800


Attached please find improved version of the optimizer patch for LIMIT clause.
Now I try to apply this optimization only for non-trivial columns requiring 
evaluation.
May be it will be better to take in account cost of this columns evaluation but 
right now I just detect non-variable columns.

On 01/06/2016 12:03 PM, David Rowley wrote:

On 6 January 2016 at 13:13, Alexander Korotkov <a.korot...@postgrespro.ru 
<mailto:a.korot...@postgrespro.ru>> wrote:

    On Wed, Jan 6, 2016 at 12:08 AM, Tom Lane <t...@sss.pgh.pa.us 
<mailto:t...@sss.pgh.pa.us>> wrote:

        konstantin knizhnik <k.knizh...@postgrespro.ru 
<mailto:k.knizh...@postgrespro.ru>> writes:
        > 1. The cost compared in grouping_planner doesn't take in account 
price of get_authorized_users - it is not changed when I am altering function 
cost. Is it correct behavior?

        The general problem of accounting for tlist eval cost is not handled 
very
        well now, but especially not with respect to the idea that different 
paths
        might have different tlist costs.  I'm working on an upper-planner 
rewrite
        which should make this better, or at least make it practical to make it
        better.


    Hmm... Besides costing it would be nice to postpone calculation of 
expensive tlist functions after LIMIT.


I'd agree that it would be more than the costings that would need to be 
improved here.

The most simple demonstration of the problem I can think of is, if I apply the 
following:

diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 29d92a7..2ec9822 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -641,6 +641,8 @@ int4pl(PG_FUNCTION_ARGS)

        result = arg1 + arg2;

+       elog(NOTICE, "int4pl(%d, %d)", arg1,arg2);
+
        /*
         * Overflow check.  If the inputs are of different signs then their sum
         * cannot overflow.  If the inputs are of the same sign, their sum had

Then do:

create table a (b int);
insert into a select generate_series(1,10);
select b+b as bb from a order by b limit 1;
NOTICE:  int4pl(1, 1)
NOTICE:  int4pl(2, 2)
NOTICE:  int4pl(3, 3)
NOTICE:  int4pl(4, 4)
NOTICE:  int4pl(5, 5)
NOTICE:  int4pl(6, 6)
NOTICE:  int4pl(7, 7)
NOTICE:  int4pl(8, 8)
NOTICE:  int4pl(9, 9)
NOTICE:  int4pl(10, 10)
 bb
----
  2
(1 row)

We can see that int4pl() is needlessly called 9 times. Although, I think this 
does only apply to queries with LIMIT. I agree that it does seem like an 
interesting route for optimisation.

It seems worthwhile to investigate how we might go about improving this so that 
the evaluation of the target list happens after LIMIT, at least for the columns 
which are not required before LIMIT.

Konstantin, are you thinking of looking into this more, with plans to implement 
code to improve this?

--
 David Rowley http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services



--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 797df31..4cbccac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -106,6 +106,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
 					   AttrNumber **groupColIdx, bool *need_tlist_eval);
 static int	get_grouping_column_index(Query *parse, TargetEntry *tle);
+static int	get_sort_column_index(Query *parse, TargetEntry *tle);
 static void locate_grouping_columns(PlannerInfo *root,
 						List *tlist,
 						List *sub_tlist,
@@ -2402,6 +2403,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 										  parse->limitCount,
 										  offset_est,
 										  count_est);
+		if (parse->sortClause && tlist != result_plan->targetlist)
+		{			
+			result_plan = (Plan *) make_result(root,
+											   tlist,
+											   NULL,
+											   result_plan);			
+		}
 	}
 
 	/*
@@ -3964,8 +3972,8 @@ make_subplanTargetList(PlannerInfo *root,
 					   bool *need_tlist_eval)
 {
 	Query	   *parse = root->parse;
-	List	   *sub_tlist;
-	List	   *non_group_cols;
+	List	   *sub_tlist = NIL;
+	List	   *non_group_cols = NIL;
 	List	   *non_group_vars;
 	int			numCols;
 
@@ -3978,6 +3986,61 @@ make_subplanTargetList(PlannerInfo *root,
 	if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual &&
 		!parse->hasWindowFuncs)
 	{
+		if (parse->sortClause && limit_needed(parse)) {
+			ListCell   *tl;
+			bool contains_non_vars = false;
+			*need_tlist_eval = false;	/* only eval if not flat tlist */
+			foreach(tl, tlist)
+			{
+				TargetEntry *tle = (TargetEntry *) lfirst(tl);
+				int			colno;
+				
+				colno = get_sort_column_index(parse, tle);
+				if (colno >= 0)
+				{
+					TargetEntry *newtle;
+					
+					newtle = makeTargetEntry(tle->expr,
+											 list_length(sub_tlist) + 1,
+											 NULL,
+											 false);
+					sub_tlist = lappend(sub_tlist, newtle);
+					if (!(newtle->expr && IsA(newtle->expr, Var)))
+						*need_tlist_eval = true;	/* tlist contains non Vars */
+				}
+				else
+				{
+					/*
+					 * Non-sorting column, so just remember the expression for
+					 * later call to pull_var_clause.  There's no need for
+					 * pull_var_clause to examine the TargetEntry node itself.
+					 */
+					non_group_cols = lappend(non_group_cols, tle->expr);
+					contains_non_vars |= !(tle->expr && IsA(tle->expr, Var));
+				}
+			}
+						
+			if (non_group_cols) /* there are some columns not used in order by */
+			{ 
+				non_group_vars = pull_var_clause((Node *) non_group_cols,
+												 PVC_RECURSE_AGGREGATES,
+												 PVC_INCLUDE_PLACEHOLDERS);
+				sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
+				/* clean up cruft */
+				list_free(non_group_vars);
+				list_free(non_group_cols);
+
+				if (contains_non_vars ) 
+				{ 
+					/*
+					 * This optimization makes sense only if target list contains some complex expressions, 
+					 * for example functions calls. May be it is better to check cost of this expressions,
+					 * but right now just apply this optimization if there are non-vars columns 
+					 */
+					return sub_tlist;
+				}
+			}
+		} 
 		*need_tlist_eval = true;
 		return tlist;
 	}
@@ -3986,8 +4049,6 @@ make_subplanTargetList(PlannerInfo *root,
 	 * Otherwise, we must build a tlist containing all grouping columns, plus
 	 * any other Vars mentioned in the targetlist and HAVING qual.
 	 */
-	sub_tlist = NIL;
-	non_group_cols = NIL;
 	*need_tlist_eval = false;	/* only eval if not flat tlist */
 
 	numCols = list_length(parse->groupClause);
@@ -4110,6 +4171,37 @@ get_grouping_column_index(Query *parse, TargetEntry *tle)
 }
 
 /*
+ * get_sort_column_index
+ *		Get the ORDER BY column position, if any, of a targetlist entry.
+ *
+ * Returns the index (counting from 0) of the TLE in the ORDER BY list, or -1
+ * if it's not a sorting column.  Note: the result is unique because the
+ * parser won't make multiple sortClause entries for the same TLE.
+ */
+static int
+get_sort_column_index(Query *parse, TargetEntry *tle)
+{
+	int			colno = 0;
+	Index		ressortgroupref = tle->ressortgroupref;
+	ListCell   *gl;
+
+	/* No need to search groupClause if TLE hasn't got a sortgroupref */
+	if (ressortgroupref == 0)
+		return -1;
+
+	foreach(gl, parse->sortClause)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(gl);
+
+		if (sortcl->tleSortGroupRef == ressortgroupref)
+			return colno;
+		colno++;
+	}
+
+	return -1;
+}
+
+/*
  * locate_grouping_columns
  *		Locate grouping columns in the tlist chosen by create_plan.
  *
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index b72e65d..25e8da1 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1208,12 +1208,13 @@ SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
 
 EXPLAIN (COSTS OFF)
 SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
-                    QUERY PLAN                     
----------------------------------------------------
- Limit
-   ->  Index Scan using ggcircleind on gcircle_tbl
-         Order By: (f1 <-> '(200,300)'::point)
-(3 rows)
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Result
+   ->  Limit
+         ->  Index Scan using ggcircleind on gcircle_tbl
+               Order By: (f1 <-> '(200,300)'::point)
+(4 rows)
 
 SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
  circle_center  | radius

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Optimizer questions

Reply via email to