Re: [HACKERS] Optimizer questions

Konstantin Knizhnik Tue, 08 Mar 2016 10:44:50 -0800

On 03/08/2016 07:01 AM, Tom Lane wrote:

Konstantin Knizhnik <k.knizh...@postgrespro.ru> writes:

Attached please find improved version of the optimizer patch for LIMIT clause.

This patch isn't anywhere close to working after 3fc6e2d7f5b652b4.
(TBH, the reason I was negative about this upthread is that I had that
one in the oven and knew it would conflict spectacularly.)  I encourage
you to think about how an optimization of this sort could be made to
work in a non-kluge fashion in the new code structure.


I've not spent a lot of time on this, but I think maybe what would make
sense is to consider both the case where function calculations are
postponed to after ORDER BY and the case where they aren't, and generate
Paths for both.  Neither approach is a slam-dunk win.  For example,
suppose that one of the tlist columns is md5(wide_column) --- it will
likely not be preferable to pass the wide column data through the sort
step rather than reducing it to a hash first.  This would require some
work in grouping_planner to track two possible pathtargets, and work in
create_ordered_paths to generate paths for both possibilities.  A possible
objection is that this would add planning work even when no real benefit
is possible; so maybe we should only consider the new way if the tlist has
significant eval cost?  Not sure about that.  There is also something
to be said for the idea that we should try to guarantee consistent
semantics when the tlist contains volatile functions.

For now, I've set this commitfest entry to Waiting on Author.  There's
still time to consider a rewrite in this 'fest, if you can get it done
in a week or two.

                        regards, tom lane


Attached please find the rebased patch.
Unfortunately, 3fc6e2d7f5b652b4 still doesn't fix the problem with "lazy" 
evaluation of the target list.
This is why my patch is still useful. But frankly speaking, I am not sure that 
it is the best way of fixing this problem,
because it takes into account only one case: sort+limit. Maybe the same 
optimization can be useful for other queries.


--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5fc8e5b..709d1ad 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -126,8 +126,9 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root,
 					 RelOptInfo *input_rel,
 					 double limit_tuples);
 static PathTarget *make_scanjoin_target(PlannerInfo *root, List *tlist,
-					 AttrNumber **groupColIdx);
+										AttrNumber **groupColIdx, bool* splitted_projection);
 static int	get_grouping_column_index(Query *parse, TargetEntry *tle);
+static int	get_sort_column_index(Query *parse, TargetEntry *tle);
 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
 static List *make_windowInputTargetList(PlannerInfo *root,
@@ -1381,6 +1382,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	RelOptInfo *current_rel;
 	RelOptInfo *final_rel;
 	ListCell   *lc;
+	bool splitted_projection = false;
 
 	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 	if (parse->limitCount || parse->limitOffset)
@@ -1657,7 +1659,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 * that were obtained within query_planner().
 		 */
 		sub_target = make_scanjoin_target(root, tlist,
-										  &groupColIdx);
+										  &groupColIdx, &splitted_projection);
 
 		/*
 		 * Forcibly apply that tlist to all the Paths for the scan/join rel.
@@ -1801,6 +1803,13 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	{
 		Path	   *path = (Path *) lfirst(lc);
 
+		if (splitted_projection)
+		{			
+			path = apply_projection_to_path(root, current_rel,
+											path, create_pathtarget(root, tlist));
+		}
+
+
 		/*
 		 * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
 		 * (Note: we intentionally test parse->rowMarks not root->rowMarks
@@ -3775,15 +3784,17 @@ create_ordered_paths(PlannerInfo *root,
 static PathTarget *
 make_scanjoin_target(PlannerInfo *root,
 					 List *tlist,
-					 AttrNumber **groupColIdx)
+					 AttrNumber **groupColIdx,
+	                 bool* splitted_projection)
 {
 	Query	   *parse = root->parse;
-	List	   *sub_tlist;
-	List	   *non_group_cols;
+	List	   *sub_tlist = NIL;
+	List	   *non_group_cols = NIL;
 	List	   *non_group_vars;
 	int			numCols;
 
 	*groupColIdx = NULL;
+	*splitted_projection = false;
 
 	/*
 	 * If we're not grouping or aggregating or windowing, there's nothing to
@@ -3791,14 +3802,66 @@ make_scanjoin_target(PlannerInfo *root,
 	 */
 	if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets &&
 		!root->hasHavingQual && !parse->hasWindowFuncs)
+	{
+		if (parse->sortClause && limit_needed(parse)) {
+			ListCell   *tl;
+			bool contains_non_vars = false;
+			foreach(tl, tlist)
+			{
+				TargetEntry *tle = (TargetEntry *) lfirst(tl);
+				int			colno;
+				
+				colno = get_sort_column_index(parse, tle);
+				if (colno >= 0)
+				{
+					TargetEntry *newtle;
+					
+					newtle = makeTargetEntry(tle->expr,
+											 list_length(sub_tlist) + 1,
+											 NULL,
+											 false);
+					sub_tlist = lappend(sub_tlist, newtle);
+				}
+				else
+				{
+					/*
+					 * Non-sorting column, so just remember the expression for
+					 * later call to pull_var_clause.  There's no need for
+					 * pull_var_clause to examine the TargetEntry node itself.
+					 */
+					non_group_cols = lappend(non_group_cols, tle->expr);
+					contains_non_vars |= !(tle->expr && IsA(tle->expr, Var));
+				}
+			}
+						
+			if (non_group_cols) /* there are some columns not used in order by */
+			{ 
+				non_group_vars = pull_var_clause((Node *) non_group_cols,
+												 PVC_RECURSE_AGGREGATES,
+												 PVC_INCLUDE_PLACEHOLDERS);
+				sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
+				/* clean up cruft */
+				list_free(non_group_vars);
+				list_free(non_group_cols);
+
+				if (contains_non_vars) 
+				{ 
+					/*
+					 * This optimization makes sense only if the target list contains some complex expressions,
+					 * for example function calls. Maybe it would be better to check the cost of these expressions,
+					 * but right now we just apply this optimization if there are non-Var columns.
+					 */
+					tlist = sub_tlist;
+					*splitted_projection = true;
+				}
+			}
+		} 
 		return create_pathtarget(root, tlist);
-
+	}
 	/*
 	 * Otherwise, we must build a tlist containing all grouping columns, plus
 	 * any other Vars mentioned in the targetlist and HAVING qual.
 	 */
-	sub_tlist = NIL;
-	non_group_cols = NIL;
 
 	numCols = list_length(parse->groupClause);
 	if (numCols > 0)
@@ -3918,6 +3981,38 @@ get_grouping_column_index(Query *parse, TargetEntry *tle)
 }
 
 /*
+ * get_sort_column_index
+ *		Get the ORDER BY column position, if any, of a targetlist entry.
+ *
+ * Returns the index (counting from 0) of the TLE in the ORDER BY list, or -1
+ * if it's not a sorting column.  Note: the result is unique because the
+ * parser won't make multiple sortClause entries for the same TLE.
+ */
+static int
+get_sort_column_index(Query *parse, TargetEntry *tle)
+{
+	int			colno = 0;
+	Index		ressortgroupref = tle->ressortgroupref;
+	ListCell   *gl;
+
+	/* No need to search sortClause if TLE hasn't got a sortgroupref */
+	if (ressortgroupref == 0)
+		return -1;
+
+	foreach(gl, parse->sortClause)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(gl);
+
+		if (sortcl->tleSortGroupRef == ressortgroupref)
+			return colno;		/* first matching sortClause entry */
+		colno++;
+	}
+
+	return -1;					/* no sortClause entry refers to this TLE */
+}
+
+
+/*
  * postprocess_setop_tlist
  *	  Fix up targetlist returned by plan_set_operations().
  *

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Optimizer questions

Reply via email to