Attached please find improved version of the optimizer patch for LIMIT clause.
Now I try to apply this optimization only for non-trivial columns requiring
evaluation.
May be it will be better to take in account cost of this columns evaluation but
right now I just detect non-variable columns.
On 01/06/2016 12:03 PM, David Rowley wrote:
On 6 January 2016 at 13:13, Alexander Korotkov <a.korot...@postgrespro.ru
<mailto:a.korot...@postgrespro.ru>> wrote:
On Wed, Jan 6, 2016 at 12:08 AM, Tom Lane <t...@sss.pgh.pa.us
<mailto:t...@sss.pgh.pa.us>> wrote:
konstantin knizhnik <k.knizh...@postgrespro.ru
<mailto:k.knizh...@postgrespro.ru>> writes:
> 1. The cost compared in grouping_planner doesn't take in account
price of get_authorized_users - it is not changed when I am altering function
cost. Is it correct behavior?
The general problem of accounting for tlist eval cost is not handled
very
well now, but especially not with respect to the idea that different
paths
might have different tlist costs. I'm working on an upper-planner
rewrite
which should make this better, or at least make it practical to make it
better.
Hmm... Besides costing it would be nice to postpone calculation of
expensive tlist functions after LIMIT.
I'd agree that it would be more than the costings that would need to be
improved here.
The most simple demonstration of the problem I can think of is, if I apply the
following:
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 29d92a7..2ec9822 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -641,6 +641,8 @@ int4pl(PG_FUNCTION_ARGS)
result = arg1 + arg2;
+ elog(NOTICE, "int4pl(%d, %d)", arg1,arg2);
+
/*
* Overflow check. If the inputs are of different signs then their sum
* cannot overflow. If the inputs are of the same sign, their sum had
Then do:
create table a (b int);
insert into a select generate_series(1,10);
select b+b as bb from a order by b limit 1;
NOTICE: int4pl(1, 1)
NOTICE: int4pl(2, 2)
NOTICE: int4pl(3, 3)
NOTICE: int4pl(4, 4)
NOTICE: int4pl(5, 5)
NOTICE: int4pl(6, 6)
NOTICE: int4pl(7, 7)
NOTICE: int4pl(8, 8)
NOTICE: int4pl(9, 9)
NOTICE: int4pl(10, 10)
bb
----
2
(1 row)
We can see that int4pl() is needlessly called 9 times. Although, I think this
does only apply to queries with LIMIT. I agree that it does seem like an
interesting route for optimisation.
It seems worthwhile to investigate how we might go about improving this so that
the evaluation of the target list happens after LIMIT, at least for the columns
which are not required before LIMIT.
Konstantin, are you thinking of looking into this more, with plans to implement
code to improve this?
--
David Rowley http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 797df31..4cbccac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -106,6 +106,7 @@ static bool choose_hashed_distinct(PlannerInfo *root,
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
AttrNumber **groupColIdx, bool *need_tlist_eval);
static int get_grouping_column_index(Query *parse, TargetEntry *tle);
+static int get_sort_column_index(Query *parse, TargetEntry *tle);
static void locate_grouping_columns(PlannerInfo *root,
List *tlist,
List *sub_tlist,
@@ -2402,6 +2403,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->limitCount,
offset_est,
count_est);
+ if (parse->sortClause && tlist != result_plan->targetlist)
+ {
+ result_plan = (Plan *) make_result(root,
+ tlist,
+ NULL,
+ result_plan);
+ }
}
/*
@@ -3964,8 +3972,8 @@ make_subplanTargetList(PlannerInfo *root,
bool *need_tlist_eval)
{
Query *parse = root->parse;
- List *sub_tlist;
- List *non_group_cols;
+ List *sub_tlist = NIL;
+ List *non_group_cols = NIL;
List *non_group_vars;
int numCols;
@@ -3978,6 +3986,61 @@ make_subplanTargetList(PlannerInfo *root,
if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual &&
!parse->hasWindowFuncs)
{
+ if (parse->sortClause && limit_needed(parse)) {
+ ListCell *tl;
+ bool contains_non_vars = false;
+ *need_tlist_eval = false; /* only eval if not flat tlist */
+ foreach(tl, tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(tl);
+ int colno;
+
+ colno = get_sort_column_index(parse, tle);
+ if (colno >= 0)
+ {
+ TargetEntry *newtle;
+
+ newtle = makeTargetEntry(tle->expr,
+ list_length(sub_tlist) + 1,
+ NULL,
+ false);
+ sub_tlist = lappend(sub_tlist, newtle);
+ if (!(newtle->expr && IsA(newtle->expr, Var)))
+ *need_tlist_eval = true; /* tlist contains non Vars */
+ }
+ else
+ {
+ /*
+ * Non-sorting column, so just remember the expression for
+ * later call to pull_var_clause. There's no need for
+ * pull_var_clause to examine the TargetEntry node itself.
+ */
+ non_group_cols = lappend(non_group_cols, tle->expr);
+ contains_non_vars |= !(tle->expr && IsA(tle->expr, Var));
+ }
+ }
+
+ if (non_group_cols) /* there are some columns not used in order by */
+ {
+ non_group_vars = pull_var_clause((Node *) non_group_cols,
+ PVC_RECURSE_AGGREGATES,
+ PVC_INCLUDE_PLACEHOLDERS);
+ sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
+ /* clean up cruft */
+ list_free(non_group_vars);
+ list_free(non_group_cols);
+
+ if (contains_non_vars )
+ {
+ /*
+ * This optimization makes sense only if target list contains some complex expressions,
+ * for example functions calls. May be it is better to check cost of this expressions,
+ * but right now just apply this optimization if there are non-vars columns
+ */
+ return sub_tlist;
+ }
+ }
+ }
*need_tlist_eval = true;
return tlist;
}
@@ -3986,8 +4049,6 @@ make_subplanTargetList(PlannerInfo *root,
* Otherwise, we must build a tlist containing all grouping columns, plus
* any other Vars mentioned in the targetlist and HAVING qual.
*/
- sub_tlist = NIL;
- non_group_cols = NIL;
*need_tlist_eval = false; /* only eval if not flat tlist */
numCols = list_length(parse->groupClause);
@@ -4110,6 +4171,37 @@ get_grouping_column_index(Query *parse, TargetEntry *tle)
}
/*
+ * get_sort_column_index
+ * Get the ORDER BY column position, if any, of a targetlist entry.
+ *
+ * Returns the index (counting from 0) of the TLE in the ORDER BY list, or -1
+ * if it's not a sorting column. Note: the result is unique because the
+ * parser won't make multiple sortClause entries for the same TLE.
+ */
+static int
+get_sort_column_index(Query *parse, TargetEntry *tle)
+{
+ int colno = 0;
+ Index ressortgroupref = tle->ressortgroupref;
+ ListCell *gl;
+
+ /* No need to search groupClause if TLE hasn't got a sortgroupref */
+ if (ressortgroupref == 0)
+ return -1;
+
+ foreach(gl, parse->sortClause)
+ {
+ SortGroupClause *sortcl = (SortGroupClause *) lfirst(gl);
+
+ if (sortcl->tleSortGroupRef == ressortgroupref)
+ return colno;
+ colno++;
+ }
+
+ return -1;
+}
+
+/*
* locate_grouping_columns
* Locate grouping columns in the tlist chosen by create_plan.
*
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index b72e65d..25e8da1 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1208,12 +1208,13 @@ SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
EXPLAIN (COSTS OFF)
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
- QUERY PLAN
----------------------------------------------------
- Limit
- -> Index Scan using ggcircleind on gcircle_tbl
- Order By: (f1 <-> '(200,300)'::point)
-(3 rows)
+ QUERY PLAN
+---------------------------------------------------------
+ Result
+ -> Limit
+ -> Index Scan using ggcircleind on gcircle_tbl
+ Order By: (f1 <-> '(200,300)'::point)
+(4 rows)
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
circle_center | radius
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers