Consider the following schema:
create table foo_archive (a int, b timestamp);
create index foo_archive_idx on foo_archive(b);
CREATE TABLE foo_archive_2007_01_01 (CONSTRAINT
foo_archive_2007_01_01_b_check CHECK (((b >= '2007-01-01'::date) AND (b <
'2007-01-02'::date)))) INHERITS (foo_archive);
CREATE INDEX foo_archive_2007_01_01_idx ON foo_archive_2007_01_01 USING
btree (b);
CREATE TABLE foo_archive_2007_01_02 (CONSTRAINT
foo_archive_2007_01_02_b_check CHECK (((b >= '2007-01-02'::date) AND (b <
'2007-01-03'::date)))) INHERITS (foo_archive);
CREATE INDEX foo_archive_2007_01_02_idx ON foo_archive_2007_01_02 USING
btree (b);
...
Currently the optimizer yields the following plan:
postgres=# explain select max(b) from foo_archive;
QUERY
PLAN
-----------------------------------------------------------------------------------------------------
Aggregate (cost=18602.00..18602.01 rows=1 width=8)
-> Append (cost=0.00..16005.00 rows=1038800 width=8)
-> Seq Scan on foo_archive (cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_01 foo_archive
(cost=0.00..1331.99 rows=86399 width=8)
-> Seq Scan on foo_archive_2007_01_02 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_03 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_04 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_05 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_06 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_07 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_08 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_09 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_10 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_11 foo_archive
(cost=0.00..1332.00 rows=86400 width=8)
-> Seq Scan on foo_archive_2007_01_12 foo_archive
(cost=0.00..765.01 rows=49601 width=8)
-> Seq Scan on foo_archive_2007_01_13 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_14 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_15 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_16 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_17 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_18 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_19 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_20 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_21 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_22 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_23 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_24 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_25 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_26 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_27 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_28 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_29 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_30 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
-> Seq Scan on foo_archive_2007_01_31 foo_archive
(cost=0.00..29.40 rows=1940 width=8)
(34 rows)
As we can see, the optimizer does not take advantage of the indexes on
column b in the children relations.
Attached is a patch that will take advantage of the indexes (when they're
available and if the index path is cheaper) and yield the following plan.
postgres=# explain select max(b) from foo_archive;
QUERY
PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------
Aggregate (cost=1.54..1.55 rows=1 width=0)
InitPlan 1 (returns $0)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_idx on foo_archive
(cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 2 (returns $1)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_01_idx on
foo_archive_2007_01_01 foo_archive (cost=0.00..2719.24 rows=86399 width=8)
Filter: (b IS NOT NULL)
InitPlan 3 (returns $2)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_02_idx on
foo_archive_2007_01_02 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 4 (returns $3)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_03_idx on
foo_archive_2007_01_03 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 5 (returns $4)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_04_idx on
foo_archive_2007_01_04 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 6 (returns $5)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_05_idx on
foo_archive_2007_01_05 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 7 (returns $6)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_06_idx on
foo_archive_2007_01_06 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 8 (returns $7)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_07_idx on
foo_archive_2007_01_07 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 9 (returns $8)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_08_idx on
foo_archive_2007_01_08 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 10 (returns $9)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_09_idx on
foo_archive_2007_01_09 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 11 (returns $10)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_10_idx on
foo_archive_2007_01_10 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 12 (returns $11)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_11_idx on
foo_archive_2007_01_11 foo_archive (cost=0.00..2719.26 rows=86400 width=8)
Filter: (b IS NOT NULL)
InitPlan 13 (returns $12)
-> Limit (cost=0.00..0.03 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_12_idx on
foo_archive_2007_01_12 foo_archive (cost=0.00..1568.27 rows=49601 width=8)
Filter: (b IS NOT NULL)
InitPlan 14 (returns $13)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_13_idx on
foo_archive_2007_01_13 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 15 (returns $14)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_14_idx on
foo_archive_2007_01_14 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 16 (returns $15)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_15_idx on
foo_archive_2007_01_15 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 17 (returns $16)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_16_idx on
foo_archive_2007_01_16 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 18 (returns $17)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_17_idx on
foo_archive_2007_01_17 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 19 (returns $18)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_18_idx on
foo_archive_2007_01_18 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 20 (returns $19)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_19_idx on
foo_archive_2007_01_19 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 21 (returns $20)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_20_idx on
foo_archive_2007_01_20 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 22 (returns $21)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_21_idx on
foo_archive_2007_01_21 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 23 (returns $22)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_22_idx on
foo_archive_2007_01_22 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 24 (returns $23)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_23_idx on
foo_archive_2007_01_23 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 25 (returns $24)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_24_idx on
foo_archive_2007_01_24 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 26 (returns $25)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_25_idx on
foo_archive_2007_01_25 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 27 (returns $26)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_26_idx on
foo_archive_2007_01_26 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 28 (returns $27)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_27_idx on
foo_archive_2007_01_27 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 29 (returns $28)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_28_idx on
foo_archive_2007_01_28 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 30 (returns $29)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_29_idx on
foo_archive_2007_01_29 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 31 (returns $30)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_30_idx on
foo_archive_2007_01_30 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
InitPlan 32 (returns $31)
-> Limit (cost=0.00..0.04 rows=1 width=8)
-> Index Scan Backward using foo_archive_2007_01_31_idx on
foo_archive_2007_01_31 foo_archive (cost=0.00..73.35 rows=1940 width=8)
Filter: (b IS NOT NULL)
-> Append (cost=0.00..0.32 rows=32 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
-> Result (cost=0.00..0.01 rows=1 width=0)
(162 rows)
Does this patch make sense for 8.5 to optimize for this particular class of
queries? Specifically queries of the form:
SELECT MAX(col) FROM tab WHERE ...;
SELECT MIN(col) FROM tab WHERE ...;
Thanks, Alan
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 762dfb1..5f88466 100644
*** a/src/backend/optimizer/plan/createplan.c
--- b/src/backend/optimizer/plan/createplan.c
***************
*** 27,32 ****
--- 27,33 ----
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "optimizer/predtest.h"
+ #include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
*************** create_append_plan(PlannerInfo *root, Ap
*** 546,551 ****
--- 547,553 ----
List *tlist = build_relation_tlist(best_path->path.parent);
List *subplans = NIL;
ListCell *subpaths;
+ bool can_optimize_minmax;
/*
* It is possible for the subplans list to contain only one entry, or even
*************** create_append_plan(PlannerInfo *root, Ap
*** 566,577 ****
NULL);
}
/* Normal case with multiple subpaths */
foreach(subpaths, best_path->subpaths)
{
Path *subpath = (Path *) lfirst(subpaths);
! subplans = lappend(subplans, create_plan(root, subpath));
}
plan = make_append(subplans, false, tlist);
--- 568,618 ----
NULL);
}
+ can_optimize_minmax = can_optimize_append_minmax(root);
+
/* Normal case with multiple subpaths */
foreach(subpaths, best_path->subpaths)
{
Path *subpath = (Path *) lfirst(subpaths);
! RelOptInfo *parentrel = best_path->path.parent;
! RelOptInfo *childrel = subpath->parent;
! AppendRelInfo *appinfo = NULL;
! ListCell *lc;
! Plan *subplan = NULL;
!
! if (!can_optimize_minmax)
! {
! subplans = lappend(subplans, create_plan(root, subpath));
! continue;
! }
!
! /* only try to optimize children rel's */
! foreach (lc, root->append_rel_list)
! {
! AppendRelInfo *a = (AppendRelInfo *) lfirst(lc);
!
! if (a->child_relid == childrel->relid &&
! a->parent_relid == parentrel->relid)
! {
! appinfo = a;
! break;
! }
! }
!
! if (appinfo)
! {
! List *targetlist =
! (List *) adjust_appendrel_attrs((Node *) root->parse->targetList,
! appinfo);
! subplan = do_optimize_minmax_aggregates(root, targetlist, childrel,
! subpath);
! }
!
! if (subplan)
! subplans = lappend(subplans, subplan);
! else
! subplans = lappend(subplans, create_plan(root, subpath));
}
plan = make_append(subplans, false, tlist);
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 34e9617..959cc2c 100644
*** a/src/backend/optimizer/plan/planagg.c
--- b/src/backend/optimizer/plan/planagg.c
*************** static void make_agg_subplan(PlannerInfo
*** 53,59 ****
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid fetch_agg_sort_op(Oid aggfnoid);
-
/*
* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
*
--- 53,58 ----
*************** optimize_minmax_aggregates(PlannerInfo *
*** 77,89 ****
RangeTblRef *rtr;
RangeTblEntry *rte;
RelOptInfo *rel;
- List *aggs_list;
- ListCell *l;
- Cost total_cost;
- Path agg_p;
- Plan *plan;
- Node *hqual;
- QualCost tlist_cost;
/* Nothing to do if query has no aggregates */
if (!parse->hasAggs)
--- 76,81 ----
*************** optimize_minmax_aggregates(PlannerInfo *
*** 124,129 ****
--- 116,137 ----
return NULL;
rel = find_base_rel(root, rtr->rtindex);
+ return do_optimize_minmax_aggregates(root, tlist, rel, best_path);
+ }
+
+ Plan *
+ do_optimize_minmax_aggregates(PlannerInfo *root, List *tlist, RelOptInfo *rel,
+ Path *best_path)
+ {
+ Query *parse = root->parse;
+ List *aggs_list;
+ ListCell *l;
+ Cost total_cost;
+ Path agg_p;
+ Plan *plan;
+ Node *hqual;
+ QualCost tlist_cost;
+
/*
* Since this optimization is not applicable all that often, we want to
* fall out before doing very much work if possible. Therefore we do the
*************** optimize_minmax_aggregates(PlannerInfo *
*** 214,219 ****
--- 222,273 ----
}
/*
+ * can_optimize_append_minmax - check whether optimizing MIN/MAX via indexes
+ * can potentially be pushed down to child relations.
+ *
+ * This checks whether the query is of the form:
+ * 1. SELECT MAX(col) FROM tab WHERE ...;
+ * 2. SELECT MIN(col) FROM tab WHERE ...;
+ */
+ bool
+ can_optimize_append_minmax(PlannerInfo *root)
+ {
+ Query *parse = root->parse;
+ List *tlist = parse->targetList;
+ List *aggs_list;
+
+ /* Nothing to do if query has no aggregates */
+ if (!parse->hasAggs)
+ return false;
+
+ Assert(!parse->setOperations); /* shouldn't get here if a setop */
+ Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
+
+ /*
+ * Reject unoptimizable cases.
+ *
+ * We don't handle GROUP BY or windowing, because our current
+ * implementations of grouping require looking at all the rows anyway, and
+ * so there's not much point in optimizing MIN/MAX.
+ */
+ if (parse->groupClause || parse->hasWindowFuncs)
+ return false;
+
+ /* We're only supporting a single MIN/MAX in the targetlist. */
+ if (list_length(tlist) > 1 || parse->havingQual)
+ return false;
+
+ aggs_list = NIL;
+ if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
+ return false;
+
+ if (list_length(aggs_list) > 1)
+ return false;
+
+ return true;
+ }
+
+ /*
* find_minmax_aggs_walker
* Recursively scan the Aggref nodes in an expression tree, and check
* that each one is a MIN/MAX aggregate. If so, build a list of the
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 0dd2bcc..939c1ef 100644
*** a/src/include/optimizer/planmain.h
--- b/src/include/optimizer/planmain.h
*************** extern void query_planner(PlannerInfo *r
*** 34,39 ****
--- 34,42 ----
*/
extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
Path *best_path);
+ extern Plan *do_optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
+ RelOptInfo *rel, Path *best_path);
+ extern bool can_optimize_append_minmax(PlannerInfo *root);
/*
* prototypes for plan/createplan.c
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers