Find attached cleaned up patch.
For now, I updated the regress/expected/, but I think the test maybe has to be
updated to do what it was written to do.
>From 36f547d69b8fee25869d6ef3ef26d327a8ba1205 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Tue, 1 Jan 2019 16:17:28 -0600
Subject: [PATCH v1] Use correlation statistic in costing bitmap scans..
Same as for an index scan, a correlated bitmap which accesses pages across a
small portion of the table should have a cost estimate much less than an
uncorrelated scan (like modulus) across the entire length of the table, the
latter having a high component of random access.
Note, Tom points out that there are cases where a column could be
tightly-"clumped" without being highly-ordered. Since we have correlation
already, we use that until such time as someone implements a new statistic for
clumpiness. This patch only intends to make costing of bitmap heap scan on par
with the same cost of index scan without bitmap.
---
contrib/postgres_fdw/expected/postgres_fdw.out | 15 ++--
src/backend/optimizer/path/costsize.c | 94 +++++++++++++++++++++-----
src/backend/optimizer/path/indxpath.c | 10 ++-
src/include/nodes/pathnodes.h | 3 +
src/include/optimizer/cost.h | 2 +-
src/test/regress/expected/create_index.out | 16 ++---
src/test/regress/expected/join.out | 18 +++--
src/test/regress/sql/create_index.sql | 8 ++-
8 files changed, 118 insertions(+), 48 deletions(-)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index c915885..fb4c7f2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2257,11 +2257,12 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
-> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
- -> Materialize
+ -> Sort
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
+ Sort Key: ft2.c1
-> Foreign Scan on public.ft2
Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) ORDER BY "C 1" ASC NULLS LAST FOR UPDATE
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
-> Sort
Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
Sort Key: ft4.c1
@@ -2276,7 +2277,7 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" FOR UPDATE
-> Index Scan using local_tbl_pkey on public.local_tbl
Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
-(47 rows)
+(48 rows)
SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1
AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
@@ -3318,10 +3319,12 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr
Sort Key: t1.c2
-> Nested Loop
Output: t1.c2, qry.sum
- -> Index Scan using t1_pkey on "S 1"."T 1" t1
+ -> Bitmap Heap Scan on "S 1"."T 1" t1
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
- Index Cond: (t1."C 1" < 100)
+ Recheck Cond: (t1."C 1" < 100)
Filter: (t1.c2 < 3)
+ -> Bitmap Index Scan on t1_pkey
+ Index Cond: (t1."C 1" < 100)
-> Subquery Scan on qry
Output: qry.sum, t2.c1
Filter: ((t1.c2 * 2) = qry.sum)
@@ -3329,7 +3332,7 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr
Output: (sum((t2.c1 + t1."C 1"))), t2.c1
Relations: Aggregate on (public.ft2 t2)
Remote SQL: SELECT sum(("C 1" + $1::integer)), "C 1" FROM "S 1"."T 1" GROUP BY 2
-(16 rows)
+(18 rows)
select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1;
c2 | sum
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b5a0033..d39e12d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -549,11 +549,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
/*
* Save amcostestimate's results for possible use in bitmap scan planning.
- * We don't bother to save indexStartupCost or indexCorrelation, because a
- * bitmap scan doesn't care about either.
+ * We don't bother to save indexStartupCost, because a bitmap scan
+ * can't start until the index scan completes, so only cares about its
+ * total cost.
*/
path->indextotalcost = indexTotalCost;
path->indexselectivity = indexSelectivity;
+ path->indexCorrelation = indexCorrelation;
/* all costs for touching index itself included here */
startup_cost += indexStartupCost;
@@ -986,12 +988,31 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
* appropriate to charge spc_seq_page_cost apiece. The effect is
* nonlinear, too. For lack of a better idea, interpolate like this to
* determine the cost per page.
+ * Note this works at PAGE granularity, so even if we read 1% of a
+ * table's tuples, if we have to read nearly every page, it should be
+ * considered sequential.
*/
- if (pages_fetched >= 2.0)
+ if (pages_fetched >= 2.0) {
+ double correlation = ((IndexPath *)bitmapqual)->indexCorrelation;
+ double cost_per_page_corr;
+ /*
+ * Interpolate based on pages_fetched and correlation from seq_page_cost to rand_page_cost.
+ * A highly correlated bitmap scan 1) likely reads fewer pages (handled in
+ * compute_bitmap_pages); and, 2) at higher "density" (more sequential).
+ */
cost_per_page = spc_random_page_cost -
(spc_random_page_cost - spc_seq_page_cost)
* sqrt(pages_fetched / T);
- else
+ cost_per_page_corr = spc_random_page_cost -
+ (spc_random_page_cost - spc_seq_page_cost)
+ * (1-correlation*correlation);
+
+ /*
+ * We expect sequential reads and low cost_per_page when *either*
+ * T is high or correlation is high.
+ */
+ cost_per_page = Min(cost_per_page,cost_per_page_corr);
+ } else
cost_per_page = spc_random_page_cost;
run_cost += pages_fetched * cost_per_page;
@@ -1035,15 +1056,18 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
/*
* cost_bitmap_tree_node
- * Extract cost and selectivity from a bitmap tree node (index/and/or)
+ * Extract cost, selectivity, and correlation from a bitmap tree node (index/and/or)
*/
void
-cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
+cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation)
{
if (IsA(path, IndexPath))
{
*cost = ((IndexPath *) path)->indextotalcost;
- *selec = ((IndexPath *) path)->indexselectivity;
+ if (selec)
+ *selec = ((IndexPath *) path)->indexselectivity;
+ if (correlation)
+ *correlation = ((IndexPath *) path)->indexCorrelation;
/*
* Charge a small amount per retrieved tuple to reflect the costs of
@@ -1056,12 +1080,18 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
else if (IsA(path, BitmapAndPath))
{
*cost = path->total_cost;
- *selec = ((BitmapAndPath *) path)->bitmapselectivity;
+ if (selec)
+ *selec = ((BitmapAndPath *) path)->bitmapselectivity;
+ if (correlation)
+ *correlation = ((BitmapAndPath *) path)->bitmapcorrelation;
}
else if (IsA(path, BitmapOrPath))
{
*cost = path->total_cost;
- *selec = ((BitmapOrPath *) path)->bitmapselectivity;
+ if (selec)
+ *selec = ((BitmapOrPath *) path)->bitmapselectivity;
+ if (correlation)
+ *correlation = ((BitmapOrPath *) path)->bitmapcorrelation;
}
else
{
@@ -1084,8 +1114,9 @@ void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
{
Cost totalCost;
- Selectivity selec;
+ Selectivity selec, minsubselec;
ListCell *l;
+ double correlation;
/*
* We estimate AND selectivity on the assumption that the inputs are
@@ -1097,22 +1128,31 @@ cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
* definitely too simplistic?
*/
totalCost = 0.0;
- selec = 1.0;
+ minsubselec = selec = 1.0;
+ correlation = 0;
foreach(l, path->bitmapquals)
{
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Selectivity subselec;
+ double subcorrelation;
- cost_bitmap_tree_node(subpath, &subCost, &subselec);
+ cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation);
selec *= subselec;
+ /* For an AND node, use the correlation of its most-selective subpath */
+ if (subselec <= minsubselec) {
+ correlation = subcorrelation;
+ minsubselec = subselec;
+ }
+
totalCost += subCost;
if (l != list_head(path->bitmapquals))
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = selec;
+ path->bitmapcorrelation = correlation;
path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
@@ -1128,8 +1168,9 @@ void
cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
{
Cost totalCost;
- Selectivity selec;
+ Selectivity selec, maxsubselec;
ListCell *l;
+ double correlation;
/*
* We estimate OR selectivity on the assumption that the inputs are
@@ -1142,23 +1183,32 @@ cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
* optimized out when the inputs are BitmapIndexScans.
*/
totalCost = 0.0;
- selec = 0.0;
+ maxsubselec = selec = 0.0;
+ correlation = 0;
foreach(l, path->bitmapquals)
{
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Selectivity subselec;
+ double subcorrelation;
- cost_bitmap_tree_node(subpath, &subCost, &subselec);
+ cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation);
selec += subselec;
+ /* For an OR node, use the correlation of its least-selective subpath */
+ if (subselec >= maxsubselec) {
+ correlation = subcorrelation;
+ maxsubselec = subselec;
+ }
+
totalCost += subCost;
if (l != list_head(path->bitmapquals) &&
!IsA(subpath, IndexPath))
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = Min(selec, 1.0);
+ path->bitmapcorrelation = correlation;
path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
@@ -5510,8 +5560,11 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
{
Cost indexTotalCost;
Selectivity indexSelectivity;
+ double indexCorrelation;
double T;
- double pages_fetched;
+ double pages_fetched,
+ pages_fetchedMIN,
+ pages_fetchedMAX;
double tuples_fetched;
double heap_pages;
long maxentries;
@@ -5520,7 +5573,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
* Fetch total cost of obtaining the bitmap, as well as its total
* selectivity.
*/
- cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+ cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity, &indexCorrelation);
/*
* Estimate number of main-table pages fetched.
@@ -5534,7 +5587,12 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
* the same as the Mackert and Lohman formula for the case T <= b (ie, no
* re-reads needed).
*/
- pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+ pages_fetchedMAX = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+
+ /* pages_fetchedMIN is for the perfectly correlated case (csquared=1) */
+ pages_fetchedMIN = ceil(indexSelectivity * (double) baserel->pages);
+
+ pages_fetched = pages_fetchedMAX + indexCorrelation*indexCorrelation*(pages_fetchedMIN - pages_fetchedMAX);
/*
* Calculate the number of pages fetched from the heap. Then based on
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index f42926e..5d111fc 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1483,11 +1483,9 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
/* duplicate clauseids, keep the cheaper one */
Cost ncost;
Cost ocost;
- Selectivity nselec;
- Selectivity oselec;
- cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
- cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
+ cost_bitmap_tree_node(pathinfo->path, &ncost, NULL, NULL);
+ cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, NULL, NULL);
if (ncost < ocost)
pathinfoarray[i] = pathinfo;
}
@@ -1599,8 +1597,8 @@ path_usage_comparator(const void *a, const void *b)
Selectivity aselec;
Selectivity bselec;
- cost_bitmap_tree_node(pa->path, &acost, &aselec);
- cost_bitmap_tree_node(pb->path, &bcost, &bselec);
+ cost_bitmap_tree_node(pa->path, &acost, &aselec, NULL);
+ cost_bitmap_tree_node(pb->path, &bcost, &bselec, NULL);
/*
* If costs are the same, sort by selectivity.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be19..19ddf5b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1183,6 +1183,7 @@ typedef struct IndexPath
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
+ double indexCorrelation;
} IndexPath;
/*
@@ -1263,6 +1264,7 @@ typedef struct BitmapAndPath
Path path;
List *bitmapquals; /* IndexPaths and BitmapOrPaths */
Selectivity bitmapselectivity;
+ double bitmapcorrelation;
} BitmapAndPath;
/*
@@ -1276,6 +1278,7 @@ typedef struct BitmapOrPath
Path path;
List *bitmapquals; /* IndexPaths and BitmapAndPaths */
Selectivity bitmapselectivity;
+ double bitmapcorrelation;
} BitmapOrPath;
/*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index cb012ba..7d5edb5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -79,7 +79,7 @@ extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *bas
Path *bitmapqual, double loop_count);
extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
-extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
+extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation);
extern void cost_tidscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info);
extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 6446907..5bd8aae 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1770,24 +1770,23 @@ DROP TABLE onek_with_null;
--
-- Check bitmap index path planning
--
+SET cpu_operator_cost=0;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
- WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------
Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42)))
+ Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 42)))
-> BitmapOr
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = 1))
-> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 3))
- -> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: ((thousand = 42) AND (tenthous = 42))
-(9 rows)
+(7 rows)
SELECT * FROM tenk1
- WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4
---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx
@@ -1818,6 +1817,7 @@ SELECT count(*) FROM tenk1
10
(1 row)
+RESET cpu_operator_cost;
--
-- Check behavior with duplicate index column contents
--
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b..bc011ee 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3584,24 +3584,28 @@ select b.unique1 from
join int4_tbl i1 on b.thousand = f1
right join int4_tbl i2 on i2.f1 = b.tenthous
order by 1;
- QUERY PLAN
------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Sort
Sort Key: b.unique1
- -> Nested Loop Left Join
- -> Seq Scan on int4_tbl i2
+ -> Hash Right Join
+ Hash Cond: (b.tenthous = i2.f1)
-> Nested Loop Left Join
Join Filter: (b.unique1 = 42)
-> Nested Loop
-> Nested Loop
-> Seq Scan on int4_tbl i1
- -> Index Scan using tenk1_thous_tenthous on tenk1 b
- Index Cond: ((thousand = i1.f1) AND (tenthous = i2.f1))
+ -> Bitmap Heap Scan on tenk1 b
+ Recheck Cond: (thousand = i1.f1)
+ -> Bitmap Index Scan on tenk1_thous_tenthous
+ Index Cond: (thousand = i1.f1)
-> Index Scan using tenk1_unique1 on tenk1 a
Index Cond: (unique1 = b.unique2)
-> Index Only Scan using tenk1_thous_tenthous on tenk1 c
Index Cond: (thousand = a.thousand)
-(15 rows)
+ -> Hash
+ -> Seq Scan on int4_tbl i2
+(19 rows)
select b.unique1 from
tenk1 a join tenk1 b on a.unique1 = b.unique2
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 3c0c1cd..6607a7b 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -666,11 +666,13 @@ DROP TABLE onek_with_null;
-- Check bitmap index path planning
--
+SET cpu_operator_cost=0;
+
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
- WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
SELECT * FROM tenk1
- WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+ WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1
@@ -678,6 +680,8 @@ SELECT count(*) FROM tenk1
SELECT count(*) FROM tenk1
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
+RESET cpu_operator_cost;
+
--
-- Check behavior with duplicate index column contents
--
--
2.7.4