Hi, thank you for the feedback! On 9/7/2026 14:39, Ilia Evdokimov (<[email protected]>) wrote:
> 1. mcv_can_cap() reimplements logic already present in > dependency_is_compatible_clause(). Shall we combine the two in order to > avoid code duplication? > > It's true that some of the checks that are in `mcv_can_cap()` are already in `dependency_is_compatible_clause()`. They both return the same value when there's a pseudo-constant and the clause is `=`, `= TRUE` or `= FALSE`. Rest of the cases are different (IS NULL, ANY/IN, OR). We would need to add surrounding logic because even for the matching cases, `mcv_can_cap()` doesn't need the pseudo-constant check that `dependency_is_compatible_clause()` requires for the `=` branch, among other checks. We won't gain much from combining both: the number of removed lines is compensated with the added surrounding logic; and things will be more convoluted. Moreover, the two functions will evolve independently because they serve different stats, so coupling them would create undesirable cross-dependency. I would prefer to maintain it as it is. 2. mcv_can_cap() runs unconditionally before the if (is_or) branch, but > can_cap is only consumed in the else/AND branch. This means mcv_can_cap() - > including get_oprrest, syscache lookups per clause - runs for every OR > query as wasted work. Both can_cap and covered_attnums should be moved > inside the else branch. > Makes sense. I've modified it, you can check v4-0001. It only made sense to have it there if we implement the OR path, which might be done in a future patch. get_ndistinct_for_keys() reimplements the ndistinct item lookup already > present in estimate_multivariate_ndistinct(). Both functions iterate over > MVNDistinct->items match by attributes count. > Right! I've created a helper and used it in both places. You can check v4-0002. I've checked that the patch applies cleanly and pg-ci.yml passes. Best regards, Enrique.
From 2cd3db834e510a57b59d14ce09e67368e4e09d72 Mon Sep 17 00:00:00 2001 From: Enrique Sanchez Cardoso <[email protected]> Date: Sun, 7 Jun 2026 15:20:37 +0200 Subject: [PATCH v4 2/2] Use ndistinct to cap non-MCV values When no MCV matches and ndistinct is available, apply the uniform distribution among non-MCV combinations as an upper bound: (1 - mcv_totalsel) / (ndistinct - mcv_nitems) --- src/backend/statistics/extended_stats.c | 73 ++++++++++++++++++- src/backend/statistics/mcv.c | 4 +- src/backend/statistics/mvdistinct.c | 46 ++++++++++++ src/backend/utils/adt/selfuncs.c | 39 +--------- .../statistics/extended_stats_internal.h | 3 +- src/include/statistics/statistics.h | 3 + src/test/regress/expected/stats_ext.out | 21 +++++- src/test/regress/sql/stats_ext.sql | 14 +++- 8 files changed, 157 insertions(+), 46 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2f6fabe4589..f3541a8fa0e 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1768,6 +1768,48 @@ mcv_can_cap(StatisticExtInfo *stat, Bitmapset *covered_attnums, List *stat_claus return true; } +/* + * get_ndistinct_for_keys + * Return the ndistinct estimate for the full set of columns identified by + * keys, using a matching STATS_EXT_NDISTINCT object from the relation's + * statlist. + * + * Accepts both exact-match and superset statistics objects. Returns -1.0 + * if no matching ndistinct statistics object or item is found. + */ +static double +get_ndistinct_for_keys(List *statlist, Bitmapset *keys, bool inh) +{ + ListCell *lc; + + foreach(lc, statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + MVNDistinct *mvnd; + MVNDistinctItem *item; + + if (info->kind != STATS_EXT_NDISTINCT || info->inherit != inh) + continue; + if (!bms_is_subset(keys, info->keys)) + continue; + + mvnd = statext_ndistinct_load(info->statOid, inh); + item = mvndistinct_find_item(mvnd, keys, 0); + + if (item) + { + double ndistinct = item->ndistinct; + + statext_ndistinct_free(mvnd); + return ndistinct; + } + + statext_ndistinct_free(mvnd); + } + + return -1.0; +} + /* * statext_mcv_clauselist_selectivity * Estimate clauses using the best multi-column statistics. @@ -2062,6 +2104,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli mcv_totalsel, mcv_cap, stat_sel; + uint32 mcv_nitems; can_cap = mcv_can_cap(stat, covered_attnums, stat_clauses); bms_free(covered_attnums); @@ -2082,7 +2125,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli varRelid, jointype, sjinfo, rel, &mcv_basesel, &mcv_totalsel, - &mcv_cap); + &mcv_cap, + &mcv_nitems); /* Combine the simple and multi-column estimates. */ stat_sel = mcv_combine_selectivities(simple_sel, @@ -2090,9 +2134,30 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli mcv_basesel, mcv_totalsel); - /* Cap to the least common MCV item when no MCV items matched. */ - if (can_cap && stat_sel > mcv_cap) - stat_sel = mcv_cap; + /* Cap when no MCV items matched (mcv_sel = 0.0). */ + if (can_cap && mcv_sel == 0.0) + { + double ndistinct; + + /* Cap to the least common MCV item. */ + if (stat_sel > mcv_cap) + stat_sel = mcv_cap; + + ndistinct = get_ndistinct_for_keys(rel->statlist, stat->keys, rte->inh); + + if (ndistinct > (double) mcv_nitems) + { + double non_mcv_sel = (1.0 - mcv_totalsel) / (ndistinct - (double) mcv_nitems); + + /* + * Cap to uniform distribution among the non-MCV + * combinations. This is similar to what var_eq_const() + * does for single-column MCV stats. + */ + if (stat_sel > non_mcv_sel) + stat_sel = non_mcv_sel; + } + } /* Factor this into the overall result */ sel *= stat_sel; diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 6617c297eab..04a2c430637 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -2048,7 +2048,7 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel, - Selectivity *cap) + Selectivity *cap, uint32 *nitems) { int i; MCVList *mcv; @@ -2064,6 +2064,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, /* load the MCV list stored in the statistics object */ mcv = statext_mcv_load(stat->statOid, rte->inh); + *nitems = mcv->nitems; + /* build a match bitmap for the clauses */ matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs, mcv, false); diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 4f8f578a22f..85462470604 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -336,6 +336,52 @@ statext_ndistinct_free(MVNDistinct *ndistinct) pfree(ndistinct); } +/* + * mvndistinct_find_item + * Search an MVNDistinct for the item whose attribute set exactly matches + * the supplied keys bitmap. + * + * attnum_offset is added to each item attribute number before the bitmap + * membership check. Pass 0 when keys contains plain attribute numbers. + * Pass the offset used when building keys when expression-based statistics + * are involved (as in estimate_multivariate_ndistinct). + * + * Returns a pointer to the matching MVNDistinctItem, or NULL if not found. + */ +MVNDistinctItem * +mvndistinct_find_item(MVNDistinct *stats, Bitmapset *keys, + AttrNumber attnum_offset) +{ + int nkeys = bms_num_members(keys); + + for (int i = 0; i < stats->nitems; i++) + { + MVNDistinctItem *item = &stats->items[i]; + int j; + + if (item->nattributes != nkeys) + continue; + + /* assume it's the right item */ + for (j = 0; j < item->nattributes; j++) + { + AttrNumber attnum = item->attributes[j] + attnum_offset; + + if (!bms_is_member(attnum, keys)) + { + /* nah, it's not this item */ + item = NULL; + break; + } + } + + if (item) + return item; + } + + return NULL; +} + /* * Validate a set of MVNDistincts against the extended statistics object * definition. diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d6efd07073a..834bafdf0a4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4685,7 +4685,6 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, */ if (stats) { - int i; List *newlist = NIL; MVNDistinctItem *item = NULL; ListCell *lc2; @@ -4775,43 +4774,7 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, } /* Find the specific item that exactly matches the combination */ - for (i = 0; i < stats->nitems; i++) - { - int j; - MVNDistinctItem *tmpitem = &stats->items[i]; - - if (tmpitem->nattributes != bms_num_members(matched)) - continue; - - /* assume it's the right item */ - item = tmpitem; - - /* check that all item attributes/expressions fit the match */ - for (j = 0; j < tmpitem->nattributes; j++) - { - AttrNumber attnum = tmpitem->attributes[j]; - - /* - * Thanks to how we constructed the matched bitmap above, we - * can just offset all attnums the same way. - */ - attnum = attnum + attnum_offset; - - if (!bms_is_member(attnum, matched)) - { - /* nah, it's not this item */ - item = NULL; - break; - } - } - - /* - * If the item has all the matched attributes, we know it's the - * right one - there can't be a better one. matching more. - */ - if (item) - break; - } + item = mvndistinct_find_item(stats, matched, attnum_offset); /* * Make sure we found an item. There has to be one, because ndistinct diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 01b5f67b843..1114d2870b2 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -130,7 +130,8 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel, - Selectivity *cap); + Selectivity *cap, + uint32 *nitems); extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 8f9b9d237fd..e1d03ee98d4 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -95,6 +95,9 @@ typedef struct MCVList } MCVList; extern MVNDistinct *statext_ndistinct_load(Oid mvoid, bool inh); +extern MVNDistinctItem *mvndistinct_find_item(MVNDistinct *stats, + Bitmapset *keys, + AttrNumber attnum_offset); extern MVDependencies *statext_dependencies_load(Oid mvoid, bool inh); extern MCVList *statext_mcv_load(Oid mvoid, bool inh); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index c87b2d9f9f5..30760efe47c 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -2945,7 +2945,7 @@ SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 1219 | 0 (1 row) -CREATE STATISTICS mcv_cap_stats (mcv) ON a, b, c, d FROM mcv_cap; +CREATE STATISTICS mcv_cap_stats_mcv (mcv) ON a, b, c, d FROM mcv_cap; ANALYZE mcv_cap; -- MCV SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); @@ -2954,6 +2954,15 @@ SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 100 | 0 (1 row) +CREATE STATISTICS mcv_cap_stats_nd (ndistinct) ON a, b, c, d FROM mcv_cap; +ANALYZE mcv_cap; +-- MCV + ndistinct +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + estimated | actual +-----------+-------- + 50 | 0 +(1 row) + -- When a value IS in the MCV list, no cap path runs SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 1 AND c = TRUE AND d = '{}'$$); estimated | actual @@ -2975,6 +2984,16 @@ SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 2450 | 0 (1 row) +-- MCV + superset ndistinct +DROP STATISTICS mcv_cap_stats_nd; +CREATE STATISTICS mcv_cap_stats_nd (ndistinct) ON a, b, c, d, e FROM mcv_cap; +ANALYZE mcv_cap; +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + estimated | actual +-----------+-------- + 50 | 0 +(1 row) + DROP TABLE mcv_cap; -- check the ability to use multiple MCV lists CREATE TABLE mcv_lists_multi ( diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index e6cf85aa6ab..ffbf64a98af 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1483,12 +1483,18 @@ ANALYZE mcv_cap; -- no MCV SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); -CREATE STATISTICS mcv_cap_stats (mcv) ON a, b, c, d FROM mcv_cap; +CREATE STATISTICS mcv_cap_stats_mcv (mcv) ON a, b, c, d FROM mcv_cap; ANALYZE mcv_cap; -- MCV SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); +CREATE STATISTICS mcv_cap_stats_nd (ndistinct) ON a, b, c, d FROM mcv_cap; +ANALYZE mcv_cap; + +-- MCV + ndistinct +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + -- When a value IS in the MCV list, no cap path runs SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 1 AND c = TRUE AND d = '{}'$$); @@ -1498,6 +1504,12 @@ SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a >= 0 AND b = -- Capping does not apply when the query does not cover all MCV columns SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE$$); +-- MCV + superset ndistinct +DROP STATISTICS mcv_cap_stats_nd; +CREATE STATISTICS mcv_cap_stats_nd (ndistinct) ON a, b, c, d, e FROM mcv_cap; +ANALYZE mcv_cap; +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + DROP TABLE mcv_cap; -- check the ability to use multiple MCV lists -- 2.43.0
From fe16908c387c7ce09ffcadb5769a74ccf168f8ab Mon Sep 17 00:00:00 2001 From: Enrique Sanchez Cardoso <[email protected]> Date: Wed, 3 Jun 2026 23:12:16 +0200 Subject: [PATCH v4 1/2] Cap selectivity when values are not in multi-column mcv Selectivity can't be > last MCV item (least common) selectivity when they are AND clauses and cover all the MCV dimensions. --- src/backend/statistics/extended_stats.c | 82 ++++++++++++++++++- src/backend/statistics/mcv.c | 9 +- .../statistics/extended_stats_internal.h | 3 +- src/test/regress/expected/stats_ext.out | 48 +++++++++++ src/test/regress/sql/stats_ext.sql | 35 ++++++++ 5 files changed, 174 insertions(+), 3 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2b83355d26e..2f6fabe4589 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1705,6 +1705,69 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, return true; } +/* + * mcv_can_cap + * Determines whether the MCV selectivity estimate can be capped at the + * frequency of the least common item in the MCV list. + * + * When a combination of values does not appear in the MCV list, its true + * selectivity must be lower than the frequency of the least common tracked + * combination. We can exploit this to cap the combined selectivity estimate, + * but only when the following conditions are both satisfied: + * + * 1. The clauses cover all dimensions of the statistics object, i.e. + * covered_attnums equals stat->keys exactly. If any dimension is + * unconstrained, the absence of a match in the MCV list does not bound + * the selectivity of the full combination. + * + * 2. Every clause is an equality-like condition: either an equality operator, + * an IS NULL test, or a bare boolean Var. Range or inequality predicates + * can match many values, so the per-combination argument no longer applies. + * + * Returns true if both conditions hold and capping is valid. + */ +static bool +mcv_can_cap(StatisticExtInfo *stat, Bitmapset *covered_attnums, List *stat_clauses) +{ + ListCell *lc; + + /* + * Expressions are not supported, they can match multiple rows. Also, the + * clauses must cover all dimensions of the MCV list. + */ + if (stat->exprs != NULL || !bms_equal(covered_attnums, stat->keys)) + { + return false; + } + + foreach(lc, stat_clauses) + { + Node *clause = (Node *) lfirst(lc); + + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + /* = */ + if (is_opclause(clause) && get_oprrest(((const OpExpr *) clause)->opno) == F_EQSEL) + continue; + + /* IS NULL */ + if (IsA(clause, NullTest) && ((const NullTest *) clause)->nulltesttype == IS_NULL) + continue; + + /* = TRUE */ + if (IsA(clause, Var)) + continue; + + /* = FALSE */ + if (IsA(clause, BoolExpr) && ((const BoolExpr *) clause)->boolop == NOT_EXPR && IsA(linitial(((const BoolExpr *) clause)->args), Var)) + continue; + + return false; + } + return true; +} + /* * statext_mcv_clauselist_selectivity * Estimate clauses using the best multi-column statistics. @@ -1800,6 +1863,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli StatisticExtInfo *stat; List *stat_clauses; Bitmapset *simple_clauses; + Bitmapset *covered_attnums; /* find the best suited statistics object for these attnums */ stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh, @@ -1822,6 +1886,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* record which clauses are simple (single column or expression) */ simple_clauses = NULL; + /* record all attnums to check if MCV covers all of them */ + covered_attnums = NULL; + listidx = -1; foreach(l, clauses) { @@ -1872,6 +1939,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); *estimatedclauses = bms_add_member(*estimatedclauses, listidx); + if (!is_or) + covered_attnums = bms_add_members(covered_attnums, list_attnums[listidx]); + /* * Reset the pointers, so that choose_best_statistics knows this * clause was estimated and does not consider it again. @@ -1985,12 +2055,17 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli } else /* Implicitly-ANDed list of clauses */ { + bool can_cap; Selectivity simple_sel, mcv_sel, mcv_basesel, mcv_totalsel, + mcv_cap, stat_sel; + can_cap = mcv_can_cap(stat, covered_attnums, stat_clauses); + bms_free(covered_attnums); + /* * "Simple" selectivity, i.e. without any extended statistics, * essentially assuming independence of the columns/clauses. @@ -2006,7 +2081,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid, jointype, sjinfo, rel, &mcv_basesel, - &mcv_totalsel); + &mcv_totalsel, + &mcv_cap); /* Combine the simple and multi-column estimates. */ stat_sel = mcv_combine_selectivities(simple_sel, @@ -2014,6 +2090,10 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli mcv_basesel, mcv_totalsel); + /* Cap to the least common MCV item when no MCV items matched. */ + if (can_cap && stat_sel > mcv_cap) + stat_sel = mcv_cap; + /* Factor this into the overall result */ sel *= stat_sel; } diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 0b7da605a4c..6617c297eab 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -2047,7 +2047,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, - Selectivity *basesel, Selectivity *totalsel) + Selectivity *basesel, Selectivity *totalsel, + Selectivity *cap) { int i; MCVList *mcv; @@ -2057,6 +2058,9 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, /* match/mismatch bitmap for each MCV item */ bool *matches = NULL; + /* default: no cap on selectivity */ + *cap = 1.0; + /* load the MCV list stored in the statistics object */ mcv = statext_mcv_load(stat->statOid, rte->inh); @@ -2078,6 +2082,9 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, } } + if (s == 0.0 && mcv->nitems > 0) + *cap = mcv->items[mcv->nitems - 1].frequency; + return s; } diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index c775442f2ee..01b5f67b843 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -129,7 +129,8 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, - Selectivity *totalsel); + Selectivity *totalsel, + Selectivity *cap); extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 37070c1a896..c87b2d9f9f5 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -2928,6 +2928,54 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 (1 row) DROP TABLE mcv_lists_partial; +-- After building MCV statistics the cap limits the combined estimate, +-- eliminating most of the over-estimation. +CREATE TABLE mcv_cap (a INT, b INT, c BOOL, d INTEGER[], e INT) WITH (autovacuum_enabled = off); +INSERT INTO mcv_cap + SELECT 0, b, TRUE, '{}', 1 FROM generate_series(1, 99) b, generate_series(1, 100) r; +INSERT INTO mcv_cap + SELECT a, 0, NULL, '{1, 2}', 2 FROM generate_series(1, 99) a, generate_series(1, 100) r; +INSERT INTO mcv_cap + SELECT c, c, FALSE, '{1, 1}', 3 FROM generate_series(1, 100) c; +ANALYZE mcv_cap; +-- no MCV +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + estimated | actual +-----------+-------- + 1219 | 0 +(1 row) + +CREATE STATISTICS mcv_cap_stats (mcv) ON a, b, c, d FROM mcv_cap; +ANALYZE mcv_cap; +-- MCV +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +-- When a value IS in the MCV list, no cap path runs +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 1 AND c = TRUE AND d = '{}'$$); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +-- Capping does not apply when the query includes an inequality clause +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a >= 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + estimated | actual +-----------+-------- + 2450 | 0 +(1 row) + +-- Capping does not apply when the query does not cover all MCV columns +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE$$); + estimated | actual +-----------+-------- + 2450 | 0 +(1 row) + +DROP TABLE mcv_cap; -- check the ability to use multiple MCV lists CREATE TABLE mcv_lists_multi ( a INTEGER, diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3cc6012b822..e6cf85aa6ab 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1465,6 +1465,41 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 DROP TABLE mcv_lists_partial; +-- After building MCV statistics the cap limits the combined estimate, +-- eliminating most of the over-estimation. +CREATE TABLE mcv_cap (a INT, b INT, c BOOL, d INTEGER[], e INT) WITH (autovacuum_enabled = off); + +INSERT INTO mcv_cap + SELECT 0, b, TRUE, '{}', 1 FROM generate_series(1, 99) b, generate_series(1, 100) r; + +INSERT INTO mcv_cap + SELECT a, 0, NULL, '{1, 2}', 2 FROM generate_series(1, 99) a, generate_series(1, 100) r; + +INSERT INTO mcv_cap + SELECT c, c, FALSE, '{1, 1}', 3 FROM generate_series(1, 100) c; + +ANALYZE mcv_cap; + +-- no MCV +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + +CREATE STATISTICS mcv_cap_stats (mcv) ON a, b, c, d FROM mcv_cap; +ANALYZE mcv_cap; + +-- MCV +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + +-- When a value IS in the MCV list, no cap path runs +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 1 AND c = TRUE AND d = '{}'$$); + +-- Capping does not apply when the query includes an inequality clause +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a >= 0 AND b = 0 AND c = TRUE AND d = '{1, 2}'$$); + +-- Capping does not apply when the query does not cover all MCV columns +SELECT * FROM check_estimated_rows($$SELECT * FROM mcv_cap WHERE a = 0 AND b = 0 AND c = TRUE$$); + +DROP TABLE mcv_cap; + -- check the ability to use multiple MCV lists CREATE TABLE mcv_lists_multi ( a INTEGER, -- 2.43.0
