Hi, > >> We should do anything like add column options in the meantime. Those >> are hard to remove once added. > > I will try it very soon.
Attached is a PoC version. and here is the test case. create table t(a int, b int, c int) with (autovacuum_enabled=off); create index on t(a, b); create index on t(a, c); insert into t select floor(random() * 100 + 1)::int, i, i from generate_series(1, 100000) i; analyze t; insert into t select floor(random() * 10 + 1)::int + 100 , i, i from generate_series(1, 1000) i; -- one of the below queries would choose a wrong index. -- here is the result from my test. explain (costs off) select * from t where a = 109 and c = 8; QUERY PLAN --------------------------------------- Index Scan using t_a_c_idx on t Index Cond: ((a = 109) AND (c = 8)) (2 rows) explain (costs off) select * from t where a = 109 and b = 8; QUERY PLAN --------------------------------- Index Scan using t_a_c_idx on t Index Cond: (a = 109) Filter: (b = 8) (3 rows) Wrong index is chosen for the second case! -- After applying the new API. alter table t alter column a set (force_generic=on); explain (costs off) select * from t where a = 109 and c = 8; QUERY PLAN --------------------------------------- Index Scan using t_a_c_idx on t Index Cond: ((a = 109) AND (c = 8)) (2 rows) explain (costs off) select * from t where a = 109 and b = 8; QUERY PLAN --------------------------------------- Index Scan using t_a_b_idx on t Index Cond: ((a = 109) AND (b = 8)) (2 rows) Then both cases can choose a correct index. commit f8cca472479c50ba73479ec387882db43d203522 (HEAD -> shared_detoast_value) Author: yizhi.fzh <yizhi....@alibaba-inc.com> Date: Tue Mar 5 18:27:48 2024 +0800 Add a "force_generic" attoptions for selfunc.c Sometime user just care about the recent data and the optimizer statistics for such data is not gathered, then some bad decision may happen. Before this patch, we have to make the autoanalyze often and often, but it is not only expensive but also may be too late. This patch introduces a new attoptions like this: ALTER TABLE t ALTER COLUMN col set (force_generic=true); Then selfunc.c realizes this and ignore the special Const value, then average selectivity is chosen. This fall into the weakness of generic plan, but this patch doesn't introduce any new weakness and we leave the decision to user which could resolve some problem. Also this logic only apply to eqsel since the ineqsel have the get_actual_variable_range mechanism which is helpful for index choose case at least. I think it is OK for a design review, for the implementaion side, the known issue includes: 1. Support grap such infromation from its parent for partitioned table if the child doesn't have such information. 2. builtin document and testing. Any feedback is welcome. -- Best Regards Andy Fan
>From f8cca472479c50ba73479ec387882db43d203522 Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Tue, 5 Mar 2024 18:27:48 +0800 Subject: [PATCH v0 1/1] Add a "force_generic" attoptions for selfunc.c Sometime user just care about the recent data and the optimizer statistics for such data is not gathered, then some bad decision may happen. Before this patch, we have to make the autoanalyze often and often, but it is not only expensive but also may be too late. This patch introduces a new attoptions like this: ALTER TABLE t ALTER COLUMN col set (force_generic=true); Then selfunc.c realizes this and ignore the special Const value, then average selectivity is chosen. This fall into the weakness of generic plan, but this patch doesn't introduce any new weakness and we leave the decision to user which could resolve some problem. Also this logic only apply to eqsel since the ineqsel have the get_actual_variable_range mechanism which is helpful for index choose case at least. --- src/backend/access/common/reloptions.c | 12 ++++++++- src/backend/utils/adt/selfuncs.c | 35 ++++++++++++++++++++++---- src/bin/psql/tab-complete.c | 2 +- src/include/utils/attoptcache.h | 1 + 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 0921a736ab..c8444ea577 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -131,6 +131,15 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "force_generic", + "estimate the selectivity like generic plan", + RELOPT_KIND_ATTRIBUTE, + ShareUpdateExclusiveLock + }, + false + }, { { "security_barrier", @@ -2072,7 +2081,8 @@ attribute_reloptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"n_distinct", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct)}, - {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)} + {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)}, + {"force_generic", RELOPT_TYPE_BOOL, offsetof(AttributeOpts, force_generic)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index cea777e9d4..e7c4a5f80e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -125,6 +125,7 @@ #include "storage/bufmgr.h" #include "utils/acl.h" #include "utils/array.h" +#include "utils/attoptcache.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datum.h" @@ -213,7 +214,7 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); - +static bool est_selectivity_generic(PlannerInfo *root, VariableStatData *vardata); /* * eqsel - Selectivity of "=" for any data types. @@ -268,11 +269,11 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL; /* - * We can do a lot better if the something is a constant. (Note: the - * Const might result from estimation rather than being a simple constant - * in the query.) + * We can do a lot better if the something is a constant unless user disallow + * that. (Note: the Const might result from estimation rather than being a + * simple constant in the query.) */ - if (IsA(other, Const)) + if (IsA(other, Const) && !est_selectivity_generic(root, &vardata)) selec = var_eq_const(&vardata, operator, collation, ((Const *) other)->constvalue, ((Const *) other)->constisnull, @@ -8141,3 +8142,27 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexPages = index->pages; } + +/* + * Treat the vardata as generic, so that all the Const in the qual is treated + * as parameter. + */ +static bool +est_selectivity_generic(PlannerInfo *root, VariableStatData *vardata) +{ + Var *var; + Oid relid; + AttributeOpts *opts; + + if (!IsA(vardata->var, Var)) + return false; + + var = castNode(Var, vardata->var); + + relid = root->simple_rte_array[var->varno]->relid; + + opts = get_attribute_options(relid, var->varattno); + + return opts ? opts->force_generic : false; + +} diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index aa1acf8523..f93892c46c 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2511,7 +2511,7 @@ psql_completion(const char *text, int start, int end) /* ALTER TABLE ALTER [COLUMN] <foo> SET ( */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "(") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "(")) - COMPLETE_WITH("n_distinct", "n_distinct_inherited"); + COMPLETE_WITH("n_distinct", "n_distinct_inherited", "force_generic"); /* ALTER TABLE ALTER [COLUMN] <foo> SET COMPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "COMPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "COMPRESSION")) diff --git a/src/include/utils/attoptcache.h b/src/include/utils/attoptcache.h index a1a9bfc0fb..584cd099e4 100644 --- a/src/include/utils/attoptcache.h +++ b/src/include/utils/attoptcache.h @@ -21,6 +21,7 @@ typedef struct AttributeOpts int32 vl_len_; /* varlena header (do not touch directly!) */ float8 n_distinct; float8 n_distinct_inherited; + bool force_generic; } AttributeOpts; extern AttributeOpts *get_attribute_options(Oid attrelid, int attnum); -- 2.34.1