I wrote:
> Looking around, there are a couple of places outside commands/analyze.c
> that are making the same mistake, so this patch isn't complete, but it
> illustrates what needs to be done.
Here's a more complete patch.
regards, tom lane
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 5fcedd7..9ac7122 100644
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
*************** compute_distinct_stats(VacAttrStatsP sta
*** 2049,2056 ****
if (nmultiple == 0)
{
! /* If we found no repeated values, assume it's a unique column */
! stats->stadistinct = -1.0;
}
else if (track_cnt < track_max && toowide_cnt == 0 &&
nmultiple == track_cnt)
--- 2049,2059 ----
if (nmultiple == 0)
{
! /*
! * If we found no repeated non-null values, assume it's a unique
! * column; but be sure to discount for any nulls we found.
! */
! stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
}
else if (track_cnt < track_max && toowide_cnt == 0 &&
nmultiple == track_cnt)
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2426,2433 ****
if (nmultiple == 0)
{
! /* If we found no repeated values, assume it's a unique column */
! stats->stadistinct = -1.0;
}
else if (toowide_cnt == 0 && nmultiple == ndistinct)
{
--- 2429,2439 ----
if (nmultiple == 0)
{
! /*
! * If we found no repeated non-null values, assume it's a unique
! * column; but be sure to discount for any nulls we found.
! */
! stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
}
else if (toowide_cnt == 0 && nmultiple == ndistinct)
{
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2753,2759 ****
else
stats->stawidth = stats->attrtype->typlen;
/* Assume all too-wide values are distinct, so it's a unique column */
! stats->stadistinct = -1.0;
}
else if (null_cnt > 0)
{
--- 2759,2765 ----
else
stats->stawidth = stats->attrtype->typlen;
/* Assume all too-wide values are distinct, so it's a unique column */
! stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
}
else if (null_cnt > 0)
{
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 0f851ea..817453c 100644
*** a/src/backend/tsearch/ts_typanalyze.c
--- b/src/backend/tsearch/ts_typanalyze.c
*************** compute_tsvector_stats(VacAttrStats *sta
*** 295,301 ****
stats->stawidth = total_width / (double) nonnull_cnt;
/* Assume it's a unique column (see notes above) */
! stats->stadistinct = -1.0;
/*
* Construct an array of the interesting hashtable items, that is,
--- 295,301 ----
stats->stawidth = total_width / (double) nonnull_cnt;
/* Assume it's a unique column (see notes above) */
! stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
/*
* Construct an array of the interesting hashtable items, that is,
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index fcb71d3..56504fc 100644
*** a/src/backend/utils/adt/rangetypes_typanalyze.c
--- b/src/backend/utils/adt/rangetypes_typanalyze.c
*************** compute_range_stats(VacAttrStats *stats,
*** 203,209 ****
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) samplerows;
stats->stawidth = total_width / (double) non_null_cnt;
! stats->stadistinct = -1.0;
/* Must copy the target values into anl_context */
old_cxt = MemoryContextSwitchTo(stats->anl_context);
--- 203,211 ----
/* Do the simple null-frac and width stats */
stats->stanullfrac = (double) null_cnt / (double) samplerows;
stats->stawidth = total_width / (double) non_null_cnt;
!
! /* Estimate that non-null values are unique */
! stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
/* Must copy the target values into anl_context */
old_cxt = MemoryContextSwitchTo(stats->anl_context);
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cc2a9a1..56943f2 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
*************** double
*** 4738,4743 ****
--- 4738,4744 ----
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
double stadistinct;
+ double stanullfrac = 0.0;
double ntuples;
*isdefault = false;
*************** get_variable_numdistinct(VariableStatDat
*** 4745,4751 ****
/*
* Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
! * value than is in pg_statistic.
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
--- 4746,4753 ----
/*
* Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
! * value than is in pg_statistic. Grab stanullfrac too if we can find it
! * (otherwise, assume no nulls, for lack of any better idea).
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
*************** get_variable_numdistinct(VariableStatDat
*** 4754,4759 ****
--- 4756,4762 ----
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
stadistinct = stats->stadistinct;
+ stanullfrac = stats->stanullfrac;
}
else if (vardata->vartype == BOOLOID)
{
*************** get_variable_numdistinct(VariableStatDat
*** 4777,4783 ****
{
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
! stadistinct = -1.0; /* unique */
break;
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
--- 4780,4786 ----
{
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
! stadistinct = -1.0; /* unique (and all non null) */
break;
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
*************** get_variable_numdistinct(VariableStatDat
*** 4799,4808 ****
* If there is a unique index or DISTINCT clause for the variable, assume
* it is unique no matter what pg_statistic says; the statistics could be
* out of date, or we might have found a partial unique index that proves
! * the var is unique for this query.
*/
if (vardata->isunique)
! stadistinct = -1.0;
/*
* If we had an absolute estimate, use that.
--- 4802,4812 ----
* If there is a unique index or DISTINCT clause for the variable, assume
* it is unique no matter what pg_statistic says; the statistics could be
* out of date, or we might have found a partial unique index that proves
! * the var is unique for this query. However, we'd better still believe
! * the null-fraction statistic.
*/
if (vardata->isunique)
! stadistinct = -1.0 * (1.0 - stanullfrac);
/*
* If we had an absolute estimate, use that.
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers