COALESCE patch

prankware Tue, 30 Jun 2026 04:26:37 -0700

Hi everyone,

The planner ignores column statistics when an equality has a COALESCE
expression on one side. For a clause like COALESCE(a, b) = $1, or a join on
COALESCE(t1.a, t1.b) = COALESCE(t2.c, t2.d), there are no statistics on the
COALESCE node itself, so eqsel() and eqjoinsel() return the generic 0.005
estimate while the per-column stats for a, b, c and d sit unused. The only way
around this today is an expression index or extended statistics on that exact
expression, which doesn't scale across many different COALESCE clauses.
estimate_hash_bucket_stats() has the same gap: a COALESCE hash key gets a
default ndistinct and therefore a default bucket size. Since these expressions
are common in joins and filters over nullable or fallback columns, the default
estimate can be far enough off to flip the join order or join method.


The idea is to estimate straight from the existing per-column stats, with no
extra statistics object. COALESCE(arg_1, ..., arg_n) returns arg_i only when
arg_1 .. arg_{i-1} are all NULL, so the chance of reaching branch i is the
product of stanullfrac over the earlier branches. Selectivity of
COALESCE(l_1..l_M) = COALESCE(r_1..r_N) is then the sum over branch pairs of
P(reach l_i) * P(reach r_j) * sel(l_i = r_j), and each sel(l_i = r_j) is a
recursive call back into eqsel()/eqjoinsel(). A non-COALESCE side is treated as
a one-branch list, so scalar COALESCE(a, b) = const falls out of the same code,
and the same decomposition feeds estimate_hash_bucket_stats(). If any branch is
missing stats, the code bails and today's behavior is unchanged.

A minimal example:

    CREATE TABLE t (a int, b int);
    INSERT INTO t
      SELECT CASE WHEN i % 5 < 2 THEN NULL ELSE i END, i
      FROM generate_series(1, 1000) i;
    ANALYZE t;

    EXPLAIN SELECT * FROM t WHERE COALESCE(a, b) = 42;

After ANALYZE, a is NULL in 400 of the 1000 rows (stanullfrac 0.4, ndistinct
600) and b has no NULLs (stanullfrac 0, ndistinct 1000). COALESCE(a, b) is
unique, so exactly one row matches. Each branch is weighted by the probability
of reaching it, the product of stanullfrac over the branches before it:

    branch a:  reach 1.0,  sel(a = 42) = (1 - 0.4) / 600  = 0.001
    branch b:  reach 0.4,  sel(b = 42) = (1 - 0.0) / 1000 = 0.001

    selectivity = 1.0 * 0.001 + 0.4 * 0.001 = 0.0014  ->  ~1 row out of 1000

The patch lands on ~1 row, matching reality. The 0.005 default (5 rows) is not
derived from the table at all: it is the 1/DEFAULT_NUM_DISTINCT constant the
planner falls back to with no statistics on the expression, so it stays 5 rows
regardless of the null fraction or the ndistinct of a and b.

Feedback is welcome.

Regards,
Egor Savelev,
Tantor Labs LLC,
https://tantorlabs.com/

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d6efd07073a..3a47495fd0c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -302,6 +302,278 @@ eqsel(PG_FUNCTION_ARGS)
 	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
 }
 
+/*
+ * Extract the estimable branches of a CoalesceExpr.  Strips RelabelType,
+ * skips NULL-constant branches, and stops at the first non-NULL Const
+ * (setting *terminates_with_const).  Returns false if the expression
+ * is constant or no branches remain.
+ */
+static bool
+match_coalesce_join_side(Node *side,
+						 List **stripped_args,
+						 bool *terminates_with_const)
+{
+	CoalesceExpr *c;
+	ListCell   *lc;
+	List	   *result = NIL;
+
+	*terminates_with_const = false;
+	*stripped_args = NIL;
+
+	if (side == NULL || !IsA(side, CoalesceExpr))
+		return false;
+
+	c = (CoalesceExpr *) side;
+
+	if (list_length(c->args) < 2)
+		return false;
+
+	foreach(lc, c->args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+
+		/* strip RelabelType */
+		while (arg && IsA(arg, RelabelType))
+			arg = (Node *) ((RelabelType *) arg)->arg;
+
+		/* leading Const makes COALESCE itself constant */
+		if (arg == NULL || (lc == list_head(c->args) && IsA(arg, Const)))
+		{
+			list_free(result);
+			return false;
+		}
+
+		if (IsA(arg, Const))
+		{
+			if (((Const *) arg)->consttype != c->coalescetype)
+			{
+				list_free(result);
+				return false;
+			}
+
+			/* NULL Const is skipped */
+			if (!((Const *) arg)->constisnull)
+			{
+				result = lappend(result, arg);
+				*terminates_with_const = true;
+				break;
+			}
+		}
+		else
+		{
+			if (exprType(arg) != c->coalescetype)
+			{
+				list_free(result);
+				return false;
+			}
+			result = lappend(result, arg);
+		}
+	}
+
+	if (result == NIL)
+		return false;
+
+	*stripped_args = result;
+	return true;
+}
+
+/*
+ * Fill prefix_probs[i] = Prod_{j<i} stanullfrac(args[j]) for a stripped
+ * COALESCE arg list.  Returns false if any non-Const arg lacks statistics.
+ */
+static bool
+get_coalesce_prefix_probs(PlannerInfo *root, List *args, double *prefix_probs)
+{
+	ListCell   *lc;
+	int			i = 0;
+	double		prefix = 1.0;
+
+	foreach(lc, args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+
+		prefix_probs[i++] = prefix;
+
+		/* nothing to fold for the last arg or a Const */
+		if (lnext(args, lc) == NULL || IsA(arg, Const))
+			continue;
+
+		{
+			VariableStatData vd;
+			double		p_i;
+
+			examine_variable(root, arg, 0, &vd);
+			if (!HeapTupleIsValid(vd.statsTuple))
+			{
+				ReleaseVariableStats(vd);
+				return false;
+			}
+			p_i = ((Form_pg_statistic) GETSTRUCT(vd.statsTuple))->stanullfrac;
+			ReleaseVariableStats(vd);
+
+			if (p_i < 0.0 || p_i > 1.0)
+				return false;
+
+			prefix *= p_i;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Common guts of eqsel/eqjoinsel when one or both operands are CoalesceExpr.
+ * Computes:
+ *
+ *   sel(COALESCE(l_1,...,l_M) = COALESCE(r_1,...,r_N)) =
+ *       Sum_{i,j}  P_L(reach i) * P_R(reach j) * sel(l_i = r_j)
+ *
+ * where P(reach i) = Prod_{j<i} stanullfrac(arg_j).  A non-COALESCE operand
+ * is treated as a single-element list.
+ */
+static bool
+try_coalesce_eq(PG_FUNCTION_ARGS, double *selec_out)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	bool		is_eqjoin = (fcinfo->flinfo != NULL &&
+							 fcinfo->flinfo->fn_oid == F_EQJOINSEL);
+	SpecialJoinInfo *sjinfo = is_eqjoin ?
+		(SpecialJoinInfo *) PG_GETARG_POINTER(4) : NULL;
+	int			eqsel_varRelid = is_eqjoin ? 0 : PG_GETARG_INT32(3);
+	Oid			collation = PG_GET_COLLATION();
+	Node	   *left;
+	Node	   *right;
+	List	   *left_args = NIL;
+	List	   *right_args = NIL;
+	bool		left_is_coalesce;
+	bool		right_is_coalesce;
+	bool		left_term_const = false;
+	bool		right_term_const = false;
+	double	   *left_prefix;
+	double	   *right_prefix;
+	double		acc_selec = 0.0;
+	ListCell   *llc;
+	int			li;
+
+	if (list_length(args) != 2)
+		return false;
+
+	left = (Node *) linitial(args);
+	right = (Node *) lsecond(args);
+
+	left_is_coalesce = match_coalesce_join_side(left, &left_args,
+												&left_term_const);
+	right_is_coalesce = match_coalesce_join_side(right, &right_args,
+												 &right_term_const);
+
+	if (!left_is_coalesce && !right_is_coalesce)
+		return false;
+
+	/* one side is a CoalesceExpr that match_coalesce_join_side rejected */
+	if ((left_is_coalesce && !right_is_coalesce && IsA(right, CoalesceExpr)) ||
+		(right_is_coalesce && !left_is_coalesce && IsA(left, CoalesceExpr)))
+	{
+		list_free(left_args);
+		list_free(right_args);
+		return false;
+	}
+
+	if (!left_is_coalesce)
+	{
+		left_args = list_make1(left);
+		left_term_const = IsA(left, Const);
+	}
+	if (!right_is_coalesce)
+	{
+		right_args = list_make1(right);
+		right_term_const = IsA(right, Const);
+	}
+
+	if (is_eqjoin && left_term_const && right_term_const)
+	{
+		list_free(left_args);
+		list_free(right_args);
+		return false;
+	}
+
+	left_prefix = (double *) palloc(sizeof(double) * list_length(left_args));
+	right_prefix = (double *) palloc(sizeof(double) * list_length(right_args));
+
+	if (!get_coalesce_prefix_probs(root, left_args, left_prefix) ||
+		!get_coalesce_prefix_probs(root, right_args, right_prefix))
+	{
+		pfree(left_prefix);
+		pfree(right_prefix);
+		list_free(left_args);
+		list_free(right_args);
+		return false;
+	}
+
+	li = 0;
+	foreach(llc, left_args)
+	{
+		Node	   *larg = (Node *) lfirst(llc);
+		bool		lconst = IsA(larg, Const);
+		ListCell   *rlc;
+		int			ri = 0;
+
+		if (left_prefix[li] < 1.0e-12)
+			break;
+
+		foreach(rlc, right_args)
+		{
+			Node	   *rarg = (Node *) lfirst(rlc);
+			bool		rconst = IsA(rarg, Const);
+			List	   *sub_args;
+			Selectivity contrib;
+
+			if (right_prefix[ri] < 1.0e-12)
+				break;
+
+			sub_args = list_make2(copyObject(larg), copyObject(rarg));
+
+			if (!is_eqjoin || lconst || rconst)
+			{
+				contrib = DatumGetFloat8(DirectFunctionCall4Coll(eqsel,
+																 collation,
+																 PointerGetDatum(root),
+																 ObjectIdGetDatum(operator),
+																 PointerGetDatum(sub_args),
+																 Int32GetDatum(eqsel_varRelid)));
+			}
+			else
+			{
+				contrib = DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel,
+																 collation,
+																 PointerGetDatum(root),
+																 ObjectIdGetDatum(operator),
+																 PointerGetDatum(sub_args),
+																 Int16GetDatum(JOIN_INNER),
+																 PointerGetDatum(sjinfo)));
+			}
+
+			CLAMP_PROBABILITY(contrib);
+			acc_selec += left_prefix[li] * right_prefix[ri] * contrib;
+
+			list_free(sub_args);
+			ri++;
+		}
+
+		li++;
+	}
+
+	pfree(left_prefix);
+	pfree(right_prefix);
+	list_free(left_args);
+	list_free(right_args);
+
+	CLAMP_PROBABILITY(acc_selec);
+	*selec_out = acc_selec;
+	return true;
+}
+
 /*
  * Common code for eqsel() and neqsel()
  */
@@ -318,6 +590,9 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
 	bool		varonleft;
 	double		selec;
 
+	if (try_coalesce_eq(fcinfo, &selec))
+		return selec;
+
 	/*
 	 * When asked about <>, we do the estimation using the corresponding =
 	 * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
@@ -2418,6 +2693,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	bool		join_is_reversed;
 	RelOptInfo *inner_rel;
 
+	if (try_coalesce_eq(fcinfo, &selec))
+		PG_RETURN_FLOAT8((float8) selec);
+
 	get_join_variables(root, args, sjinfo,
 					   &vardata1, &vardata2, &join_is_reversed);
 
@@ -4374,6 +4652,154 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
 	return otherclauses;
 }
 
+/*
+ * Most-common-value frequency for vardata.  Falls back to 1/ntuples when
+ * only a histogram slot is present.  Returns 0.0 if no statistics are
+ * available.
+ */
+static void
+get_variable_mcv_freq(VariableStatData *vardata, Selectivity *mcv_freq)
+{
+	AttStatsSlot sslot;
+
+	*mcv_freq = 0.0;
+
+	if (!HeapTupleIsValid(vardata->statsTuple))
+		return;
+
+	if (get_attstatsslot(&sslot, vardata->statsTuple,
+						 STATISTIC_KIND_MCV, InvalidOid,
+						 ATTSTATSSLOT_NUMBERS))
+	{
+		if (sslot.nnumbers > 0)
+			*mcv_freq = sslot.numbers[0];
+		free_attstatsslot(&sslot);
+	}
+	else if (get_attstatsslot(&sslot, vardata->statsTuple,
+							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
+							  0))
+	{
+		/* no MCVs but histogram present: column is likely unique */
+		if (vardata->rel && vardata->rel->tuples > 0)
+			*mcv_freq = 1.0 / vardata->rel->tuples;
+	}
+}
+
+/*
+ * Estimate bucket stats for a CoalesceExpr hashkey when examine_variable()
+ * returned a default ndistinct.  Uses per-branch ndistinct and mcv_freq,
+ * weighted by null fall-through probability.
+ */
+static bool
+hash_bucket_stats_coalesce_dispatch(PlannerInfo *root,
+									Node *hashkey,
+									double nbuckets,
+									Selectivity *mcv_freq,
+									Selectivity *bucketsize_frac)
+{
+	List	   *stripped_args;
+	bool		terminates_with_const;
+	double	   *prefix;
+	int			nargs;
+	int			i;
+	ListCell   *lc;
+	double		nd_mix = 0.0;
+	Selectivity mcv_mix = 0.0;
+	double		rel_rows_proxy = 0.0;
+	double		rel_tuples_proxy = 0.0;
+	double		estfract;
+
+	if (!match_coalesce_join_side(hashkey, &stripped_args, &terminates_with_const))
+		return false;
+
+	nargs = list_length(stripped_args);
+	prefix = (double *) palloc(sizeof(double) * nargs);
+
+	if (!get_coalesce_prefix_probs(root, stripped_args, prefix))
+	{
+		pfree(prefix);
+		list_free(stripped_args);
+		return false;
+	}
+
+	i = 0;
+	foreach(lc, stripped_args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+
+		if (IsA(arg, Const))
+		{
+			mcv_mix = Max(mcv_mix, prefix[i]);
+			if (prefix[i] > 0.0)
+				nd_mix += 1.0;
+		}
+		else
+		{
+			VariableStatData vd;
+			double		nd_i;
+			bool		isdefault;
+			Selectivity mcv_i;
+
+			examine_variable(root, arg, 0, &vd);
+			nd_i = get_variable_numdistinct(&vd, &isdefault);
+
+			if (isdefault)
+			{
+				ReleaseVariableStats(vd);
+				pfree(prefix);
+				list_free(stripped_args);
+				return false;
+			}
+
+			get_variable_mcv_freq(&vd, &mcv_i);
+
+			nd_mix = Max(nd_mix, nd_i);
+			mcv_mix = Max(mcv_mix, prefix[i] * mcv_i);
+
+			if (i == 0 && vd.rel && vd.rel->tuples > 0)
+			{
+				rel_rows_proxy = vd.rel->rows;
+				rel_tuples_proxy = vd.rel->tuples;
+			}
+
+			ReleaseVariableStats(vd);
+		}
+
+		i++;
+	}
+
+	pfree(prefix);
+	list_free(stripped_args);
+
+	if (rel_tuples_proxy > 0.0)
+	{
+		nd_mix *= rel_rows_proxy / rel_tuples_proxy;
+		nd_mix = clamp_row_est(nd_mix);
+	}
+
+	if (nd_mix <= 0.0)
+		return false;
+
+	if (nd_mix > nbuckets)
+		estfract = 1.0 / nbuckets;
+	else
+		estfract = 1.0 / nd_mix;
+
+	CLAMP_PROBABILITY(mcv_mix);
+	*mcv_freq = Max(*mcv_freq, mcv_mix);
+	CLAMP_PROBABILITY(*mcv_freq);
+	estfract = Max(estfract, *mcv_freq);
+
+	if (estfract < 1.0e-6)
+		estfract = 1.0e-6;
+	else if (estfract > 1.0)
+		estfract = 1.0;
+
+	*bucketsize_frac = (Selectivity) estfract;
+
+	return true;
+}
+
 /*
  * Estimate hash bucket statistics when the specified expression is used
  * as a hash key for the given number of buckets.
@@ -4427,43 +4853,21 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
 	double		estfract,
 				ndistinct;
 	bool		isdefault;
-	AttStatsSlot sslot;
 
 	examine_variable(root, hashkey, 0, &vardata);
 
-	/* Initialize *mcv_freq to "unknown" */
-	*mcv_freq = 0.0;
-
-	/* Look up the frequency of the most common value, if available */
-	if (HeapTupleIsValid(vardata.statsTuple))
-	{
-		if (get_attstatsslot(&sslot, vardata.statsTuple,
-							 STATISTIC_KIND_MCV, InvalidOid,
-							 ATTSTATSSLOT_NUMBERS))
-		{
-			/*
-			 * The first MCV stat is for the most common value.
-			 */
-			if (sslot.nnumbers > 0)
-				*mcv_freq = sslot.numbers[0];
-			free_attstatsslot(&sslot);
-		}
-		else if (get_attstatsslot(&sslot, vardata.statsTuple,
-								  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-								  0))
-		{
-			/*
-			 * If there are no recorded MCVs, but we do have a histogram, then
-			 * assume that ANALYZE determined that the column is unique.
-			 */
-			if (vardata.rel && vardata.rel->tuples > 0)
-				*mcv_freq = 1.0 / vardata.rel->tuples;
-		}
-	}
+	get_variable_mcv_freq(&vardata, mcv_freq);
 
 	/* Get number of distinct values */
 	ndistinct = get_variable_numdistinct(&vardata, &isdefault);
 
+	if (isdefault && hash_bucket_stats_coalesce_dispatch(root, hashkey, nbuckets,
+														 mcv_freq, bucketsize_frac))
+	{
+		ReleaseVariableStats(vardata);
+		return;
+	}
+
 	/*
 	 * If ndistinct isn't real, punt.  We normally return 0.1, but if the
 	 * mcv_freq is known to be even higher than that, use it instead.

COALESCE patch

Reply via email to