Hi,

The attached patch series is modified to improve estimates for these special clauses (Var op Var with the same var on both sides) without extended statistics. This is done in 0001, and it seems fairly simple and cheap.

The 0002 part is still the same patch as on 2021/07/20. Part 0003 fixes handling of those clauses so that we don't treat them as simple, but it does that by tweaking statext_is_compatible_clause(), as suggested by Dean. It does work, although it's a bit more invasive than simply checking the shape of clause in statext_mcv_clauselist_selectivity.

I do have results for the randomly generated queries, and this does improve the situation a lot - pretty much all the queries with (a=a) or (a<a) clauses had terrible estimates, and this fixes that.

That being said, I'm still not sure if this is an issue in real-world applications, or whether we're solving something because of synthetic queries generated by the randomized generator. But the checks seem fairly cheap, so maybe it doesn't matter too much.


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
>From 708587e495483aff4b84487103a545b6e860d0c0 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Thu, 26 Aug 2021 23:01:04 +0200
Subject: [PATCH 1/3] Improve estimates for Var op Var with the same Var

When estimating (Var op Var) conditions, we can treat the case with the
same Var on both sides as a special case, and we can provide better
selectivity estimate than for the generic case.

For example for (a = a) we know it's 1.0, because all rows are expected
to match. Similarly for (a != a) , wich has selectivity 0.0. And the
same logic can be applied to inequality comparisons, like (a < a) etc.

In principle, those clauses are a bit strange and queries are unlikely
to use them. But query generators sometimes do silly things, and these
checks are quite cheap so it's likely a win.
---
 src/backend/utils/adt/selfuncs.c | 75 +++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0c8c05f6c2..0baeaa040d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -210,7 +210,8 @@ static bool get_actual_variable_endpoint(Relation heapRel,
 										 MemoryContext outercontext,
 										 Datum *endpointDatum);
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
-
+static bool matching_restriction_variables(PlannerInfo *root, List *args,
+										   int varRelid);
 
 /*
  *		eqsel			- Selectivity of "=" for any data types.
@@ -256,6 +257,14 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
 		}
 	}
 
+	/*
+	 * It it's (variable = variable) with the same variable on both sides, it's
+	 * a special case and we know it does not filter anything, which means the
+	 * selectivity is 1.0.
+	 */
+	if (matching_restriction_variables(root, args, varRelid))
+		return (negate) ? 0.0 : 1.0;
+
 	/*
 	 * If expression is not variable = something or something = variable, then
 	 * punt and return a default estimate.
@@ -1408,6 +1417,20 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
 	Oid			consttype;
 	double		selec;
 
+	/*
+	 * Handle (variable < variable) and (variable <= variable) with the same
+	 * variable on both sides as a special case. The strict inequality should
+	 * not match anything, hence selectivity is 0.0. The other case is about
+	 * the same as equality, so selectivity is 1.0.
+	 */
+	if (matching_restriction_variables(root, args, varRelid))
+	{
+		if (iseq)
+			PG_RETURN_FLOAT8(1.0);
+		else
+			PG_RETURN_FLOAT8(0.0);
+	}
+
 	/*
 	 * If expression is not variable op something or something op variable,
 	 * then punt and return a default estimate.
@@ -4871,6 +4894,56 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
 	return false;
 }
 
+
+/*
+ * matching_restriction_variable
+ *		Examine the args of a restriction clause to see if it's of the
+ *		form (variable op variable) with the save variable on both sides.
+ *
+ * Inputs:
+ *	root: the planner info
+ *	args: clause argument list
+ *	varRelid: see specs for restriction selectivity functions
+ *
+ * Returns true if the same variable is on both sides, otherwise false.
+ */
+static bool
+matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
+{
+	Node	   *left,
+			   *right;
+	VariableStatData ldata;
+	VariableStatData rdata;
+	bool		res = false;
+
+	/* Fail if not a binary opclause (probably shouldn't happen) */
+	if (list_length(args) != 2)
+		return false;
+
+	left = (Node *) linitial(args);
+	right = (Node *) lsecond(args);
+
+	/*
+	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
+	 * relations will be treated as pseudoconstants.
+	 */
+	examine_variable(root, left, varRelid, &ldata);
+	examine_variable(root, right, varRelid, &rdata);
+
+	/*
+	 * If both sides are variable, and are equal, we win.
+	 */
+	if ((ldata.rel != NULL && rdata.rel != NULL) &&
+		equal(ldata.var, rdata.var))
+		res = true;
+
+	/* We don't need the stats. */
+	ReleaseVariableStats(ldata);
+	ReleaseVariableStats(rdata);
+
+	return res;
+}
+
 /*
  * get_join_variables
  *		Apply examine_variable() to each side of a join clause.
-- 
2.31.1

>From caa5c2aa0e640097a43e87f7d9557a640e6e8948 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Sun, 15 Aug 2021 13:23:13 +0200
Subject: [PATCH 2/3] patch 20210720

---
 src/backend/optimizer/path/clausesel.c        |  37 +++-
 src/backend/statistics/extended_stats.c       |  83 ++++++---
 src/backend/statistics/mcv.c                  | 172 +++++++++++++-----
 .../statistics/extended_stats_internal.h      |   4 +-
 src/test/regress/expected/stats_ext.out       |  96 ++++++++++
 src/test/regress/sql/stats_ext.sql            |  26 +++
 6 files changed, 341 insertions(+), 77 deletions(-)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index d263ecf082..6a7e9ceea5 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -714,6 +714,7 @@ clause_selectivity_ext(PlannerInfo *root,
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
 	bool		cacheable = false;
+	Node	   *src = clause;
 
 	if (clause == NULL)			/* can this still happen? */
 		return s1;
@@ -871,11 +872,37 @@ clause_selectivity_ext(PlannerInfo *root,
 		}
 		else
 		{
-			/* Estimate selectivity for a restriction clause. */
-			s1 = restriction_selectivity(root, opno,
-										 opclause->args,
-										 opclause->inputcollid,
-										 varRelid);
+			/*
+			 * It might be a single (Expr op Expr) clause, which goes here due
+			 * to the optimization at the beginning of clauselist_selectivity.
+			 * So we try applying extended stats first, and then fall back to
+			 * restriction_selectivity.
+			 */
+			bool	estimated = false;
+
+			if (use_extended_stats)
+			{
+				List	   *clauses = list_make1(src);
+				RelOptInfo *rel = find_single_rel_for_clauses(root, clauses);
+
+				if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+				{
+					Bitmapset  *estimatedclauses = NULL;
+
+					s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+														jointype, sjinfo, rel,
+														&estimatedclauses, false);
+
+					estimated = (bms_num_members(estimatedclauses) == 1);
+				}
+			}
+
+			/* Estimate selectivity for a restriction clause (fallback). */
+			if (!estimated)
+				s1 = restriction_selectivity(root, opno,
+											 opclause->args,
+											 opclause->inputcollid,
+											 varRelid);
 		}
 
 		/*
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 2e55913bc8..606cf8c588 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1347,19 +1347,27 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		return true;
 	}
 
-	/* (Var/Expr op Const) or (Const op Var/Expr) */
+	/*
+	 * Three opclause variants are supported: (Expr op Const), (Const op Expr),
+	 * (Expr op Expr). That means we may need to analyze one or two expressions
+	 * to make sure the opclause is compatible with extended stats.
+	 */
 	if (is_opclause(clause))
 	{
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		OpExpr	   *expr = (OpExpr *) clause;
-		Node	   *clause_expr;
+		ListCell   *lc;
+		List	   *clause_exprs;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
-		/* Check if the expression has the right shape */
-		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
+		/*
+		 * Check if the expression has the right shape. This returns either one
+		 * or two expressions, depending on whether there is a Const.
+		 */
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
 			return false;
 
 		/*
@@ -1399,13 +1407,31 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			!get_func_leakproof(get_opcode(expr->opno)))
 			return false;
 
-		/* Check (Var op Const) or (Const op Var) clauses by recursing. */
-		if (IsA(clause_expr, Var))
-			return statext_is_compatible_clause_internal(root, clause_expr,
-														 relid, attnums, exprs);
+		/*
+		 * There's always at least one expression, otherwise the clause would
+		 * not be considered compatible.
+		 */
+		Assert(list_length(clause_exprs) >= 1);
+
+		/*
+		 * Check all expressions by recursing. Var expressions are handled as
+		 * a special case (to match it to attnums etc.)
+		 */
+		foreach (lc, clause_exprs)
+		{
+			Node *clause_expr = (Node *) lfirst(lc);
+
+			if (IsA(clause_expr, Var))
+			{
+				/* if the Var is incompatible, the whole clause is incompatible */
+				if (!statext_is_compatible_clause_internal(root, clause_expr,
+														   relid, attnums, exprs))
+					return false;
+			}
+			else	/* generic expression */
+				*exprs = lappend(*exprs, clause_expr);
+		}
 
-		/* Otherwise we have (Expr op Const) or (Const op Expr). */
-		*exprs = lappend(*exprs, clause_expr);
 		return true;
 	}
 
@@ -1415,15 +1441,21 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
 		Node	   *clause_expr;
+		List	   *clause_exprs;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
 			return false;
 
+		/* There has to be one expression exactly. */
+		Assert(list_length(clause_exprs) == 1);
+
+		clause_expr = (Node *) linitial(clause_exprs);
+
 		/*
 		 * If it's not one of the supported operators ("=", "<", ">", etc.),
 		 * just ignore the clause, as it's not compatible with MCV lists.
@@ -2009,20 +2041,19 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
  * examine_opclause_args
  *		Split an operator expression's arguments into Expr and Const parts.
  *
- * Attempts to match the arguments to either (Expr op Const) or (Const op
- * Expr), possibly with a RelabelType on top. When the expression matches this
- * form, returns true, otherwise returns false.
+ * Attempts to match the arguments to either (Expr op Const) or (Const op Expr)
+ * or (Expr op Expr), possibly with a RelabelType on top. When the expression
+ * matches this form, returns true, otherwise returns false.
  *
  * Optionally returns pointers to the extracted Expr/Const nodes, when passed
- * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
+ * non-null pointers (exprsp, cstp and expronleftp). The expronleftp flag
  * specifies on which side of the operator we found the expression node.
  */
 bool
-examine_opclause_args(List *args, Node **exprp, Const **cstp,
-					  bool *expronleftp)
+examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp)
 {
-	Node	   *expr;
-	Const	   *cst;
+	List	   *exprs = NIL;
+	Const	   *cst = NULL;
 	bool		expronleft;
 	Node	   *leftop,
 			   *rightop;
@@ -2042,22 +2073,26 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
 
 	if (IsA(rightop, Const))
 	{
-		expr = (Node *) leftop;
+		exprs = lappend(exprs, leftop);
 		cst = (Const *) rightop;
 		expronleft = true;
 	}
 	else if (IsA(leftop, Const))
 	{
-		expr = (Node *) rightop;
+		exprs = lappend(exprs, rightop);
 		cst = (Const *) leftop;
 		expronleft = false;
 	}
 	else
-		return false;
+	{
+		exprs = lappend(exprs, leftop);
+		exprs = lappend(exprs, rightop);
+		expronleft = false;
+	}
 
 	/* return pointers to the extracted parts if requested */
-	if (exprp)
-		*exprp = expr;
+	if (exprsp)
+		*exprsp = exprs;
 
 	if (cstp)
 		*cstp = cst;
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index ef118952c7..85f650f572 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1645,78 +1645,154 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_opclause_args returns true */
 			Node	   *clause_expr;
+			Node	   *clause_expr2;
+			List	   *clause_exprs;
 			Const	   *cst;
 			bool		expronleft;
-			int			idx;
 			Oid			collid;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
 				elog(ERROR, "incompatible clause");
 
-			/* match the attribute/expression to a dimension of the statistic */
-			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+			if (cst)	/* Expr op Const */
+			{
+				int idx;
 
-			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+				Assert(list_length(clause_exprs) == 1);
+				clause_expr = (Node *) linitial(clause_exprs);
 
-			/*
-			 * Walk through the MCV items and evaluate the current clause. We
-			 * can skip items that were already ruled out, and terminate if
-			 * there are no remaining MCV items that might possibly match.
-			 */
-			for (i = 0; i < mcvlist->nitems; i++)
-			{
-				bool		match = true;
-				MCVItem    *item = &mcvlist->items[i];
+				/* match the attribute/expression to a dimension of the statistic */
+				idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
 
-				Assert(idx >= 0);
+				Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
 
 				/*
-				 * When the MCV item or the Const value is NULL we can treat
-				 * this as a mismatch. We must not call the operator because
-				 * of strictness.
+				 * Walk through the MCV items and evaluate the current clause. We
+				 * can skip items that were already ruled out, and terminate if
+				 * there are no remaining MCV items that might possibly match.
 				 */
-				if (item->isnull[idx] || cst->constisnull)
+				for (i = 0; i < mcvlist->nitems; i++)
 				{
-					matches[i] = RESULT_MERGE(matches[i], is_or, false);
-					continue;
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					Assert(idx >= 0);
+
+					/*
+					 * When the MCV item or the Const value is NULL we can treat
+					 * this as a mismatch. We must not call the operator because
+					 * of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap. Once
+					 * the value gets false for AND-lists, or true for OR-lists,
+					 * we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * We don't store collations used to build the statistics, but
+					 * we can use the collation for the attribute itself, as
+					 * stored in varcollid. We do reset the statistics after a
+					 * type change (including collation change), so this is OK.
+					 * For expressions, we use the collation extracted from the
+					 * expression itself.
+					 */
+					if (expronleft)
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   item->values[idx],
+															   cst->constvalue));
+					else
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   cst->constvalue,
+															   item->values[idx]));
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
+			}
+			else	/* Expr op Expr */
+			{
+				int			idx;
+				int			idx2;
+
+				Assert(list_length(clause_exprs) == 2);
+
+				clause_expr = (Node *) linitial(clause_exprs);
+				clause_expr2 = (Node *) lsecond(clause_exprs);
+
+				Assert(clause_expr2);
+				Assert(!expronleft);
 
 				/*
-				 * Skip MCV items that can't change result in the bitmap. Once
-				 * the value gets false for AND-lists, or true for OR-lists,
-				 * we don't need to look at more clauses.
+				 * Match the expressions to a dimension of the statistic.
+				 *
+				 * XXX Can the collations differ?
 				 */
-				if (RESULT_IS_FINAL(matches[i], is_or))
-					continue;
+				idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+				idx2 = mcv_match_expression(clause_expr2, keys, exprs, &collid);
+
+				Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+				Assert((idx2 >= 0) && (idx2 < bms_num_members(keys) + list_length(exprs)));
 
 				/*
-				 * First check whether the constant is below the lower
-				 * boundary (in that case we can skip the bucket, because
-				 * there's no overlap).
-				 *
-				 * We don't store collations used to build the statistics, but
-				 * we can use the collation for the attribute itself, as
-				 * stored in varcollid. We do reset the statistics after a
-				 * type change (including collation change), so this is OK.
-				 * For expressions, we use the collation extracted from the
-				 * expression itself.
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
 				 */
-				if (expronleft)
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When either of the MCV items is NULL we can treat this
+					 * as a mismatch. We must not call the operator because
+					 * of strictness.
+					 */
+					if (item->isnull[idx] || item->isnull[idx2])
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * We don't store collations used to build the statistics,
+					 * but we can use the collation for the attribute itself,
+					 * as stored in varcollid. We do reset the statistics after
+					 * a type change (including collation change), so this is
+					 * OK. We may need to relax this after allowing extended
+					 * statistics on expressions.
+					 */
 					match = DatumGetBool(FunctionCall2Coll(&opproc,
 														   collid,
 														   item->values[idx],
-														   cst->constvalue));
-				else
-					match = DatumGetBool(FunctionCall2Coll(&opproc,
-														   collid,
-														   cst->constvalue,
-														   item->values[idx]));
+														   item->values[idx2]));
 
-				/* update the match bitmap with the result */
-				matches[i] = RESULT_MERGE(matches[i], is_or, match);
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
 			}
 		}
 		else if (IsA(clause, ScalarArrayOpExpr))
@@ -1726,6 +1802,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_opclause_args returns true */
 			Node	   *clause_expr;
+			List	   *clause_exprs;
 			Const	   *cst;
 			bool		expronleft;
 			Oid			collid;
@@ -1743,11 +1820,14 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
 				elog(ERROR, "incompatible clause");
 
 			/* ScalarArrayOpExpr has the Var always on the left */
 			Assert(expronleft);
+			Assert(list_length(clause_exprs) == 1);
+
+			clause_expr = (Node *) linitial(clause_exprs);
 
 			/* XXX what if (cst->constisnull == NULL)? */
 			if (!cst->constisnull)
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 55cd9252a5..1f30fa9060 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -99,8 +99,8 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
 									MultiSortSupport mss,
 									int numattrs, AttrNumber *attnums);
 
-extern bool examine_opclause_args(List *args, Node **exprp,
-								  Const **cstp, bool *expronleftp);
+extern bool examine_opclause_args(List *args, List **exprs, Const **cstp,
+								  bool *expronleftp);
 
 extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
 											 Selectivity mcv_sel,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 7fb54de53d..301227079c 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -1904,6 +1904,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        343 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      1667 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+      1667 |      0
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -2051,6 +2063,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      3750 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -2460,6 +2484,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
       3750 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+        25 |   2500
+(1 row)
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
 ANALYZE mcv_lists;
@@ -2493,6 +2523,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
       2500 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- mcv with pass-by-ref fixlen types, e.g. uuid
 CREATE TABLE mcv_lists_uuid (
     a UUID,
@@ -2587,6 +2623,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
       1094 |      0
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual 
+-----------+--------
+      9950 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual 
+-----------+--------
+        50 |   2500
+(1 row)
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 ANALYZE mcv_lists_bool;
@@ -2742,6 +2790,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
 (1 row)
 
 DROP TABLE mcv_lists_partial;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -2801,6 +2861,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
       2649 |   1572
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual 
+-----------+--------
+         1 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual 
+-----------+--------
+      1667 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual 
+-----------+--------
+      1667 |   5000
+(1 row)
+
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
 CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -2847,6 +2925,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
       1571 |   1572
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
 DROP TABLE mcv_lists_multi;
 -- statistics on integer expressions
 CREATE TABLE expr_stats (a int, b int, c int);
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index d563c4654c..b0cb0f1924 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -946,6 +946,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -999,6 +1003,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -1198,6 +1206,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
 
@@ -1216,6 +1226,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- mcv with pass-by-ref fixlen types, e.g. uuid
 CREATE TABLE mcv_lists_uuid (
     a UUID,
@@ -1291,6 +1303,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 
@@ -1376,6 +1392,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
 
 DROP TABLE mcv_lists_partial;
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -1403,6 +1423,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
 
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
@@ -1417,6 +1440,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
 
 DROP TABLE mcv_lists_multi;
 
-- 
2.31.1

>From 743015add200d8041737f364281139ae0b5a5766 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Wed, 18 Aug 2021 11:53:45 +0200
Subject: [PATCH 3/3] Don't treat Var op Var as simple clauses

---
 src/backend/statistics/extended_stats.c       | 60 ++++++++++++++-----
 src/backend/statistics/mcv.c                  |  4 +-
 .../statistics/extended_stats_internal.h      |  2 +-
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 606cf8c588..f555ace022 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1319,7 +1319,7 @@ choose_best_statistics(List *stats, char requiredkind,
 static bool
 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 									  Index relid, Bitmapset **attnums,
-									  List **exprs)
+									  List **exprs, bool *issimple)
 {
 	/* Look inside any binary-compatible relabeling (as in examine_variable) */
 	if (IsA(clause, RelabelType))
@@ -1343,6 +1343,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			return false;
 
 		*attnums = bms_add_member(*attnums, var->varattno);
+		*issimple = true;
 
 		return true;
 	}
@@ -1367,7 +1368,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		 * Check if the expression has the right shape. This returns either one
 		 * or two expressions, depending on whether there is a Const.
 		 */
-		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple))
 			return false;
 
 		/*
@@ -1420,16 +1421,20 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		foreach (lc, clause_exprs)
 		{
 			Node *clause_expr = (Node *) lfirst(lc);
+			bool	tmp = false;
 
 			if (IsA(clause_expr, Var))
 			{
 				/* if the Var is incompatible, the whole clause is incompatible */
 				if (!statext_is_compatible_clause_internal(root, clause_expr,
-														   relid, attnums, exprs))
+														   relid, attnums, exprs,
+														   &tmp))
 					return false;
 			}
 			else	/* generic expression */
 				*exprs = lappend(*exprs, clause_expr);
+
+			*issimple = (*issimple && tmp);
 		}
 
 		return true;
@@ -1448,7 +1453,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple))
 			return false;
 
 		/* There has to be one expression exactly. */
@@ -1496,7 +1501,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		/* Check Var IN Array clauses by recursing. */
 		if (IsA(clause_expr, Var))
 			return statext_is_compatible_clause_internal(root, clause_expr,
-														 relid, attnums, exprs);
+														 relid, attnums, exprs,
+														 issimple);
 
 		/* Otherwise we have Expr IN Array. */
 		*exprs = lappend(*exprs, clause_expr);
@@ -1533,8 +1539,11 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			 */
 			if (!statext_is_compatible_clause_internal(root,
 													   (Node *) lfirst(lc),
-													   relid, attnums, exprs))
+													   relid, attnums, exprs,
+													   issimple))
 				return false;
+
+			*issimple = false;
 		}
 
 		return true;
@@ -1548,7 +1557,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		/* Check Var IS NULL clauses by recursing. */
 		if (IsA(nt->arg, Var))
 			return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
-														 relid, attnums, exprs);
+														 relid, attnums, exprs,
+														 issimple);
 
 		/* Otherwise we have Expr IS NULL. */
 		*exprs = lappend(*exprs, nt->arg);
@@ -1584,13 +1594,15 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
  */
 static bool
 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
-							 Bitmapset **attnums, List **exprs)
+							 Bitmapset **attnums, List **exprs, bool *issimple)
 {
 	RangeTblEntry *rte = root->simple_rte_array[relid];
 	RestrictInfo *rinfo = (RestrictInfo *) clause;
 	int			clause_relid;
 	Oid			userid;
 
+	*issimple = true;
+
 	/*
 	 * Special-case handling for bare BoolExpr AND clauses, because the
 	 * restrictinfo machinery doesn't build RestrictInfos on top of AND
@@ -1608,8 +1620,11 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 		foreach(lc, expr->args)
 		{
 			if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
-											  relid, attnums, exprs))
+											  relid, attnums, exprs,
+											  issimple))
 				return false;
+
+			*issimple = false;
 		}
 
 		return true;
@@ -1630,7 +1645,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 
 	/* Check the clause and determine what attributes it references. */
 	if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
-											   relid, attnums, exprs))
+											   relid, attnums, exprs, issimple))
 		return false;
 
 	/*
@@ -1720,6 +1735,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	ListCell   *l;
 	Bitmapset **list_attnums;	/* attnums extracted from the clause */
 	List	  **list_exprs;		/* expressions matched to any statistic */
+	bool	  *list_simple;		/* marks simple expressions */
 	int			listidx;
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
 
@@ -1733,6 +1749,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	/* expressions extracted from complex expressions */
 	list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses));
 
+	/* expressions determined to be simple (single expression) */
+	list_simple = (bool *) palloc(sizeof(bool) * list_length(clauses));
+
 	/*
 	 * Pre-process the clauses list to extract the attnums and expressions
 	 * seen in each item.  We need to determine if there are any clauses which
@@ -1750,17 +1769,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		Node	   *clause = (Node *) lfirst(l);
 		Bitmapset  *attnums = NULL;
 		List	   *exprs = NIL;
+		bool		issimple = false;
 
 		if (!bms_is_member(listidx, *estimatedclauses) &&
-			statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
+			statext_is_compatible_clause(root, clause, rel->relid,
+										 &attnums, &exprs, &issimple))
 		{
 			list_attnums[listidx] = attnums;
 			list_exprs[listidx] = exprs;
+			list_simple[listidx] = issimple;
 		}
 		else
 		{
 			list_attnums[listidx] = NULL;
 			list_exprs[listidx] = NIL;
+			list_simple[listidx] = false;
 		}
 
 		listidx++;
@@ -1797,6 +1820,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		listidx = -1;
 		foreach(l, clauses)
 		{
+			Node *clause = (Node *) lfirst(l);
+
 			/* Increment the index before we decide if to skip the clause. */
 			listidx++;
 
@@ -1836,12 +1861,12 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 			if ((list_attnums[listidx] == NULL &&
 				 list_length(list_exprs[listidx]) == 1) ||
 				(list_exprs[listidx] == NIL &&
-				 bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
+				 list_simple[listidx]))
 				simple_clauses = bms_add_member(simple_clauses,
 												list_length(stat_clauses));
 
 			/* add clause to list and mark it as estimated */
-			stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+			stat_clauses = lappend(stat_clauses, clause);
 			*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
 			/*
@@ -2050,13 +2075,15 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
  * specifies on which side of the operator we found the expression node.
  */
 bool
-examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp)
+examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp,
+					  bool *issimplep)
 {
 	List	   *exprs = NIL;
 	Const	   *cst = NULL;
 	bool		expronleft;
 	Node	   *leftop,
 			   *rightop;
+	bool		issimple;
 
 	/* enforced by statext_is_compatible_clause_internal */
 	Assert(list_length(args) == 2);
@@ -2082,12 +2109,14 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp
 		exprs = lappend(exprs, rightop);
 		cst = (Const *) leftop;
 		expronleft = false;
+		issimple = true;
 	}
 	else
 	{
 		exprs = lappend(exprs, leftop);
 		exprs = lappend(exprs, rightop);
 		expronleft = false;
+		issimple = true;
 	}
 
 	/* return pointers to the extracted parts if requested */
@@ -2100,6 +2129,9 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp
 	if (expronleftp)
 		*expronleftp = expronleft;
 
+	if (issimplep)
+		*issimplep = issimple;
+
 	return true;
 }
 
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 85f650f572..ad8172db90 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1654,7 +1654,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL))
 				elog(ERROR, "incompatible clause");
 
 			if (cst)	/* Expr op Const */
@@ -1820,7 +1820,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL))
 				elog(ERROR, "incompatible clause");
 
 			/* ScalarArrayOpExpr has the Var always on the left */
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 1f30fa9060..d86cc4184b 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -100,7 +100,7 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
 									int numattrs, AttrNumber *attnums);
 
 extern bool examine_opclause_args(List *args, List **exprs, Const **cstp,
-								  bool *expronleftp);
+								  bool *expronleftp, bool *issimplep);
 
 extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
 											 Selectivity mcv_sel,
-- 
2.31.1

Reply via email to