Hi,

I finally got around to this patch again, focusing mostly on the first part that simply returns either 1.0 or 0.0 for Var op Var conditions (i.e. the part not really using extended statistics).

I have been unhappy about using examine_variable, which does various expensive things like searching for statistics (which only got worse because now we're also looking for expression stats). But we don't really need the stats - we just need to check the Vars match (same relation, same attribute). So 0002 fixes this.

Which got me thinking that maybe we don't need to restrict this to Var nodes only. We can just as easily compare arbitrary expressions, provided it's for the same relation and there are no volatile functions. So 0003 does this. Conditions with the same complex expression on each side of an operator are probably fairly rare, but it's cheap so why not.

0004 and 0005 parts are unchanged.


The next steps is adding some tests to the first parts, and extending the tests in the main patch (to also use more complex expressions, if 0003 gets included).


regards

--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 91371c3e4252580a30530d5b625f72d5525c5c73 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Thu, 26 Aug 2021 23:01:04 +0200
Subject: [PATCH 1/5] Improve estimates for Var op Var with the same Var

When estimating (Var op Var) conditions, we can treat the case with the
same Var on both sides as a special case, and we can provide better
selectivity estimate than for the generic case.

For example for (a = a) we know it's 1.0, because all rows are expected
to match. Similarly for (a != a) , wich has selectivity 0.0. And the
same logic can be applied to inequality comparisons, like (a < a) etc.

In principle, those clauses are a bit strange and queries are unlikely
to use them. But query generators sometimes do silly things, and these
checks are quite cheap so it's likely a win.
---
 src/backend/utils/adt/selfuncs.c | 77 +++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 10895fb287..59038895ec 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -210,7 +210,8 @@ static bool get_actual_variable_endpoint(Relation heapRel,
 										 MemoryContext outercontext,
 										 Datum *endpointDatum);
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
-
+static bool matching_restriction_variables(PlannerInfo *root, List *args,
+										   int varRelid);
 
 /*
  *		eqsel			- Selectivity of "=" for any data types.
@@ -256,6 +257,14 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
 		}
 	}
 
+	/*
+	 * It it's (variable = variable) with the same variable on both sides, it's
+	 * a special case and we know it's not expected to filter anything, so we
+	 * estimate the selectivity as 1.0 (or 0.0 if it's negated).
+	 */
+	if (matching_restriction_variables(root, args, varRelid))
+		return (negate) ? 0.0 : 1.0;
+
 	/*
 	 * If expression is not variable = something or something = variable, then
 	 * punt and return a default estimate.
@@ -1408,6 +1417,22 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
 	Oid			consttype;
 	double		selec;
 
+	/*
+	 * Handle (variable < variable) and (variable <= variable) with the same
+	 * variable on both sides as a special case. The strict inequality should
+	 * not match any rows, hence selectivity is 0.0. The other case is about
+	 * the same as equality, so selectivity is 1.0.
+	 */
+	if (matching_restriction_variables(root, args, varRelid))
+	{
+		/* The case with equality matches all rows, so estimate it as 1.0. */
+		if (iseq)
+			PG_RETURN_FLOAT8(1.0);
+
+		/* Strict inequality matches nothing, so selectivity is 0.0. */
+		PG_RETURN_FLOAT8(0.0);
+	}
+
 	/*
 	 * If expression is not variable op something or something op variable,
 	 * then punt and return a default estimate.
@@ -4871,6 +4896,56 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
 	return false;
 }
 
+
+/*
+ * matching_restriction_variable
+ *		Examine the args of a restriction clause to see if it's of the
+ *		form (variable op variable) with the same variable on both sides.
+ *
+ * Inputs:
+ *	root: the planner info
+ *	args: clause argument list
+ *	varRelid: see specs for restriction selectivity functions
+ *
+ * Returns true if the same variable is on both sides, otherwise false.
+ */
+static bool
+matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
+{
+	Node	   *left,
+			   *right;
+	VariableStatData ldata;
+	VariableStatData rdata;
+	bool		res = false;
+
+	/* Fail if not a binary opclause (probably shouldn't happen) */
+	if (list_length(args) != 2)
+		return false;
+
+	left = (Node *) linitial(args);
+	right = (Node *) lsecond(args);
+
+	/*
+	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
+	 * relations will be treated as pseudoconstants.
+	 */
+	examine_variable(root, left, varRelid, &ldata);
+	examine_variable(root, right, varRelid, &rdata);
+
+	/*
+	 * If both sides are variable, and are equal, we win.
+	 */
+	if ((ldata.rel != NULL && rdata.rel != NULL) &&
+		equal(ldata.var, rdata.var))
+		res = true;
+
+	/* We don't need the stats. */
+	ReleaseVariableStats(ldata);
+	ReleaseVariableStats(rdata);
+
+	return res;
+}
+
 /*
  * get_join_variables
  *		Apply examine_variable() to each side of a join clause.
-- 
2.31.1

From 8e454f1aae54317d1d346012c06179ba62fbef47 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Mon, 13 Dec 2021 02:28:27 +0100
Subject: [PATCH 2/5] simplification

---
 src/backend/utils/adt/selfuncs.c | 46 +++++++++++++++++---------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 59038895ec..1031a0fd9e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -260,7 +260,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
 	/*
 	 * It it's (variable = variable) with the same variable on both sides, it's
 	 * a special case and we know it's not expected to filter anything, so we
-	 * estimate the selectivity as 1.0 (or 0.0 if it's negated).
+	 * estimate the selectivity as 1.0 (or 0.0 when it's negated).
 	 */
 	if (matching_restriction_variables(root, args, varRelid))
 		return (negate) ? 0.0 : 1.0;
@@ -4896,11 +4896,11 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
 	return false;
 }
 
-
 /*
  * matching_restriction_variable
- *		Examine the args of a restriction clause to see if it's of the
- *		form (variable op variable) with the same variable on both sides.
+ *		Check if the two arguments of a restriction clause refer to the same
+ *		variable, i.e. if the condition is of the form (variable op variable).
+ *		We can deduce selectivity for such (in)equality clauses.
  *
  * Inputs:
  *	root: the planner info
@@ -4914,9 +4914,7 @@ matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
 {
 	Node	   *left,
 			   *right;
-	VariableStatData ldata;
-	VariableStatData rdata;
-	bool		res = false;
+	Var		   *lvar;
 
 	/* Fail if not a binary opclause (probably shouldn't happen) */
 	if (list_length(args) != 2)
@@ -4925,25 +4923,29 @@ matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
 	left = (Node *) linitial(args);
 	right = (Node *) lsecond(args);
 
-	/*
-	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
-	 * relations will be treated as pseudoconstants.
-	 */
-	examine_variable(root, left, varRelid, &ldata);
-	examine_variable(root, right, varRelid, &rdata);
+	/* Look inside any binary-compatible relabeling */
+
+	if (IsA(left, RelabelType))
+		left = (Node *) ((RelabelType *) left)->arg;
+
+	if (IsA(right, RelabelType))
+		right = (Node *) ((RelabelType *) right)->arg;
+
+	/* We only care about simple Vars from our relation for now */
+	if (!IsA(left, Var) || !IsA(right, Var))
+		return false;
+
+	lvar = (Var *) left;
 
 	/*
-	 * If both sides are variable, and are equal, we win.
+	 * We only check one variable, because we call equals() later, which checks
+	 * the other variable automatically.
 	 */
-	if ((ldata.rel != NULL && rdata.rel != NULL) &&
-		equal(ldata.var, rdata.var))
-		res = true;
-
-	/* We don't need the stats. */
-	ReleaseVariableStats(ldata);
-	ReleaseVariableStats(rdata);
+	if ((varRelid != 0) && (varRelid != lvar->varno))
+		return false;
 
-	return res;
+	/* The two variables need to match */
+	return equal(left, right);
 }
 
 /*
-- 
2.31.1

From d1b54830eea71b6afb465d4c86d9a4a92742dc98 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Mon, 13 Dec 2021 02:48:58 +0100
Subject: [PATCH 3/5] relax the restrictions

---
 src/backend/utils/adt/selfuncs.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1031a0fd9e..6b0dde66bf 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4914,7 +4914,7 @@ matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
 {
 	Node	   *left,
 			   *right;
-	Var		   *lvar;
+	Bitmapset  *varnos;
 
 	/* Fail if not a binary opclause (probably shouldn't happen) */
 	if (list_length(args) != 2)
@@ -4931,17 +4931,24 @@ matching_restriction_variables(PlannerInfo *root, List *args, int varRelid)
 	if (IsA(right, RelabelType))
 		right = (Node *) ((RelabelType *) right)->arg;
 
-	/* We only care about simple Vars from our relation for now */
-	if (!IsA(left, Var) || !IsA(right, Var))
+	/*
+	 * Check if it's safe to compare the two expressions. The expressions
+	 * must not contain any volatile expressions, and must belong to the
+	 * same relation.
+	 *
+	 * XXX We do a small trick - we validate just one expression, and then
+	 * check it's equal to the other side.
+	 */
+	if (contain_volatile_functions(left))
 		return false;
 
-	lvar = (Var *) left;
+	/* Check it belongs to a single relation. */
+	varnos = pull_varnos(root, left);
 
-	/*
-	 * We only check one variable, because we call equals() later, which checks
-	 * the other variable automatically.
-	 */
-	if ((varRelid != 0) && (varRelid != lvar->varno))
+	if (bms_num_members(varnos) != 1)
+		return false;
+
+	if ((varRelid != 0) && (!bms_is_member(varRelid, varnos)))
 		return false;
 
 	/* The two variables need to match */
-- 
2.31.1

From 1f6949e9eadcb961738e4314414682e179462d16 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Sun, 15 Aug 2021 13:23:13 +0200
Subject: [PATCH 4/5] main patch

---
 src/backend/optimizer/path/clausesel.c        |  37 +++-
 src/backend/statistics/extended_stats.c       |  83 ++++++---
 src/backend/statistics/mcv.c                  | 172 +++++++++++++-----
 .../statistics/extended_stats_internal.h      |   4 +-
 src/test/regress/expected/stats_ext.out       |  96 ++++++++++
 src/test/regress/sql/stats_ext.sql            |  26 +++
 6 files changed, 341 insertions(+), 77 deletions(-)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index d263ecf082..6a7e9ceea5 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -714,6 +714,7 @@ clause_selectivity_ext(PlannerInfo *root,
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
 	bool		cacheable = false;
+	Node	   *src = clause;
 
 	if (clause == NULL)			/* can this still happen? */
 		return s1;
@@ -871,11 +872,37 @@ clause_selectivity_ext(PlannerInfo *root,
 		}
 		else
 		{
-			/* Estimate selectivity for a restriction clause. */
-			s1 = restriction_selectivity(root, opno,
-										 opclause->args,
-										 opclause->inputcollid,
-										 varRelid);
+			/*
+			 * It might be a single (Expr op Expr) clause, which goes here due
+			 * to the optimization at the beginning of clauselist_selectivity.
+			 * So we try applying extended stats first, and then fall back to
+			 * restriction_selectivity.
+			 */
+			bool	estimated = false;
+
+			if (use_extended_stats)
+			{
+				List	   *clauses = list_make1(src);
+				RelOptInfo *rel = find_single_rel_for_clauses(root, clauses);
+
+				if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+				{
+					Bitmapset  *estimatedclauses = NULL;
+
+					s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+														jointype, sjinfo, rel,
+														&estimatedclauses, false);
+
+					estimated = (bms_num_members(estimatedclauses) == 1);
+				}
+			}
+
+			/* Estimate selectivity for a restriction clause (fallback). */
+			if (!estimated)
+				s1 = restriction_selectivity(root, opno,
+											 opclause->args,
+											 opclause->inputcollid,
+											 varRelid);
 		}
 
 		/*
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 69ca52094f..e1fcfd0711 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1351,19 +1351,27 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		return true;
 	}
 
-	/* (Var/Expr op Const) or (Const op Var/Expr) */
+	/*
+	 * Three opclause variants are supported: (Expr op Const), (Const op Expr),
+	 * (Expr op Expr). That means we may need to analyze one or two expressions
+	 * to make sure the opclause is compatible with extended stats.
+	 */
 	if (is_opclause(clause))
 	{
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		OpExpr	   *expr = (OpExpr *) clause;
-		Node	   *clause_expr;
+		ListCell   *lc;
+		List	   *clause_exprs;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
-		/* Check if the expression has the right shape */
-		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
+		/*
+		 * Check if the expression has the right shape. This returns either one
+		 * or two expressions, depending on whether there is a Const.
+		 */
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
 			return false;
 
 		/*
@@ -1403,13 +1411,31 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			!get_func_leakproof(get_opcode(expr->opno)))
 			return false;
 
-		/* Check (Var op Const) or (Const op Var) clauses by recursing. */
-		if (IsA(clause_expr, Var))
-			return statext_is_compatible_clause_internal(root, clause_expr,
-														 relid, attnums, exprs);
+		/*
+		 * There's always at least one expression, otherwise the clause would
+		 * not be considered compatible.
+		 */
+		Assert(list_length(clause_exprs) >= 1);
+
+		/*
+		 * Check all expressions by recursing. Var expressions are handled as
+		 * a special case (to match it to attnums etc.)
+		 */
+		foreach (lc, clause_exprs)
+		{
+			Node *clause_expr = (Node *) lfirst(lc);
+
+			if (IsA(clause_expr, Var))
+			{
+				/* if the Var is incompatible, the whole clause is incompatible */
+				if (!statext_is_compatible_clause_internal(root, clause_expr,
+														   relid, attnums, exprs))
+					return false;
+			}
+			else	/* generic expression */
+				*exprs = lappend(*exprs, clause_expr);
+		}
 
-		/* Otherwise we have (Expr op Const) or (Const op Expr). */
-		*exprs = lappend(*exprs, clause_expr);
 		return true;
 	}
 
@@ -1419,15 +1445,21 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
 		Node	   *clause_expr;
+		List	   *clause_exprs;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
 			return false;
 
+		/* There has to be one expression exactly. */
+		Assert(list_length(clause_exprs) == 1);
+
+		clause_expr = (Node *) linitial(clause_exprs);
+
 		/*
 		 * If it's not one of the supported operators ("=", "<", ">", etc.),
 		 * just ignore the clause, as it's not compatible with MCV lists.
@@ -2013,20 +2045,19 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
  * examine_opclause_args
  *		Split an operator expression's arguments into Expr and Const parts.
  *
- * Attempts to match the arguments to either (Expr op Const) or (Const op
- * Expr), possibly with a RelabelType on top. When the expression matches this
- * form, returns true, otherwise returns false.
+ * Attempts to match the arguments to either (Expr op Const) or (Const op Expr)
+ * or (Expr op Expr), possibly with a RelabelType on top. When the expression
+ * matches this form, returns true, otherwise returns false.
  *
  * Optionally returns pointers to the extracted Expr/Const nodes, when passed
- * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
+ * non-null pointers (exprsp, cstp and expronleftp). The expronleftp flag
  * specifies on which side of the operator we found the expression node.
  */
 bool
-examine_opclause_args(List *args, Node **exprp, Const **cstp,
-					  bool *expronleftp)
+examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp)
 {
-	Node	   *expr;
-	Const	   *cst;
+	List	   *exprs = NIL;
+	Const	   *cst = NULL;
 	bool		expronleft;
 	Node	   *leftop,
 			   *rightop;
@@ -2046,22 +2077,26 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
 
 	if (IsA(rightop, Const))
 	{
-		expr = (Node *) leftop;
+		exprs = lappend(exprs, leftop);
 		cst = (Const *) rightop;
 		expronleft = true;
 	}
 	else if (IsA(leftop, Const))
 	{
-		expr = (Node *) rightop;
+		exprs = lappend(exprs, rightop);
 		cst = (Const *) leftop;
 		expronleft = false;
 	}
 	else
-		return false;
+	{
+		exprs = lappend(exprs, leftop);
+		exprs = lappend(exprs, rightop);
+		expronleft = false;
+	}
 
 	/* return pointers to the extracted parts if requested */
-	if (exprp)
-		*exprp = expr;
+	if (exprsp)
+		*exprsp = exprs;
 
 	if (cstp)
 		*cstp = cst;
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index b350fc5f7b..ac7d7b8978 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1644,78 +1644,154 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_opclause_args returns true */
 			Node	   *clause_expr;
+			Node	   *clause_expr2;
+			List	   *clause_exprs;
 			Const	   *cst;
 			bool		expronleft;
-			int			idx;
 			Oid			collid;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
 				elog(ERROR, "incompatible clause");
 
-			/* match the attribute/expression to a dimension of the statistic */
-			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+			if (cst)	/* Expr op Const */
+			{
+				int idx;
 
-			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+				Assert(list_length(clause_exprs) == 1);
+				clause_expr = (Node *) linitial(clause_exprs);
 
-			/*
-			 * Walk through the MCV items and evaluate the current clause. We
-			 * can skip items that were already ruled out, and terminate if
-			 * there are no remaining MCV items that might possibly match.
-			 */
-			for (i = 0; i < mcvlist->nitems; i++)
-			{
-				bool		match = true;
-				MCVItem    *item = &mcvlist->items[i];
+				/* match the attribute/expression to a dimension of the statistic */
+				idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
 
-				Assert(idx >= 0);
+				Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
 
 				/*
-				 * When the MCV item or the Const value is NULL we can treat
-				 * this as a mismatch. We must not call the operator because
-				 * of strictness.
+				 * Walk through the MCV items and evaluate the current clause. We
+				 * can skip items that were already ruled out, and terminate if
+				 * there are no remaining MCV items that might possibly match.
 				 */
-				if (item->isnull[idx] || cst->constisnull)
+				for (i = 0; i < mcvlist->nitems; i++)
 				{
-					matches[i] = RESULT_MERGE(matches[i], is_or, false);
-					continue;
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					Assert(idx >= 0);
+
+					/*
+					 * When the MCV item or the Const value is NULL we can treat
+					 * this as a mismatch. We must not call the operator because
+					 * of strictness.
+					 */
+					if (item->isnull[idx] || cst->constisnull)
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap. Once
+					 * the value gets false for AND-lists, or true for OR-lists,
+					 * we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * We don't store collations used to build the statistics, but
+					 * we can use the collation for the attribute itself, as
+					 * stored in varcollid. We do reset the statistics after a
+					 * type change (including collation change), so this is OK.
+					 * For expressions, we use the collation extracted from the
+					 * expression itself.
+					 */
+					if (expronleft)
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   item->values[idx],
+															   cst->constvalue));
+					else
+						match = DatumGetBool(FunctionCall2Coll(&opproc,
+															   collid,
+															   cst->constvalue,
+															   item->values[idx]));
+
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
 				}
+			}
+			else	/* Expr op Expr */
+			{
+				int			idx;
+				int			idx2;
+
+				Assert(list_length(clause_exprs) == 2);
+
+				clause_expr = (Node *) linitial(clause_exprs);
+				clause_expr2 = (Node *) lsecond(clause_exprs);
+
+				Assert(clause_expr2);
+				Assert(!expronleft);
 
 				/*
-				 * Skip MCV items that can't change result in the bitmap. Once
-				 * the value gets false for AND-lists, or true for OR-lists,
-				 * we don't need to look at more clauses.
+				 * Match the expressions to a dimension of the statistic.
+				 *
+				 * XXX Can the collations differ?
 				 */
-				if (RESULT_IS_FINAL(matches[i], is_or))
-					continue;
+				idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+				idx2 = mcv_match_expression(clause_expr2, keys, exprs, &collid);
+
+				Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+				Assert((idx2 >= 0) && (idx2 < bms_num_members(keys) + list_length(exprs)));
 
 				/*
-				 * First check whether the constant is below the lower
-				 * boundary (in that case we can skip the bucket, because
-				 * there's no overlap).
-				 *
-				 * We don't store collations used to build the statistics, but
-				 * we can use the collation for the attribute itself, as
-				 * stored in varcollid. We do reset the statistics after a
-				 * type change (including collation change), so this is OK.
-				 * For expressions, we use the collation extracted from the
-				 * expression itself.
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
 				 */
-				if (expronleft)
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool		match = true;
+					MCVItem    *item = &mcvlist->items[i];
+
+					/*
+					 * When either of the MCV items is NULL we can treat this
+					 * as a mismatch. We must not call the operator because
+					 * of strictness.
+					 */
+					if (item->isnull[idx] || item->isnull[idx2])
+					{
+						matches[i] = RESULT_MERGE(matches[i], is_or, false);
+						continue;
+					}
+
+					/*
+					 * Skip MCV items that can't change result in the bitmap.
+					 * Once the value gets false for AND-lists, or true for
+					 * OR-lists, we don't need to look at more clauses.
+					 */
+					if (RESULT_IS_FINAL(matches[i], is_or))
+						continue;
+
+					/*
+					 * We don't store collations used to build the statistics,
+					 * but we can use the collation for the attribute itself,
+					 * as stored in varcollid. We do reset the statistics after
+					 * a type change (including collation change), so this is
+					 * OK. We may need to relax this after allowing extended
+					 * statistics on expressions.
+					 */
 					match = DatumGetBool(FunctionCall2Coll(&opproc,
 														   collid,
 														   item->values[idx],
-														   cst->constvalue));
-				else
-					match = DatumGetBool(FunctionCall2Coll(&opproc,
-														   collid,
-														   cst->constvalue,
-														   item->values[idx]));
+														   item->values[idx2]));
 
-				/* update the match bitmap with the result */
-				matches[i] = RESULT_MERGE(matches[i], is_or, match);
+					/* update the match bitmap with the result */
+					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+				}
 			}
 		}
 		else if (IsA(clause, ScalarArrayOpExpr))
@@ -1725,6 +1801,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 			/* valid only after examine_opclause_args returns true */
 			Node	   *clause_expr;
+			List	   *clause_exprs;
 			Const	   *cst;
 			bool		expronleft;
 			Oid			collid;
@@ -1742,11 +1819,14 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
 				elog(ERROR, "incompatible clause");
 
 			/* ScalarArrayOpExpr has the Var always on the left */
 			Assert(expronleft);
+			Assert(list_length(clause_exprs) == 1);
+
+			clause_expr = (Node *) linitial(clause_exprs);
 
 			/* XXX what if (cst->constisnull == NULL)? */
 			if (!cst->constisnull)
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 55cd9252a5..1f30fa9060 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -99,8 +99,8 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
 									MultiSortSupport mss,
 									int numattrs, AttrNumber *attnums);
 
-extern bool examine_opclause_args(List *args, Node **exprp,
-								  Const **cstp, bool *expronleftp);
+extern bool examine_opclause_args(List *args, List **exprs, Const **cstp,
+								  bool *expronleftp);
 
 extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
 											 Selectivity mcv_sel,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index c60ba45aba..cabc97c43b 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -1917,6 +1917,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        343 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      1667 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+      1667 |      0
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -2064,6 +2076,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      3750 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -2473,6 +2497,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
       3750 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+        25 |   2500
+(1 row)
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
 ANALYZE mcv_lists;
@@ -2506,6 +2536,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
       2500 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- mcv with pass-by-ref fixlen types, e.g. uuid
 CREATE TABLE mcv_lists_uuid (
     a UUID,
@@ -2600,6 +2636,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
       1094 |      0
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual 
+-----------+--------
+      9950 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual 
+-----------+--------
+        50 |   2500
+(1 row)
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 ANALYZE mcv_lists_bool;
@@ -2755,6 +2803,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
 (1 row)
 
 DROP TABLE mcv_lists_partial;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -2814,6 +2874,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
       2649 |   1572
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual 
+-----------+--------
+         1 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual 
+-----------+--------
+      1667 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual 
+-----------+--------
+      1667 |   5000
+(1 row)
+
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
 CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -2860,6 +2938,24 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
       1571 |   1572
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
 DROP TABLE mcv_lists_multi;
 -- statistics on integer expressions
 CREATE TABLE expr_stats (a int, b int, c int);
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 6fb37962a7..9210f144cb 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -956,6 +956,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -1009,6 +1013,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -1208,6 +1216,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
 
@@ -1226,6 +1236,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- mcv with pass-by-ref fixlen types, e.g. uuid
 CREATE TABLE mcv_lists_uuid (
     a UUID,
@@ -1301,6 +1313,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 
@@ -1386,6 +1402,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
 
 DROP TABLE mcv_lists_partial;
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -1413,6 +1433,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
 
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
@@ -1427,6 +1450,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
 
 DROP TABLE mcv_lists_multi;
 
-- 
2.31.1

From 87fb7e95f6694c7f9f79c28fc660211af9cfa15b Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Wed, 18 Aug 2021 11:53:45 +0200
Subject: [PATCH 5/5] Don't treat Var op Var as simple clauses

---
 src/backend/statistics/extended_stats.c       | 86 +++++++++++++++----
 src/backend/statistics/mcv.c                  |  4 +-
 .../statistics/extended_stats_internal.h      |  2 +-
 3 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index e1fcfd0711..bf4aa682ae 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1319,11 +1319,14 @@ choose_best_statistics(List *stats, char requiredkind,
  * statext_is_compatible_clause. It needs to be split like this because
  * of recursion.  The attnums bitmap is an input/output parameter collecting
  * attribute numbers from all compatible clauses (recursively).
+ *
+ * XXX The issimple variable is expected to be initialized by the caller, we
+ * just update it while recursively analyzing the current clause.
  */
 static bool
 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 									  Index relid, Bitmapset **attnums,
-									  List **exprs)
+									  List **exprs, bool *issimple)
 {
 	/* Look inside any binary-compatible relabeling (as in examine_variable) */
 	if (IsA(clause, RelabelType))
@@ -1371,7 +1374,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		 * Check if the expression has the right shape. This returns either one
 		 * or two expressions, depending on whether there is a Const.
 		 */
-		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple))
 			return false;
 
 		/*
@@ -1420,6 +1423,12 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		/*
 		 * Check all expressions by recursing. Var expressions are handled as
 		 * a special case (to match it to attnums etc.)
+		 *
+		 * An opclause is simple if it's (Expr op Const) or (Const op Expr). We
+		 * have already checked the overall shape in examine_opclause_args, but
+		 * we haven't checked the expressions are simple (i.e. pretty much Var),
+		 * so we need to check that now. If we discover a complex expression, we
+		 * consider the whole clause complex.
 		 */
 		foreach (lc, clause_exprs)
 		{
@@ -1429,11 +1438,17 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			{
 				/* if the Var is incompatible, the whole clause is incompatible */
 				if (!statext_is_compatible_clause_internal(root, clause_expr,
-														   relid, attnums, exprs))
+														   relid, attnums, exprs,
+														   issimple))
 					return false;
 			}
 			else	/* generic expression */
+			{
 				*exprs = lappend(*exprs, clause_expr);
+
+				/* switch to false if there are any complex clauses */
+				*issimple = false;
+			}
 		}
 
 		return true;
@@ -1452,7 +1467,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple))
 			return false;
 
 		/* There has to be one expression exactly. */
@@ -1500,7 +1515,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		/* Check Var IN Array clauses by recursing. */
 		if (IsA(clause_expr, Var))
 			return statext_is_compatible_clause_internal(root, clause_expr,
-														 relid, attnums, exprs);
+														 relid, attnums, exprs,
+														 issimple);
 
 		/* Otherwise we have Expr IN Array. */
 		*exprs = lappend(*exprs, clause_expr);
@@ -1529,6 +1545,14 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		BoolExpr   *expr = (BoolExpr *) clause;
 		ListCell   *lc;
 
+		/*
+		 * All AND/OR clauses are considered complex, even if all arguments are
+		 * simple clauses. For NOT clauses we need to check the argument and then
+		 * we can update the flag.
+		 */
+		if (!is_notclause(clause))
+			*issimple = false;
+
 		foreach(lc, expr->args)
 		{
 			/*
@@ -1537,7 +1561,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			 */
 			if (!statext_is_compatible_clause_internal(root,
 													   (Node *) lfirst(lc),
-													   relid, attnums, exprs))
+													   relid, attnums, exprs,
+													   issimple))
 				return false;
 		}
 
@@ -1552,7 +1577,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		/* Check Var IS NULL clauses by recursing. */
 		if (IsA(nt->arg, Var))
 			return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
-														 relid, attnums, exprs);
+														 relid, attnums, exprs,
+														 issimple);
 
 		/* Otherwise we have Expr IS NULL. */
 		*exprs = lappend(*exprs, nt->arg);
@@ -1588,22 +1614,35 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
  */
 static bool
 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
-							 Bitmapset **attnums, List **exprs)
+							 Bitmapset **attnums, List **exprs, bool *issimple)
 {
 	RangeTblEntry *rte = root->simple_rte_array[relid];
 	RestrictInfo *rinfo = (RestrictInfo *) clause;
 	int			clause_relid;
 	Oid			userid;
 
+	/*
+	 * Clauses are considered simple by default, and we mark them as complex
+	 * when we discover a complex part.
+	 */
+	*issimple = true;
+
 	/*
 	 * Special-case handling for bare BoolExpr AND clauses, because the
 	 * restrictinfo machinery doesn't build RestrictInfos on top of AND
 	 * clauses.
+	 *
+	 * AND clauses are considered complex, even if all arguments are
+	 * simple clauses.
 	 */
 	if (is_andclause(clause))
 	{
 		BoolExpr   *expr = (BoolExpr *) clause;
 		ListCell   *lc;
+		bool		tmp = false;	/* ignored result */
+
+		/* AND clauses are complex, even if the arguments are simple. */
+		*issimple = false;
 
 		/*
 		 * Check that each sub-clause is compatible.  We expect these to be
@@ -1612,7 +1651,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 		foreach(lc, expr->args)
 		{
 			if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
-											  relid, attnums, exprs))
+											  relid, attnums, exprs, &tmp))
 				return false;
 		}
 
@@ -1634,7 +1673,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 
 	/* Check the clause and determine what attributes it references. */
 	if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
-											   relid, attnums, exprs))
+											   relid, attnums, exprs, issimple))
 		return false;
 
 	/*
@@ -1724,6 +1763,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	ListCell   *l;
 	Bitmapset **list_attnums;	/* attnums extracted from the clause */
 	List	  **list_exprs;		/* expressions matched to any statistic */
+	bool	  *list_simple;		/* marks simple expressions */
 	int			listidx;
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
 
@@ -1737,6 +1777,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	/* expressions extracted from complex expressions */
 	list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses));
 
+	/* expressions determined to be simple (single expression) */
+	list_simple = (bool *) palloc(sizeof(bool) * list_length(clauses));
+
 	/*
 	 * Pre-process the clauses list to extract the attnums and expressions
 	 * seen in each item.  We need to determine if there are any clauses which
@@ -1754,17 +1797,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		Node	   *clause = (Node *) lfirst(l);
 		Bitmapset  *attnums = NULL;
 		List	   *exprs = NIL;
+		bool		issimple = false;
 
 		if (!bms_is_member(listidx, *estimatedclauses) &&
-			statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
+			statext_is_compatible_clause(root, clause, rel->relid,
+										 &attnums, &exprs, &issimple))
 		{
 			list_attnums[listidx] = attnums;
 			list_exprs[listidx] = exprs;
+			list_simple[listidx] = issimple;
 		}
 		else
 		{
 			list_attnums[listidx] = NULL;
 			list_exprs[listidx] = NIL;
+			list_simple[listidx] = false;
 		}
 
 		listidx++;
@@ -1801,6 +1848,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		listidx = -1;
 		foreach(l, clauses)
 		{
+			Node *clause = (Node *) lfirst(l);
+
 			/* Increment the index before we decide if to skip the clause. */
 			listidx++;
 
@@ -1839,13 +1888,12 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 			/* record simple clauses (single column or expression) */
 			if ((list_attnums[listidx] == NULL &&
 				 list_length(list_exprs[listidx]) == 1) ||
-				(list_exprs[listidx] == NIL &&
-				 bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
+				 list_simple[listidx])
 				simple_clauses = bms_add_member(simple_clauses,
 												list_length(stat_clauses));
 
 			/* add clause to list and mark it as estimated */
-			stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+			stat_clauses = lappend(stat_clauses, clause);
 			*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
 			/*
@@ -2054,13 +2102,15 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
  * specifies on which side of the operator we found the expression node.
  */
 bool
-examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp)
+examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp,
+					  bool *issimplep)
 {
 	List	   *exprs = NIL;
 	Const	   *cst = NULL;
 	bool		expronleft;
 	Node	   *leftop,
 			   *rightop;
+	bool		issimple;
 
 	/* enforced by statext_is_compatible_clause_internal */
 	Assert(list_length(args) == 2);
@@ -2080,18 +2130,21 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp
 		exprs = lappend(exprs, leftop);
 		cst = (Const *) rightop;
 		expronleft = true;
+		issimple = true;
 	}
 	else if (IsA(leftop, Const))
 	{
 		exprs = lappend(exprs, rightop);
 		cst = (Const *) leftop;
 		expronleft = false;
+		issimple = true;
 	}
 	else
 	{
 		exprs = lappend(exprs, leftop);
 		exprs = lappend(exprs, rightop);
 		expronleft = false;
+		issimple = false;
 	}
 
 	/* return pointers to the extracted parts if requested */
@@ -2104,6 +2157,9 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp
 	if (expronleftp)
 		*expronleftp = expronleft;
 
+	if (issimplep)
+		*issimplep = (*issimplep && issimple);
+
 	return true;
 }
 
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index ac7d7b8978..eb3267b164 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1653,7 +1653,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL))
 				elog(ERROR, "incompatible clause");
 
 			if (cst)	/* Expr op Const */
@@ -1819,7 +1819,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
 			/* extract the var/expr and const from the expression */
-			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft))
+			if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL))
 				elog(ERROR, "incompatible clause");
 
 			/* ScalarArrayOpExpr has the Var always on the left */
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 1f30fa9060..d86cc4184b 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -100,7 +100,7 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
 									int numattrs, AttrNumber *attnums);
 
 extern bool examine_opclause_args(List *args, List **exprs, Const **cstp,
-								  bool *expronleftp);
+								  bool *expronleftp, bool *issimplep);
 
 extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
 											 Selectivity mcv_sel,
-- 
2.31.1

Reply via email to