I noticed you lost a couple of test cases in v14, so I added them back.

I also added a case where your implementation returns a different number of rows compared to vanilla:

select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;

Please see the attached v15.

--
Alexander Kuzmenkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

>From 2b1350df99b5e59f9ad7e516806800d09256beea Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <a.kuzmen...@postgrespro.ru>
Date: Mon, 25 Mar 2019 18:10:52 +0300
Subject: [PATCH v15] Remove self joins.

---
 src/backend/optimizer/plan/analyzejoins.c | 961 ++++++++++++++++++++++++++++--
 src/backend/optimizer/plan/planmain.c     |   7 +-
 src/backend/optimizer/util/relnode.c      |  26 +-
 src/include/optimizer/pathnode.h          |   5 +
 src/include/optimizer/planmain.h          |   7 +-
 src/test/regress/expected/equivclass.out  |  32 +
 src/test/regress/expected/join.out        | 260 ++++++++
 src/test/regress/sql/equivclass.sql       |  17 +
 src/test/regress/sql/join.sql             | 117 ++++
 9 files changed, 1350 insertions(+), 82 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index a4efa69..7efcf9f 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_class.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
@@ -33,7 +34,7 @@
 #include "utils/lsyscache.h"
 
 /* local functions */
-static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid,
 					  Relids joinrelids);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -47,18 +48,21 @@ static bool is_innerrel_unique_for(PlannerInfo *root,
 					   RelOptInfo *innerrel,
 					   JoinType jointype,
 					   List *restrictlist);
+static void split_selfjoin_quals(PlannerInfo *root, List *joinquals,
+					 List **selfjoinquals,
+					 List **otherjoinquals);
 
 
 /*
- * remove_useless_joins
- *		Check for relations that don't actually need to be joined at all,
- *		and remove them from the query.
+ * remove_useless_left_joins
+ *		Check for left joined relations that don't actually need to be joined
+ *		at all, and remove them from the query.
  *
  * We are passed the current joinlist and return the updated list.  Other
  * data structures that have to be updated are accessible via "root".
  */
 List *
-remove_useless_joins(PlannerInfo *root, List *joinlist)
+remove_useless_left_joins(PlannerInfo *root, List *joinlist)
 {
 	ListCell   *lc;
 
@@ -74,11 +78,11 @@ restart:
 		int			nremoved;
 
 		/* Skip if not removable */
-		if (!join_is_removable(root, sjinfo))
+		if (!leftjoin_is_removable(root, sjinfo))
 			continue;
 
 		/*
-		 * Currently, join_is_removable can only succeed when the sjinfo's
+		 * Currently, leftjoin_is_removable can only succeed when the sjinfo's
 		 * righthand is a single baserel.  Remove that rel from the query and
 		 * joinlist.
 		 */
@@ -146,9 +150,9 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
 }
 
 /*
- * join_is_removable
- *	  Check whether we need not perform this special join at all, because
- *	  it will just duplicate its left input.
+ * leftjoin_is_removable
+ *	  Check whether we need not perform this left join at all, because it will
+ *	  just duplicate its left input.
  *
  * This is true for a left join for which the join condition cannot match
  * more than one inner-side row.  (There are other possibly interesting
@@ -157,12 +161,11 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
  * above the join.
  */
 static bool
-join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	int			innerrelid;
 	RelOptInfo *innerrel;
 	Relids		joinrelids;
-	List	   *clause_list = NIL;
 	ListCell   *l;
 	int			attroff;
 
@@ -238,67 +241,23 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	}
 
 	/*
-	 * Search for mergejoinable clauses that constrain the inner rel against
-	 * either the outer rel or a pseudoconstant.  If an operator is
-	 * mergejoinable then it behaves like equality for some btree opclass, so
-	 * it's what we want.  The mergejoinability test also eliminates clauses
-	 * containing volatile functions, which we couldn't depend on.
+	 * Check for pushed-down clauses referencing the inner rel. If there is
+	 * such a clause then join removal has to be disallowed.  We have to
+	 * check this despite the previous attr_needed checks because of the
+	 * possibility of pushed-down clauses referencing the rel.
 	 */
 	foreach(l, innerrel->joininfo)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
-		/*
-		 * If it's not a join clause for this outer join, we can't use it.
-		 * Note that if the clause is pushed-down, then it is logically from
-		 * above the outer join, even if it references no other rels (it might
-		 * be from WHERE, for example).
-		 */
-		if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
-		{
-			/*
-			 * If such a clause actually references the inner rel then join
-			 * removal has to be disallowed.  We have to check this despite
-			 * the previous attr_needed checks because of the possibility of
-			 * pushed-down clauses referencing the rel.
-			 */
-			if (bms_is_member(innerrelid, restrictinfo->clause_relids))
+		if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)
+			&& bms_is_member(innerrel->relid, restrictinfo->clause_relids))
 				return false;
-			continue;			/* else, ignore; not useful here */
-		}
-
-		/* Ignore if it's not a mergejoinable clause */
-		if (!restrictinfo->can_join ||
-			restrictinfo->mergeopfamilies == NIL)
-			continue;			/* not mergejoinable */
-
-		/*
-		 * Check if clause has the form "outer op inner" or "inner op outer",
-		 * and if so mark which side is inner.
-		 */
-		if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
-									 innerrel->relids))
-			continue;			/* no good for these input relations */
-
-		/* OK, add to list */
-		clause_list = lappend(clause_list, restrictinfo);
 	}
 
-	/*
-	 * Now that we have the relevant equality join clauses, try to prove the
-	 * innerrel distinct.
-	 */
-	if (rel_is_distinct_for(root, innerrel, clause_list))
-		return true;
-
-	/*
-	 * Some day it would be nice to check for other methods of establishing
-	 * distinctness.
-	 */
-	return false;
+	return is_innerrel_unique_for(root, joinrelids, sjinfo->min_lefthand,
+								  innerrel, sjinfo->jointype, innerrel->joininfo);
 }
 
-
 /*
  * Remove the target relid from the planner's data structures, having
  * determined that there is no need to include it in the query.
@@ -381,7 +340,8 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
 	 * joinrelids.  However, a PHV used at a partner rel could not have the
 	 * target rel in ph_eval_at, so we check that while deciding whether to
 	 * remove or just update the PHV.  There is no corresponding test in
-	 * join_is_removable because it doesn't need to distinguish those cases.
+	 * leftjoin_is_removable because it doesn't need to distinguish those
+	 * cases.
 	 */
 	for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
 	{
@@ -1125,3 +1085,874 @@ is_innerrel_unique_for(PlannerInfo *root,
 	/* Let rel_is_distinct_for() do the hard work */
 	return rel_is_distinct_for(root, innerrel, clause_list);
 }
+
+typedef struct
+{
+	Index oldRelid;
+	Index newRelid;
+} ChangeVarnoContext;
+
+static bool
+change_varno_walker(Node *node, ChangeVarnoContext *context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var) && ((Var *) node)->varno == context->oldRelid)
+	{
+		((Var *) node)->varno = context->newRelid;
+		((Var *) node)->varnoold = context->newRelid;
+		return false;
+	}
+
+	return expression_tree_walker(node, change_varno_walker, context);
+}
+
+/*
+ * For all Vars in the expression that have varno = oldRelid, set
+ * varno = newRelid.
+ */
+static void
+change_varno(Expr *expr, Index oldRelid, Index newRelid)
+{
+	ChangeVarnoContext context;
+
+	context.oldRelid = oldRelid;
+	context.newRelid = newRelid;
+	change_varno_walker((Node *) expr, &context);
+}
+
+/*
+ * Substitute newId for oldId in relids.
+ */
+static void
+change_relid(Relids *relids, Index oldId, Index newId)
+{
+	if (bms_is_member(oldId, *relids))
+		*relids = bms_add_member(bms_del_member(*relids, oldId), newId);
+}
+
+/*
+ * Update EC members to point to the remaining relation instead of the removed
+ * one, removing duplicates.
+ */
+static void
+update_ec_members(EquivalenceClass *ec, Index toRemove, Index toKeep)
+{
+	ListCell *prev = NULL;
+	ListCell *cell;
+	ListCell *next;
+
+	for (cell = list_head(ec->ec_members); cell; cell = next)
+	{
+		ListCell *otherCell;
+		EquivalenceMember *em = lfirst(cell);
+
+		next = lnext(cell);
+
+		if (!bms_is_member(toRemove, em->em_relids))
+		{
+			prev = cell;
+			continue;
+		}
+
+		change_relid(&em->em_relids, toRemove, toKeep);
+		/* We only process inner joins */
+		Assert(em->em_nullable_relids == NULL);
+		change_varno(em->em_expr, toRemove, toKeep);
+
+		/*
+		 * After we switched the equivalence member to the remaining relation,
+		 * check that it is not the same as the existing member, and if it
+		 * is, delete it.
+		 */
+		foreach (otherCell, ec->ec_members)
+		{
+			EquivalenceMember *other;
+
+			if (otherCell == cell)
+				continue;
+
+			other = castNode(EquivalenceMember, lfirst(otherCell));
+
+			if (equal(other->em_expr, em->em_expr))
+				break;
+		}
+
+		if (otherCell)
+			ec->ec_members = list_delete_cell(ec->ec_members, cell, prev);
+		else
+			prev = cell;
+	}
+}
+
+/*
+ * Update EC sources to point to the remaining relation instead of the
+ * removed one.
+ */
+static void
+update_ec_sources(List **sources, Index toRemove, Index toKeep)
+{
+	ListCell *prev = NULL;
+	ListCell *cell;
+	ListCell *next;
+
+	for (cell = list_head(*sources); cell; cell = next)
+	{
+		ListCell *otherCell;
+		RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(cell));
+
+		next = lnext(cell);
+
+		if (!bms_is_member(toRemove, rinfo->required_relids))
+		{
+			prev = cell;
+			continue;
+		}
+
+		change_varno(rinfo->clause, toRemove, toKeep);
+
+		/*
+		 * After switching the clause to the remaining relation, check it for
+		 * redundancy with existing ones. We don't have to check for
+		 * redundancy with derived clauses, because we've just deleted them.
+		 */
+		foreach (otherCell, *sources)
+		{
+			RestrictInfo *other;
+
+			if (otherCell == cell)
+				continue;
+
+			other = castNode(RestrictInfo, lfirst(otherCell));
+			if (equal(rinfo->clause, other->clause))
+				break;
+		}
+
+		if (otherCell)
+			*sources = list_delete_cell(*sources, cell, prev);
+		else
+		{
+			prev = cell;
+
+			/* We will keep this RestrictInfo, correct its relids. */
+			change_relid(&rinfo->required_relids, toRemove, toKeep);
+			change_relid(&rinfo->left_relids, toRemove, toKeep);
+			change_relid(&rinfo->right_relids, toRemove, toKeep);
+			change_relid(&rinfo->clause_relids, toRemove, toKeep);
+		}
+	}
+}
+
+/*
+ * Scratch space for the unique self join removal code.
+ */
+typedef struct
+{
+	PlannerInfo *root;
+
+	/* Temporary array for relation ids. */
+	Index *relids;
+
+	/*
+	 * Array of Relids, one for each relation, indexed by relation id. Each
+	 * element is a set of relation ids with which this relation has a special
+	 * join.
+	 */
+	Relids *special_join_rels;
+
+	/* Array of row marks indexed by relid. */
+	PlanRowMark **row_marks;
+
+	/* Bitmapset for join relids that is used to avoid reallocation. */
+	Relids joinrelids;
+
+	/*
+	 * Top-level targetlist of the query. We have to update any references
+	 * it has to the relations we remove.
+	 */
+	 List *targetlist;
+} UsjScratch;
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self join.
+ *
+ * The joinclauses list is destructively changed.
+ */
+static void
+remove_self_join_rel(UsjScratch *scratch, Relids joinrelids, List *joinclauses,
+					 RelOptInfo *toKeep, RelOptInfo *toRemove)
+{
+	PlannerInfo *root = scratch->root;
+	ListCell *cell;
+	List *toAppend;
+
+	/*
+	 * Transfer join and restriction clauses from the removed relation to the
+	 * remaining one. We change the Vars of the clause to point to the
+	 * remaining relation instead of the removed one. The clauses that require
+	 * a subset of joinrelids become restriction clauses of the remaining
+	 * relation, and others remain join clauses. We append them to
+	 * baserestrictinfo and joininfo respectively, trying not to introduce
+	 * duplicates.
+	 *
+	 * We also have to process the 'joinclauses' list here, because it
+	 * contains EC-derived join clauses which must become filter clauses. It
+	 * is not enough to just correct the ECs, because the EC-derived
+	 * restrictions are generated before join removal (see
+	 * generate_base_implied_equalities).
+	 */
+	toAppend = list_concat(joinclauses, toRemove->baserestrictinfo);
+	toAppend = list_concat(toAppend, toRemove->joininfo);
+
+	foreach(cell, toAppend)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+		bool is_join_clause = !bms_is_subset(rinfo->required_relids, joinrelids);
+		List **target = is_join_clause ? &toKeep->joininfo : &toKeep->baserestrictinfo;
+		ListCell *otherCell;
+
+		/*
+		 * We can't have an EC-derived clause that joins to some third
+		 * relation
+		 */
+		Assert(!(is_join_clause && rinfo->parent_ec != NULL));
+
+		/*
+		 * Replace the references to the removed relation with references to
+		 * the remaining one. We won't necessarily add this clause, because it
+		 * may be already present in the joininfo or baserestrictinfo. Still,
+		 * we have to switch it to point to the remaining relation. This is
+		 * important for join clauses that reference both relations, because
+		 * they are included in both joininfos.
+		 */
+		change_varno(rinfo->clause, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->required_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->left_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->right_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->clause_relids, toRemove->relid, toKeep->relid);
+
+		/*
+		 * Don't add the clause if it is already present in the list, or
+		 * derived from the same equivalence class, or is the same as another
+		 * clause.
+		 */
+		foreach (otherCell, *target)
+		{
+			RestrictInfo *other = lfirst(otherCell);
+
+			if (other == rinfo
+				|| (rinfo->parent_ec != NULL
+					&& other->parent_ec == rinfo->parent_ec)
+				|| equal(rinfo->clause, other->clause))
+			{
+				break;
+			}
+		}
+		if (otherCell != NULL)
+			continue;
+
+		/*
+		 * If the clause has the form of "X = X", replace it with null test.
+		 */
+		if (rinfo->mergeopfamilies)
+		{
+			Expr *leftOp = (Expr *) get_leftop(rinfo->clause);
+			Expr *rightOp = (Expr *) get_rightop(rinfo->clause);
+
+			if (leftOp != NULL && equal(leftOp, rightOp))
+			{
+				NullTest *test = makeNode(NullTest);
+				test->arg = leftOp;
+				test->nulltesttype = IS_NOT_NULL;
+				test->argisrow = false;
+				test->location = -1;
+				rinfo->clause = (Expr *) test;
+			}
+		}
+
+		*target = lappend(*target, rinfo);
+	}
+
+	/*
+	 * Transfer the targetlist and attr_needed flags.
+	 */
+	Assert(toRemove->reltarget->sortgrouprefs == 0);
+
+	foreach (cell, toRemove->reltarget->exprs)
+	{
+		Expr *node = lfirst(cell);
+		change_varno(node, toRemove->relid, toKeep->relid);
+		if (!list_member(toKeep->reltarget->exprs, node))
+			toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs,
+											   node);
+	}
+
+	for (int i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+	{
+		int attno = i - toKeep->min_attr;
+		toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+													 toRemove->attr_needed[attno]);
+	}
+
+	/*
+	 * If the removed relation has a row mark, transfer it to the remaining
+	 * one.
+	 *
+	 * If both rels have row marks, just keep the one corresponding to the
+	 * remaining relation, because we verified earlier that they have the same
+	 * strength.
+	 *
+	 * Also make sure that the scratch->row_marks cache is up to date, because
+	 * we are going to use it for further join removals.
+	 */
+	if (scratch->row_marks[toRemove->relid])
+	{
+		PlanRowMark **markToRemove = &scratch->row_marks[toRemove->relid];
+		PlanRowMark **markToKeep = &scratch->row_marks[toKeep->relid];
+
+		if (*markToKeep)
+		{
+			Assert((*markToKeep)->markType == (*markToRemove)->markType);
+
+			root->rowMarks = list_delete_ptr(root->rowMarks, *markToKeep);
+			*markToKeep = NULL;
+		}
+		else
+		{
+			*markToKeep = *markToRemove;
+			*markToRemove = NULL;
+
+			/* Shouldn't have inheritance children here. */
+			Assert((*markToKeep)->rti == (*markToKeep)->prti);
+
+			(*markToKeep)->rti = toKeep->relid;
+			(*markToKeep)->prti = toKeep->relid;
+		}
+	}
+
+	/*
+	 * Likewise replace references in SpecialJoinInfo data structures.
+	 *
+	 * This is relevant in case the join we're deleting is nested inside some
+	 * special joins: the upper joins' relid sets have to be adjusted.
+	 */
+	foreach (cell, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell);
+
+		change_relid(&sjinfo->min_lefthand, toRemove->relid, toKeep->relid);
+		change_relid(&sjinfo->min_righthand, toRemove->relid, toKeep->relid);
+		change_relid(&sjinfo->syn_lefthand, toRemove->relid, toKeep->relid);
+		change_relid(&sjinfo->syn_righthand, toRemove->relid, toKeep->relid);
+	}
+
+	/*
+	 * Likewise update references in PlaceHolderVar data structures.
+	 */
+	foreach(cell, root->placeholder_list)
+	{
+		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(cell);
+
+		/*
+		 * We are at an inner join of two base relations. A placeholder can't
+		 * be needed here or evaluated here.
+		 */
+		Assert(!bms_is_subset(phinfo->ph_eval_at, joinrelids));
+		Assert(!bms_is_subset(phinfo->ph_needed, joinrelids));
+
+		change_relid(&phinfo->ph_eval_at, toRemove->relid, toKeep->relid);
+		change_relid(&phinfo->ph_needed, toRemove->relid, toKeep->relid);
+		change_relid(&phinfo->ph_lateral, toRemove->relid, toKeep->relid);
+		change_relid(&phinfo->ph_var->phrels, toRemove->relid, toKeep->relid);
+	}
+
+	/*
+	 * Update the equivalence classes that reference the removed relations.
+	 */
+	foreach(cell, root->eq_classes)
+	{
+		EquivalenceClass *ec = lfirst(cell);
+
+		if (!bms_is_member(toRemove->relid, ec->ec_relids))
+		{
+			/*
+			 * This EC doesn't reference the removed relation, nothing to be
+			 * done for it.
+			 */
+			continue;
+		}
+
+		/*
+		 * Update the EC members to reference the remaining relation instead
+		 * of the removed one.
+		 */
+		update_ec_members(ec, toRemove->relid, toKeep->relid);
+		change_relid(&ec->ec_relids, toRemove->relid, toKeep->relid);
+
+		/*
+		 * We will now update source and derived clauses of the EC.
+		 *
+		 * Restriction clauses for base relations are already distributed to
+		 * the respective baserestrictinfo lists (see
+		 * generate_implied_equalities). The above code has already processed
+		 * this list, and updated these clauses to reference the remaining
+		 * relation, so we can skip them here based on their relids.
+		 *
+		 * Likewise, we have already processed the join clauses that join the
+		 * removed relation to the remaining one.
+		 *
+		 * Finally, there are join clauses that join the removed relation to
+		 * some third relation. We can't just delete the source clauses and
+		 * regenerate them from the EC, because the corresponding equality
+		 * operators might be missing (see the handling of ec_broken).
+		 * Therefore, we will update the references in the source clauses.
+		 *
+		 * Derived clauses can be generated again, so it is simpler to just
+		 * delete them.
+		 */
+		list_free(ec->ec_derives);
+		ec->ec_derives = NULL;
+		update_ec_sources(&ec->ec_sources, toRemove->relid, toKeep->relid);
+	}
+
+	/*
+	 * Mark the rel as "dead" to show it is no longer part of the join tree.
+	 * (Removing it from the baserel array altogether seems too risky.)
+	 */
+	toRemove->reloptkind = RELOPT_DEADREL;
+
+	/*
+	 * Update references to the removed relation from other baserels.
+	 */
+	for (int i = 1; i < root->simple_rel_array_size; i++)
+	{
+		RelOptInfo *otherrel = root->simple_rel_array[i];
+		int			attroff;
+
+		/* no point in processing target rel itself */
+		if (i == toRemove->relid)
+			continue;
+
+		/* there may be empty slots corresponding to non-baserel RTEs */
+		if (otherrel == NULL)
+			continue;
+
+		Assert(otherrel->relid == i); /* sanity check on array */
+
+		/* Update attr_needed arrays. */
+		for (attroff = otherrel->max_attr - otherrel->min_attr;
+			 attroff >= 0; attroff--)
+		{
+			change_relid(&otherrel->attr_needed[attroff], toRemove->relid,
+						 toKeep->relid);
+		}
+
+		/* Update lateral references. */
+		foreach (cell, otherrel->lateral_vars)
+			change_varno(lfirst(cell), toRemove->relid, toKeep->relid);
+	}
+}
+
+/*
+ * split_selfjoin_quals
+ *		Processes 'joinquals' building two lists, one with a list of quals
+ *		where the columns/exprs on either side of the join match and another
+ *		list containing the remaining quals.
+ *
+ * 'joinquals' must only contain quals for a RTE_RELATION being joined to
+ * itself.
+ */
+static void
+split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
+					 List **otherjoinquals)
+{
+	ListCell *lc;
+	List	 *sjoinquals = NIL;
+	List	 *rjoinquals = NIL;
+
+	foreach(lc, joinquals)
+	{
+		RestrictInfo   *rinfo = (RestrictInfo *) lfirst(lc);
+		OpExpr		   *expr;
+		Expr		   *leftexpr;
+		Expr		   *rightexpr;
+
+		if (bms_num_members(rinfo->clause_relids) != 2)
+			continue;
+
+		expr = (OpExpr *) rinfo->clause;
+
+		if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
+			continue;
+
+		leftexpr = linitial(expr->args);
+		rightexpr = lsecond(expr->args);
+
+		/* Can't match of the exprs are not of the same type */
+		if (leftexpr->type != rightexpr->type)
+			continue;
+
+		/* Easy case, both exprs are Vars */
+		if (IsA(leftexpr, Var))
+		{
+			Var *leftvar = (Var *) leftexpr;
+			Var *rightvar = (Var *) rightexpr;
+
+			Assert(leftvar->varlevelsup == 0);
+			Assert(rightvar->varlevelsup == 0);
+
+			/*
+			 * Ensure the caller didn't pass us any join quals that are not
+			 * self join quals.
+			 */
+			Assert(root->simple_rte_array[leftvar->varno]->relid ==
+				   root->simple_rte_array[rightvar->varno]->relid);
+
+			if (leftvar->varattno == rightvar->varattno)
+				sjoinquals = lappend(sjoinquals, rinfo);
+			else
+				rjoinquals = lappend(rjoinquals, rinfo);
+
+		}
+
+		/* TODO handle complex exprs by replacing varnos on RHS expr */
+	}
+
+	*selfjoinquals = sjoinquals;
+	*otherjoinquals = rjoinquals;
+}
+
+/*
+ * Find and remove unique self joins in a group of base relations that have
+ * the same Oid.
+ *
+ * Returns a set of relids that were removed.
+ */
+static Relids
+remove_self_joins_one_group(UsjScratch *scratch, Index *relids, int n)
+{
+	PlannerInfo *root = scratch->root;
+	Relids joinrelids = scratch->joinrelids;
+	Relids result = NULL;
+	int i, o;
+
+	if (n < 2)
+		return NULL;
+
+	for (o = 0; o < n; o++)
+	{
+		RelOptInfo *outer = root->simple_rel_array[relids[o]];
+
+		for (i = o + 1; i < n; i++)
+		{
+			RelOptInfo *inner = root->simple_rel_array[relids[i]];
+			List	   *restrictlist;
+			List	   *selfjoinquals;
+			List	   *otherjoinquals;
+
+			/* A sanity check: the relations have the same Oid. */
+			Assert(root->simple_rte_array[relids[i]]->relid
+					== root->simple_rte_array[relids[o]]->relid);
+
+			/*
+			 * This optimization applies to inner joins only, so skip any
+			 * relations that form a special join.
+			 */
+			if (bms_is_member(relids[i], scratch->special_join_rels[relids[o]]))
+				continue;
+
+			/* Reuse joinrelids bitset to avoid reallocation. */
+			joinrelids = bms_del_members(joinrelids, joinrelids);
+
+			/*
+			 * We only deal with base rels here, so their relids bitset
+			 * contains only one member -- their relid.
+			 */
+			joinrelids = bms_add_member(joinrelids, relids[o]);
+			joinrelids = bms_add_member(joinrelids, relids[i]);
+
+			/* Is it a unique self join? */
+			restrictlist = build_joinrel_restrictlist(root, joinrelids, outer,
+													  inner);
+
+			/*
+			 * Process restrictlist to seperate out the self join quals from
+			 * the other quals. e.g x = x goes to selfjoinquals and a = b to
+			 * otherjoinquals.
+			 */
+			split_selfjoin_quals(root, restrictlist, &selfjoinquals,
+								 &otherjoinquals);
+
+			/*
+			 * Determine if the inner table can duplicate outer rows.  We must
+			 * bypass the unique rel cache here since we're possibly using a
+			 * subset of join quals. We can use 'force_cache' = true when all
+			 * join quals are selfjoin quals.  Otherwise we could end up
+			 * putting false negatives in the cache.
+			 */
+			if (!innerrel_is_unique(root, joinrelids, inner->relids,
+				outer, JOIN_INNER, selfjoinquals, list_length(otherjoinquals) == 0))
+				continue;
+
+			/*
+			 * We can't remove the join if the relations have row marks of
+			 * different strength (e.g. one is locked FOR UPDATE and another
+			 * just has ROW_MARK_REFERENCE for EvalPlanQual rechecking).
+			 */
+			if (scratch->row_marks[relids[i]] && scratch->row_marks[relids[o]]
+				&& scratch->row_marks[relids[i]]->markType
+					!= scratch->row_marks[relids[o]]->markType)
+			{
+				continue;
+			}
+
+			/*
+			 * We can remove either relation, so remove the outer one, to
+			 * simplify this loop.
+			 */
+			remove_self_join_rel(scratch, joinrelids, restrictlist, inner, outer);
+			result = bms_add_member(result, relids[o]);
+
+			/*
+			 * Replace varno in root targetlist and HAVING clause.
+			 */
+			change_varno((Expr *) scratch->targetlist, relids[o], relids[i]);
+			change_varno((Expr *) root->parse->havingQual, relids[o], relids[i]);
+
+			/* We removed the outer relation, try the next one. */
+			break;
+		}
+	}
+
+	scratch->joinrelids = joinrelids;
+	return result;
+}
+
+/*
+ * A qsort comparator to sort the relids by the relation Oid.
+ */
+static int
+compare_rte(const Index *left, const Index *right, PlannerInfo *root)
+{
+	Oid l = root->simple_rte_array[*left]->relid;
+	Oid r = root->simple_rte_array[*right]->relid;
+
+	return l < r ? 1 : (l == r ? 0 : -1);
+}
+
+/*
+ * Find and remove unique self joins on a particular level of the join tree.
+ *
+ * We sort the relations by Oid and then examine each group with the same Oid.
+ * If we removed any relation, remove it from joinlist as well.
+ */
+static void
+remove_self_joins_one_level(UsjScratch *scratch, List **joinlist)
+{
+	ListCell *prev, *cell, *next;
+	Relids relidsToRemove = NULL;
+	Oid groupOid;
+	int groupStart;
+	int i;
+	int n = 0;
+	Index *relid_ascending = scratch->relids;
+	PlannerInfo *root = scratch->root;
+
+	/*
+	 * Collect the ids of base relations at this level of the join tree.
+	 */
+	foreach (cell, *joinlist)
+	{
+		RangeTblEntry *rte;
+		RelOptInfo *rel;
+		RangeTblRef *ref = (RangeTblRef *) lfirst(cell);
+		if (!IsA(ref, RangeTblRef))
+			continue;
+
+		rte = root->simple_rte_array[ref->rtindex];
+		rel = root->simple_rel_array[ref->rtindex];
+
+		/* We only care about base relations from which we select something. */
+		if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_RELATION
+			|| rel == NULL)
+		{
+			continue;
+		}
+
+		/*
+		 * This optimization won't work for tables that have inheritance
+		 * children.
+		 */
+		if (rte->inh)
+			continue;
+
+		relid_ascending[n++] = ref->rtindex;
+
+		/*
+		 * Limit the number of joins we process to control the quadratic
+		 * behavior.
+		 */
+		if (n > join_collapse_limit)
+			break;
+	}
+
+	if (n < 2)
+		return;
+
+	/*
+	 * Find and process the groups of relations that have same Oid.
+	 */
+	qsort_arg(relid_ascending, n, sizeof(*relid_ascending),
+			  (qsort_arg_comparator) compare_rte, root);
+	groupOid = root->simple_rte_array[relid_ascending[0]]->relid;
+	groupStart = 0;
+	for (i = 1; i < n; i++)
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid_ascending[i]];
+		Assert(rte->relid != InvalidOid);
+		if (rte->relid != groupOid)
+		{
+			relidsToRemove = bms_add_members(relidsToRemove,
+				remove_self_joins_one_group(scratch, &relid_ascending[groupStart],
+					i - groupStart));
+			groupOid = rte->relid;
+			groupStart = i;
+		}
+	}
+	Assert(groupOid != InvalidOid);
+	Assert(groupStart < n);
+	relidsToRemove = bms_add_members(relidsToRemove,
+		remove_self_joins_one_group(scratch, &relid_ascending[groupStart],
+			n - groupStart));
+
+	/* Delete the removed relations from joinlist. */
+	prev = NULL;
+	for (cell = list_head(*joinlist); cell; cell = next)
+	{
+		Node *node = lfirst(cell);
+
+		next = lnext(cell);
+
+		if (IsA(node, RangeTblRef)
+			&& bms_is_member(((RangeTblRef *) node)->rtindex, relidsToRemove))
+		{
+			*joinlist = list_delete_cell(*joinlist, cell, prev);
+		}
+		else
+			prev = cell;
+	}
+}
+
+/*
+ * Find and remove unique self joins on a single level of a join tree, and
+ * recurse to handle deeper levels.
+ */
+static void
+remove_self_joins_recurse(UsjScratch *scratch, List **joinlist)
+{
+	ListCell *lc;
+	foreach (lc, *joinlist)
+	{
+		switch (((Node *) lfirst(lc))->type)
+		{
+			case T_List:
+				remove_self_joins_recurse(scratch, (List **) &lfirst(lc));
+				break;
+			case T_RangeTblRef:
+				break;
+			default:
+				Assert(false);
+		}
+	}
+	remove_self_joins_one_level(scratch, joinlist);
+}
+
+/*
+ * Find and remove useless self joins.
+ *
+ * We search for joins where the same relation is joined to itself on all
+ * columns of some unique index. If this condition holds, then, for
+ * each outer row, only one inner row matches, and it is the same row
+ * of the same relation. This allows us to remove the join and replace
+ * it with a scan that combines WHERE clauses from both sides. The join
+ * clauses themselves assume the form of X = X and can be replaced with
+ * NOT NULL clauses.
+ *
+ * For the sake of simplicity, we don't apply this optimization to special
+ * joins. Here is a list of what we could do in some particular cases:
+ * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
+ * and then removed normally.
+ * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
+ * (IS NULL on join columns OR NOT inner quals)'.
+ * 'a a1 left join a a2': could simplify to a scan like inner, but without
+ * NOT NULL condtions on join columns.
+ * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
+ * can both remove rows and introduce duplicates.
+ *
+ * To search for removable joins, we order all the relations on their Oid,
+ * go over each set with the same Oid, and consider each pair of relations
+ * in this set. We check that both relation are made unique by the same
+ * unique index with the same clauses.
+ *
+ * To remove the join, we mark one of the participating relation as
+ * dead, and rewrite all references to it to point to the remaining
+ * relation. This includes modifying RestrictInfos, EquivalenceClasses and
+ * EquivalenceMembers. We also have to modify the row marks. The join clauses
+ * of the removed relation become either restriction or join clauses, based on
+ * whether they reference any relations not participating in the removed join.
+ *
+ * 'targetlist' is the top-level targetlist of query. If it has any references
+ * to the removed relations, we update them to point to the remaining ones.
+ */
+void
+remove_useless_self_joins(PlannerInfo *root, List **joinlist, List *targetlist)
+{
+	ListCell *lc;
+	UsjScratch scratch;
+
+	scratch.root = root;
+	scratch.relids = palloc(root->simple_rel_array_size * sizeof(Index));
+	scratch.special_join_rels = palloc0(root->simple_rel_array_size * sizeof(Relids));
+	scratch.row_marks = palloc0(root->simple_rel_array_size * sizeof(PlanRowMark *));
+	scratch.joinrelids = NULL;
+	scratch.targetlist = targetlist;
+
+	/* Find out which relations have special joins to which. */
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+		int bit = -1;
+		while ((bit = bms_next_member(info->min_lefthand, bit)) >= 0)
+		{
+			RelOptInfo *rel = find_base_rel(root, bit);
+			scratch.special_join_rels[rel->relid] =
+				bms_add_members(scratch.special_join_rels[rel->relid],
+					info->min_righthand);
+		}
+
+		bit = -1;
+		while ((bit = bms_next_member(info->min_righthand, bit)) >= 0)
+		{
+			RelOptInfo *rel = find_base_rel(root, bit);
+			scratch.special_join_rels[rel->relid] =
+				bms_add_members(scratch.special_join_rels[rel->relid],
+					info->min_lefthand);
+		}
+	}
+
+	/* Collect row marks. */
+	foreach (lc, root->rowMarks)
+	{
+		PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
+
+		/* Can't have more than one row mark for a relation. */
+		Assert(scratch.row_marks[rowMark->rti] == NULL);
+
+		scratch.row_marks[rowMark->rti] = rowMark;
+	}
+
+	/* Finally, remove the joins. */
+	remove_self_joins_recurse(&scratch, joinlist);
+}
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 3cedd01..8d6036c 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -224,7 +224,7 @@ query_planner(PlannerInfo *root, List *tlist,
 	 * jointree preprocessing, but the necessary information isn't available
 	 * until we've built baserel data structures and classified qual clauses.
 	 */
-	joinlist = remove_useless_joins(root, joinlist);
+	joinlist = remove_useless_left_joins(root, joinlist);
 
 	/*
 	 * Also, reduce any semijoins with unique inner rels to plain inner joins.
@@ -233,6 +233,11 @@ query_planner(PlannerInfo *root, List *tlist,
 	reduce_unique_semijoins(root);
 
 	/*
+	 * Remove self joins on a unique column.
+	 */
+	remove_useless_self_joins(root, &joinlist, tlist);
+
+	/*
 	 * Now distribute "placeholders" to base rels as needed.  This has to be
 	 * done after join removal because removal could change whether a
 	 * placeholder is evaluable at a base rel.
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 4130514..fde4118 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -39,14 +39,10 @@ typedef struct JoinHashEntry
 
 static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 					RelOptInfo *input_rel);
-static List *build_joinrel_restrictlist(PlannerInfo *root,
-						   RelOptInfo *joinrel,
-						   RelOptInfo *outer_rel,
-						   RelOptInfo *inner_rel);
 static void build_joinrel_joinlist(RelOptInfo *joinrel,
 					   RelOptInfo *outer_rel,
 					   RelOptInfo *inner_rel);
-static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+static List *subbuild_joinrel_restrictlist(Relids joinrelids,
 							  List *joininfo_list,
 							  List *new_restrictlist);
 static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
@@ -556,7 +552,7 @@ build_join_rel(PlannerInfo *root,
 		 */
 		if (restrictlist_ptr)
 			*restrictlist_ptr = build_joinrel_restrictlist(root,
-														   joinrel,
+														   joinrel->relids,
 														   outer_rel,
 														   inner_rel);
 		return joinrel;
@@ -659,7 +655,7 @@ build_join_rel(PlannerInfo *root,
 	 * caller might or might not need the restrictlist, but I need it anyway
 	 * for set_joinrel_size_estimates().)
 	 */
-	restrictlist = build_joinrel_restrictlist(root, joinrel,
+	restrictlist = build_joinrel_restrictlist(root, joinrel->relids,
 											  outer_rel, inner_rel);
 	if (restrictlist_ptr)
 		*restrictlist_ptr = restrictlist;
@@ -981,7 +977,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
  *	  the various joinlist entries ultimately refer to RestrictInfos
  *	  pushed into them by distribute_restrictinfo_to_rels().
  *
- * 'joinrel' is a join relation node
+ * 'joinrelids' is a join relation id set
  * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
  *		to form joinrel.
  *
@@ -994,9 +990,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
  * RestrictInfo nodes are no longer context-dependent.  Instead, just include
  * the original nodes in the lists made for the join relation.
  */
-static List *
+List *
 build_joinrel_restrictlist(PlannerInfo *root,
-						   RelOptInfo *joinrel,
+						   Relids joinrelids,
 						   RelOptInfo *outer_rel,
 						   RelOptInfo *inner_rel)
 {
@@ -1007,8 +1003,8 @@ build_joinrel_restrictlist(PlannerInfo *root,
 	 * eliminating any duplicates (important since we will see many of the
 	 * same clauses arriving from both input relations).
 	 */
-	result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL);
-	result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result);
+	result = subbuild_joinrel_restrictlist(joinrelids, outer_rel->joininfo, NIL);
+	result = subbuild_joinrel_restrictlist(joinrelids, inner_rel->joininfo, result);
 
 	/*
 	 * Add on any clauses derived from EquivalenceClasses.  These cannot be
@@ -1017,7 +1013,7 @@ build_joinrel_restrictlist(PlannerInfo *root,
 	 */
 	result = list_concat(result,
 						 generate_join_implied_equalities(root,
-														  joinrel->relids,
+														  joinrelids,
 														  outer_rel->relids,
 														  inner_rel));
 
@@ -1043,7 +1039,7 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
 }
 
 static List *
-subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+subbuild_joinrel_restrictlist(Relids joinrelids,
 							  List *joininfo_list,
 							  List *new_restrictlist)
 {
@@ -1053,7 +1049,7 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
 
-		if (bms_is_subset(rinfo->required_relids, joinrel->relids))
+		if (bms_is_subset(rinfo->required_relids, joinrelids))
 		{
 			/*
 			 * This clause becomes a restriction clause for the joinrel, since
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 574bb85..7eb59c1 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -287,6 +287,11 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
 			   List **restrictlist_ptr);
+
+extern List *build_joinrel_restrictlist(PlannerInfo *root,
+						   Relids joinrelids,
+						   RelOptInfo *outer_rel,
+						   RelOptInfo *inner_rel);
 extern Relids min_join_parameterization(PlannerInfo *root,
 						  Relids joinrelids,
 						  RelOptInfo *outer_rel,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index b093a3c..15fa476 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -16,6 +16,7 @@
 
 #include "nodes/pathnodes.h"
 #include "nodes/plannodes.h"
+#include "optimizer/paths.h"
 
 /* GUC parameters */
 #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
@@ -95,14 +96,18 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
 /*
  * prototypes for plan/analyzejoins.c
  */
-extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern List *remove_useless_left_joins(PlannerInfo *root, List *joinlist);
 extern void reduce_unique_semijoins(PlannerInfo *root);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+
 extern bool innerrel_is_unique(PlannerInfo *root,
 				   Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
 				   JoinType jointype, List *restrictlist, bool force_cache);
 
+extern void remove_useless_self_joins(PlannerInfo *root, List **jointree,
+									  List *tlist);
+
 /*
  * prototypes for plan/setrefs.c
  */
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index c448d85..a57905f 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -439,3 +439,35 @@ explain (costs off)
    Filter: ((unique1 = unique1) OR (unique2 = unique2))
 (2 rows)
 
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on m.ff + n.ff = p.f1;
+               QUERY PLAN               
+----------------------------------------
+ Nested Loop
+   Join Filter: ((n.ff + n.ff) = p.f1)
+   ->  Seq Scan on ec1 p
+   ->  Materialize
+         ->  Seq Scan on ec0 n
+               Filter: (ff IS NOT NULL)
+(6 rows)
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
+   ->  Seq Scan on ec1 p
+   ->  Materialize
+         ->  Seq Scan on ec0 n
+               Filter: (ff IS NOT NULL)
+(6 rows)
+
+reset enable_mergejoin;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 88fcd52..6b6f490 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4486,6 +4486,266 @@ select * from
 (0 rows)
 
 --
+-- test that semi- or inner self-joins on a unique column are removed
+--
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b 
+---+---
+ 2 | 1
+(1 row)
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+				where q.a = p.a and q.b < 10);
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj t2
+   Filter: (a IS NOT NULL)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (t2.a = t2.a)
+           ->  Seq Scan on sj
+                 Filter: (a = t2.a)
+(7 rows)
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: (y.a = z.q1)
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: (y.a = z.q1)
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on int8_tbl z
+(6 rows)
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+	right join sj j1 inner join sj j2 on j1.a = j2.a
+	on true) z
+on true;
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop Left Join
+   ->  Result
+   ->  Nested Loop Left Join
+         ->  Seq Scan on sj j2
+               Filter: (a IS NOT NULL)
+         ->  Materialize
+               ->  Seq Scan on int8_tbl y
+(7 rows)
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+           QUERY PLAN            
+---------------------------------
+ HashAggregate
+   Group Key: q.b
+   Filter: (sum(q.a) = 1)
+   ->  Seq Scan on sj q
+         Filter: (a IS NOT NULL)
+(5 rows)
+
+-- update lateral references
+explain (costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+                QUERY PLAN                 
+-------------------------------------------
+ Nested Loop
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Function Scan on generate_series gs
+(4 rows)
+
+explain (costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+                QUERY PLAN                 
+-------------------------------------------
+ Nested Loop
+   ->  Seq Scan on sj y
+         Filter: (a IS NOT NULL)
+   ->  Function Scan on generate_series gs
+(4 rows)
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+  left join sj r on p.a + q.a = r.a;
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Left Join
+   Join Filter: ((q.a + q.a) = r.a)
+   ->  Seq Scan on sj q
+         Filter: (a IS NOT NULL)
+   ->  Materialize
+         ->  Seq Scan on sj r
+(6 rows)
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+	(sk k1 join sk k2 on k1.a = k2.a)
+	join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   Join Filter: (k1.b = j2.b)
+   ->  Nested Loop
+         ->  Index Scan using sk_a_idx on sk k1
+         ->  Index Only Scan using sk_a_idx on sk k2
+               Index Cond: (a = k1.a)
+   ->  Materialize
+         ->  Index Scan using sj_a_key on sj j2
+               Index Cond: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select 1 from
+	(sk k1 join sk k2 on k1.a = k2.a)
+	join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   Join Filter: (k1.b = j2.b)
+   ->  Nested Loop
+         ->  Index Scan using sk_a_idx on sk k1
+         ->  Index Only Scan using sk_a_idx on sk k2
+               Index Cond: (a = k1.a)
+   ->  Materialize
+         ->  Index Scan using sj_a_key on sj j2
+               Index Cond: (a IS NOT NULL)
+(9 rows)
+
+reset join_collapse_limit;
+reset enable_seqscan;
+-- We need an index on two columns for the next couple of tests.
+create table sl(a int, b int);
+insert into sl values (1, 1), (1, 2), (2, 1);
+create unique index on sl(a, b);
+vacuum analyze sl;
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different.  In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on sl t2
+   Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- This is broken: one of these queries returns a different number of rows
+-- compared to vanilla
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+ a | b | a | b 
+---+---+---+---
+ 1 | 1 | 1 | 1
+ 1 | 1 | 1 | 2
+ 2 | 1 | 2 | 1
+(3 rows)
+
+select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+ a | b | a | b 
+---+---+---+---
+ 1 | 1 | 1 | 1
+ 1 | 2 | 1 | 1
+ 2 | 1 | 2 | 1
+(3 rows)
+
+-- Check that we can cope with multiple matching unique indexes on each side
+create table sm(a int unique, b int unique, c int unique);
+explain (costs off)
+select * from sm m, sm n where m.a = n.b and m.c = n.c;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sm n
+   Filter: ((c IS NOT NULL) AND (a = b))
+(2 rows)
+
+explain (costs off)
+select * from sm m, sm n where m.a = n.c and m.b = n.b;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sm n
+   Filter: ((b IS NOT NULL) AND (a = c))
+(2 rows)
+
+explain (costs off)
+select * from sm m, sm n where m.c = n.b and m.a = n.a;
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on sm n
+   Filter: ((a IS NOT NULL) AND (c = b))
+(2 rows)
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+--
 -- Test hints given on incorrect column references are useful
 --
 select t1.uunique1 from
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 85aa65d..b1e2483 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -262,3 +262,20 @@ explain (costs off)
 -- this could be converted, but isn't at present
 explain (costs off)
   select * from tenk1 where unique1 = unique1 or unique2 = unique2;
+
+
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on m.ff + n.ff = p.f1;
+
+explain (costs off)
+  select * from ec0 m join ec0 n on m.ff = n.ff
+  join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+
+reset enable_mergejoin;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c247509..e4d3757 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1570,6 +1570,123 @@ select * from
   int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
 
 --
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+				where q.a = p.a and q.b < 10);
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+	right join sj j1 inner join sj j2 on j1.a = j2.a
+	on true) z
+on true;
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+
+-- update lateral references
+explain (costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+
+explain (costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+  lateral generate_series(1, q.a) gs(i);
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+  left join sj r on p.a + q.a = r.a;
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+	(sk k1 join sk k2 on k1.a = k2.a)
+	join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+explain (costs off) select 1 from
+	(sk k1 join sk k2 on k1.a = k2.a)
+	join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+reset join_collapse_limit;
+reset enable_seqscan;
+
+-- We need an index on two columns for the next couple of tests.
+create table sl(a int, b int);
+insert into sl values (1, 1), (1, 2), (2, 1);
+create unique index on sl(a, b);
+vacuum analyze sl;
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different.  In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+
+-- This is broken: one of these queries returns a different number of rows
+-- compared to vanilla
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+
+-- Check that we can cope with multiple matching unique indexes on each side
+create table sm(a int unique, b int unique, c int unique);
+explain (costs off)
+select * from sm m, sm n where m.a = n.b and m.c = n.c;
+explain (costs off)
+select * from sm m, sm n where m.a = n.c and m.b = n.b;
+explain (costs off)
+select * from sm m, sm n where m.c = n.b and m.a = n.a;
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+
+--
 -- Test hints given on incorrect column references are useful
 --
 
-- 
2.7.4

Reply via email to