Hi!

I have reviewed your patch and I noticed a few things.

First of all, I think I found a bug in your latest patch version, and this query shows it:

EXPLAIN (COSTS OFF)
SELECT c.oid, e.oid FROM pg_class c FULL JOIN (
  SELECT e1.oid FROM pg_extension e1, pg_extension e2
  WHERE e1.oid=e2.oid) AS e
  ON c.oid=e.oid;

In the current version we get such a query plan:

               QUERY PLAN
-----------------------------------------
 Hash Full Join
   Hash Cond: (c.oid = e2.oid)
   ->  Seq Scan on pg_class c
   ->  Hash
         ->  Seq Scan on pg_extension e2
(5 rows)

But I think it should be:

QUERY PLAN
-----------------------------------------
Hash Full Join
Hash Cond: (c.oid = e2.oid)
-> Seq Scan on pg_class c
-> Hash
-> Seq Scan on pg_extension e2
*Filter: (oid IS NOT NULL)*
(6 rows)

I have looked at the latest version of the code, I assume that the error lies in the replace_varno_walker function, especially in the place where we check the node by type Var, and does not form any NullTest node.

if (OidIsValid(reloid) && get_attnotnull(reloid, attnum)) -- this condition works
        {
          rinfo->clause = (Expr *) makeBoolConst(true, false);
        }
        else
        {
          NullTest   *ntest = makeNode(NullTest);

          ntest->arg = leftOp;
          ntest->nulltesttype = IS_NOT_NULL;
          ntest->argisrow = false;
          ntest->location = -1;
          rinfo->clause = (Expr *) ntest;
        }


Secondly, I added some code in some places to catch erroneous cases and added a condition when we should not try to apply the self-join-removal transformation due to the absence of an empty self-join list after searching for it and in general if there are no joins in the query. Besides, I added a query for testing and wrote about it above. I have attached my diff file.


In addition, I found a comment for myself that was not clear to me. I would be glad if you could explain it to me.

You mentioned superior outer join in the comment, unfortunately, I didn't find anything about it in the PostgreSQL code, and this explanation remained unclear to me. Could you explain in more detail what you meant?

/*
 * At this stage joininfo lists of inner and outer can contain
 * only clauses, required for *a superior outer join* that can't
 * influence on this optimization. So, we can avoid to call the
 * build_joinrel_restrictlist() routine.
*/
 restrictlist = generate_join_implied_equalities(root, joinrelids,
                              inner->relids,
                              outer, NULL);

--

Regards,
Alena Rybakina
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 3e10083905c..5ba5ca693f1 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -1704,6 +1704,8 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
 	List			   *binfo_candidates = NIL;
 	ReplaceVarnoContext	ctx = {.from = toRemove->relid, .to = toKeep->relid};
 
+	Assert(toKeep->relid != -1);
+
 	/*
 	 * Replace index of removing table with the keeping one. The technique of
 	 * removing/distributing restrictinfo is used here to attach just appeared
@@ -2007,6 +2009,8 @@ match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
 				/* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */
 				continue;
 
+			Assert(is_opclause(orinfo->clause));
+
 			oclause = bms_is_empty(orinfo->left_relids) ?
 					get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
 			c2 = (bms_is_empty(orinfo->left_relids) ?
@@ -2150,6 +2154,18 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 			split_selfjoin_quals(root, restrictlist, &selfjoinquals,
 								 &otherjoinquals, inner->relid, outer->relid);
 
+			if (list_length(selfjoinquals) == 0)
+ 			{
+ 				/*
+ 				 * XXX:
+ 				 * we would detect self-join without quals like 'x==x' if we had
+ 				 * an foreign key constraint on some of other quals and this join
+ 				 * haven't any columns from the outer in the target list.
+ 				 * But it is still complex task.
+ 				 */
+ 				continue;
+ 			}
+
 			/*
 			 * To enable SJE for the only degenerate case without any self join
 			 * clauses at all, add baserestrictinfo to this list.
@@ -2332,7 +2348,7 @@ remove_useless_self_joins(PlannerInfo *root, List *joinlist)
 	Relids	ToRemove = NULL;
 	int		relid = -1;
 
-	if (!enable_self_join_removal)
+	if ((list_length(joinlist) <=1 && !IsA(linitial(joinlist), List)) || !enable_self_join_removal)
 		return joinlist;
 
 	/*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 10b23944feb..800410d6b18 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5807,13 +5807,11 @@ explain (costs off)
 select p.* from
   (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
   where p.k = 1 and p.k = 2;
-                   QUERY PLAN                   
-------------------------------------------------
+        QUERY PLAN        
+--------------------------
  Result
    One-Time Filter: false
-   ->  Index Scan using parent_pkey on parent x
-         Index Cond: (k = 1)
-(4 rows)
+(2 rows)
 
 -- bug 5255: this is not optimizable by join removal
 begin;
@@ -6759,6 +6757,20 @@ explain (costs off) -- Collapse both self joins
    Filter: (a IS NOT NULL)
 (2 rows)
 
+EXPLAIN (COSTS OFF)
+SELECT c.oid, e.oid FROM pg_class c FULL JOIN (
+  SELECT e1.oid FROM pg_extension e1, pg_extension e2
+  WHERE e1.oid=e2.oid) AS e
+  ON c.oid=e.oid;
+               QUERY PLAN                
+-----------------------------------------
+ Hash Full Join
+   Hash Cond: (c.oid = e2.oid)
+   ->  Seq Scan on pg_class c
+   ->  Hash
+         ->  Seq Scan on pg_extension e2
+(5 rows)
+
 reset self_join_search_limit;
 -- Check that clauses from the join filter list is not lost on the self-join removal
 CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index a73c1f90c4a..1950e6f281f 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -2499,13 +2499,16 @@ SELECT * FROM rw_view1;
 (1 row)
 
 EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
-                    QUERY PLAN                    
---------------------------------------------------
- Update on base_tbl
-   ->  Index Scan using base_tbl_pkey on base_tbl
-         Index Cond: (id = 1)
-         Filter: ((NOT deleted) AND snoop(data))
-(4 rows)
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Update on base_tbl base_tbl_1
+   ->  Nested Loop
+         ->  Index Scan using base_tbl_pkey on base_tbl base_tbl_1
+               Index Cond: (id = 1)
+         ->  Index Scan using base_tbl_pkey on base_tbl
+               Index Cond: (id = 1)
+               Filter: ((NOT deleted) AND snoop(data))
+(7 rows)
 
 DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
 NOTICE:  snooped value: Row 1
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1a643638372..4d49c0767a0 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2564,6 +2564,11 @@ explain (costs off) -- Collapse one self join
 set self_join_search_limit to 3;
 explain (costs off) -- Collapse both self joins
   select j1.* from sj j1, sj j2, sj j3 where (j1.a = j2.a) and (j2.a = j3.a);
+EXPLAIN (COSTS OFF)
+SELECT c.oid, e.oid FROM pg_class c FULL JOIN (
+  SELECT e1.oid FROM pg_extension e1, pg_extension e2
+  WHERE e1.oid=e2.oid) AS e
+  ON c.oid=e.oid;
 reset self_join_search_limit;
 
 -- Check that clauses from the join filter list is not lost on the self-join removal

Reply via email to