From faef921c8abf145362530b8d020a450926361a50 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Fri, 3 May 2024 01:43:31 +0300
Subject: [PATCH v2 3/4] Forbid self-join elimination on table sampling scans

Removing relations with table sampling scans could lead to a change in the
query semantics.  There are probably some situations where we could safely
do self-join elimination by moving the table sampling to the remaining
relation, but we leave that until there is significant interest in this area.

Reported-by: Alexander Lakhin
Discussion: https://postgr.es/m/0ed2cf02-e655-6ec7-a4fe-52fd8d572f65%40gmail.com
Author: Richard Guo, Andrei Lepikhov
Reviewed-by: Alexander Korotkov
---
 src/backend/optimizer/plan/analyzejoins.c |  9 ++++++---
 src/test/regress/expected/join.out        | 19 +++++++++++++++++++
 src/test/regress/sql/join.sql             |  8 ++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index a46c3ffaa5a..d82d86b37f4 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -2301,11 +2301,14 @@ remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
 			RangeTblEntry *rte = root->simple_rte_array[varno];
 
 			/*
-			 * We only care about base relations from which we select
-			 * something.
+			 * We only consider ordinary relations as candidates to be removed,
+			 * and these relations should not have TABLESAMPLE clauses
+			 * specified.  Removing a relation with a TABLESAMPLE clause could
+			 * potentially change the semantics of the query.
 			 */
 			if (rte->rtekind == RTE_RELATION &&
-				rte->relkind == RELKIND_RELATION)
+				rte->relkind == RELKIND_RELATION &&
+				rte->tablesample == NULL)
 			{
 				Assert(!bms_is_member(varno, relids));
 				relids = bms_add_member(relids, varno);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 4e1288814ab..02765a7bc93 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6215,6 +6215,25 @@ select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
                Filter: (b IS NOT NULL)
 (6 rows)
 
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   ->  Seq Scan on sj t1
+   ->  Memoize
+         Cache Key: t1.a, t1.b
+         Cache Mode: binary
+         ->  Sample Scan on sj
+               Sampling: system (t1.b)
+               Filter: (t1.a = a)
+(8 rows)
+
 -- Degenerated case.
 explain (costs off)
 select * from
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index f5d06dbffb1..0a4ea3df19c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2352,6 +2352,14 @@ where exists (select * from sj q
 explain (costs off)
 select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
 
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+
 -- Degenerated case.
 explain (costs off)
 select * from
-- 
2.39.3 (Apple Git-145)

