I wrote: > ... I think the risk/reward ratio for messing with this in the > back branches is unattractive in any case: to fix a corner case that > apparently nobody uses in the field, we risk breaking any number of > mainstream parameterized-path cases. I'm content to commit the v5 patch > (or a successor) into HEAD, but at this point I'm not sure I even want > to risk it in v16, let alone perform delicate surgery to get it to work > in older branches. I think we ought to go with the "tablesample scans > can't be reparameterized" approach in v16 and before.
Concretely, about like this for v16, and similarly in older branches. regards, tom lane
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5f5596841c..c4ec6ed5e6 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -4087,6 +4087,28 @@ do { \ switch (nodeTag(path)) { case T_Path: + + /* + * If it's a SampleScan with tablesample parameters referencing + * the other relation, we can't reparameterize, because we must + * not change the RTE's contents here. (Doing so would break + * things if we end up using a non-partitionwise join.) + */ + if (path->pathtype == T_SampleScan) + { + Index scan_relid = path->parent->relid; + RangeTblEntry *rte; + + /* it should be a base rel with a tablesample clause... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->tablesample != NULL); + + if (bms_overlap(pull_varnos(root, (Node *) rte->tablesample), + child_rel->top_parent_relids)) + return NULL; + } FLAT_COPY_PATH(new_path, path, Path); break; diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6560fe2416..a69a8a70f3 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -505,6 +505,32 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL 550 | | (12 rows) +-- lateral reference in sample scan +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + QUERY PLAN +--------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on prt1_p1 t1_1 + -> Seq Scan on prt1_p2 t1_2 + -> Seq Scan on prt1_p3 t1_3 + -> Append + -> Sample Scan on prt1_p1 t2_1 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) + -> Sample Scan on prt1_p2 t2_2 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) + -> Sample Scan on prt1_p3 t2_3 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: (t1.a = a) +(15 rows) + +RESET max_parallel_workers_per_gather; -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -1944,6 +1970,40 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL 550 | 0 | 0002 | | | | | (12 rows) +-- partitionwise join with lateral reference in sample scan +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + QUERY PLAN +---------------------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on prt1_l_p1 t1_1 + -> Seq Scan on prt1_l_p2_p1 t1_2 + -> Seq Scan on prt1_l_p2_p2 t1_3 + -> Seq Scan on prt1_l_p3_p1 t1_4 + -> Seq Scan on prt1_l_p3_p2 t1_5 + -> Append + -> Sample Scan on prt1_l_p1 t2_1 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p2_p1 t2_2 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p2_p2 t2_3 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p3_p1 t2_4 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) + -> Sample Scan on prt1_l_p3_p2 t2_5 + Sampling: system (t1.a) REPEATABLE (t1.b) + Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text)) +(23 rows) + +RESET max_parallel_workers_per_gather; -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 48daf3aee3..d28248b42d 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -100,6 +100,14 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a; +-- lateral reference in sample scan +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; +RESET max_parallel_workers_per_gather; + -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -387,6 +395,14 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; +-- partitionwise join with lateral reference in sample scan +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; +RESET max_parallel_workers_per_gather; + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;