Hi all,
I recently run benchmark[1] on master, but I found performance problem
as below:
explain analyze select
subq_0.c0 as c0,
subq_0.c1 as c1,
subq_0.c2 as c2
from
(select
ref_0.l_shipmode as c0,
sample_0.l_orderkey as c1,
sample_0.l_quantity as c2,
ref_0.l_orderkey as c3,
sample_0.l_shipmode as c5,
ref_0.l_shipinstruct as c6
from
public.lineitem as ref_0
left join public.lineitem as sample_0
on ((select p_partkey from public.part order by p_partkey limit 1)
is not NULL)
where sample_0.l_orderkey is NULL) as subq_0
where subq_0.c5 is NULL
limit 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=78.00..45267050.75 rows=1 width=27) (actual
time=299695.097..299695.099 rows=0 loops=1)
InitPlan 1 (returns $0)
-> Limit (cost=78.00..78.00 rows=1 width=8) (actual
time=0.651..0.652 rows=1 loops=1)
-> Sort (cost=78.00..83.00 rows=2000 width=8) (actual
time=0.650..0.651 rows=1 loops=1)
Sort Key: part.p_partkey
Sort Method: top-N heapsort Memory: 25kB
-> Seq Scan on part (cost=0.00..68.00 rows=2000 width=8)
(actual time=0.013..0.428 rows=2000 loops=1)
-> Nested Loop Left Join (cost=0.00..45266972.75 rows=1 width=27)
(actual time=299695.096..299695.096 rows=0 loops=1)
Join Filter: ($0 IS NOT NULL)
Filter: ((sample_0.l_orderkey IS NULL) AND (sample_0.l_shipmode IS
NULL))
Rows Removed by Filter: 3621030625
-> Seq Scan on lineitem ref_0 (cost=0.00..1969.75 rows=60175
width=11) (actual time=0.026..6.225 rows=60175 loops=1)
-> Materialize (cost=0.00..2270.62 rows=60175 width=27) (actual
time=0.000..2.554 rows=60175 loops=60175)
-> Seq Scan on lineitem sample_0 (cost=0.00..1969.75
rows=60175 width=27) (actual time=0.004..8.169 rows=60175 loops=1)
Planning Time: 0.172 ms
Execution Time: 299695.501 ms
(16 rows)
After I set enable_material to off, the same query run faster, as below:
set enable_material = off;
explain analyze select
subq_0.c0 as c0,
subq_0.c1 as c1,
subq_0.c2 as c2
from
(select
ref_0.l_shipmode as c0,
sample_0.l_orderkey as c1,
sample_0.l_quantity as c2,
ref_0.l_orderkey as c3,
sample_0.l_shipmode as c5,
ref_0.l_shipinstruct as c6
from
public.lineitem as ref_0
left join public.lineitem as sample_0
on ((select p_partkey from public.part order by p_partkey limit 1)
is not NULL)
where sample_0.l_orderkey is NULL) as subq_0
where subq_0.c5 is NULL
limit 1;
QUERY
PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=1078.00..91026185.57 rows=1 width=27) (actual
time=192669.605..192670.425 rows=0 loops=1)
InitPlan 1 (returns $0)
-> Limit (cost=78.00..78.00 rows=1 width=8) (actual
time=0.662..0.663 rows=1 loops=1)
-> Sort (cost=78.00..83.00 rows=2000 width=8) (actual
time=0.661..0.662 rows=1 loops=1)
Sort Key: part.p_partkey
Sort Method: top-N heapsort Memory: 25kB
-> Seq Scan on part (cost=0.00..68.00 rows=2000 width=8)
(actual time=0.017..0.430 rows=2000 loops=1)
-> Gather (cost=1000.00..91026107.57 rows=1 width=27) (actual
time=192669.604..192670.422 rows=0 loops=1)
Workers Planned: 1
Params Evaluated: $0
Workers Launched: 1
-> Nested Loop Left Join (cost=0.00..91025107.47 rows=1
width=27) (actual time=192588.143..192588.144 rows=0 loops=2)
Join Filter: ($0 IS NOT NULL)
Filter: ((sample_0.l_orderkey IS NULL) AND
(sample_0.l_shipmode IS NULL))
Rows Removed by Filter: 1810515312
-> Parallel Seq Scan on lineitem ref_0 (cost=0.00..1721.97
rows=35397 width=11) (actual time=0.007..3.797 rows=30088 loops=2)
-> Seq Scan on lineitem sample_0 (cost=0.00..1969.75
rows=60175 width=27) (actual time=0.000..2.637 rows=60175 loops=60175)
Planning Time: 0.174 ms
Execution Time: 192670.458 ms
(19 rows)
I debug the code and find consider_parallel_nestloop() doesn't consider
materialized form of the cheapest inner path.
When enable_material = true, we can see Material path won in first plan,
but Parallel Seq Scan node doesn't add as outer path, which because
in try_partial_nestloop_path() , the cost of nestloop wat computed using
seq scan path not material path.
[1] include test table schema and data, you can repeat above problem.
I try fix this problem in attached patch, and I found pg12.12 also had this
issue. Please review my patch, thanks!
[1] https://github.com/tenderwg/tpch_test
From 7b652382928cc80ab6fbe782feada9f0eab4c5a8 Mon Sep 17 00:00:00 2001
From: "tender.wang" <[email protected]>
Date: Tue, 5 Sep 2023 14:33:24 +0800
Subject: [PATCH] Parallel seq scan should consider materila inner path in
nestloop case.
---
src/backend/optimizer/path/joinpath.c | 16 +++++++++++
src/test/regress/expected/select_parallel.out | 27 +++++++++++++++++++
src/test/regress/sql/select_parallel.sql | 10 +++++++
3 files changed, 53 insertions(+)
diff --git a/src/backend/optimizer/path/joinpath.c
b/src/backend/optimizer/path/joinpath.c
index 821d282497..5a10fb7f4b 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -2004,10 +2004,22 @@ consider_parallel_nestloop(PlannerInfo *root,
{
JoinType save_jointype = jointype;
ListCell *lc1;
+ Path *matpath = NULL;
+ Path *inner_cheapest_total = innerrel->cheapest_total_path;
if (jointype == JOIN_UNIQUE_INNER)
jointype = JOIN_INNER;
+ /*
+ * Consider materializing the cheapest inner path, unless
+ * enable_material is off or the path in question materializes its
+ * output anyway.
+ */
+ if (enable_material && inner_cheapest_total != NULL &&
+ !ExecMaterializesOutput(inner_cheapest_total->pathtype))
+ matpath = (Path *)
+ create_material_path(innerrel, inner_cheapest_total);
+
foreach(lc1, outerrel->partial_pathlist)
{
Path *outerpath = (Path *) lfirst(lc1);
@@ -2064,6 +2076,10 @@ consider_parallel_nestloop(PlannerInfo *root,
try_partial_nestloop_path(root, joinrel,
outerpath, mpath,
pathkeys, jointype, extra);
}
+ /* Also consider materialized form of the cheapest inner path */
+ if (matpath != NULL && matpath->parallel_safe)
+ try_partial_nestloop_path(root, joinrel, outerpath,
matpath,
+
pathkeys, jointype, extra);
}
}
diff --git a/src/test/regress/expected/select_parallel.out
b/src/test/regress/expected/select_parallel.out
index d88353d496..452a3aed07 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -844,6 +844,33 @@ select * from
(12 rows)
reset enable_material;
+-- test materialized form of the cheapest inner path
+set min_parallel_table_scan_size = '512kB';
+explain(costs off) select suq.two, suq.four
+from
+ (select t1.two, t2.four, t1.ten,t2.twenty
+ from tenk1 t1 left join tenk2 t2
+ on ((select q1 from int8_tbl order by q1 limit 1)is not null) where t2.ten is
null) as suq;
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Left Join
+ Join Filter: ($0 IS NOT NULL)
+ Filter: (t2.ten IS NULL)
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Sort
+ Sort Key: int8_tbl.q1
+ -> Seq Scan on int8_tbl
+ -> Gather
+ Workers Planned: 4
+ -> Parallel Seq Scan on tenk1 t1
+ -> Materialize
+ -> Gather
+ Workers Planned: 2
+ -> Parallel Seq Scan on tenk2 t2
+(15 rows)
+
+set min_parallel_table_scan_size = 0;
reset enable_hashagg;
-- check parallelized int8 aggregate (bug #14897)
explain (costs off)
diff --git a/src/test/regress/sql/select_parallel.sql
b/src/test/regress/sql/select_parallel.sql
index 80c914dc02..ded44a0c09 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -312,6 +312,16 @@ select * from
reset enable_material;
+-- test materialized form of the cheapest inner path
+set min_parallel_table_scan_size = '512kB';
+
+explain(costs off) select suq.two, suq.four
+from
+ (select t1.two, t2.four, t1.ten,t2.twenty
+ from tenk1 t1 left join tenk2 t2
+ on ((select q1 from int8_tbl order by q1 limit 1)is not null) where t2.ten is
null) as suq;
+
+set min_parallel_table_scan_size = 0;
reset enable_hashagg;
-- check parallelized int8 aggregate (bug #14897)
--
2.25.1