Re: allow partial union-all and improve parallel subquery costing

Luc Vlaming Wed, 30 Dec 2020 05:55:03 -0800

On 23-10-2020 07:51, Luc Vlaming wrote:

On 14.10.20 09:38, Luc Vlaming wrote:
Hi,
It seems I ran the wrong make checks to verify everything is correct(make check insteadof make installcheck-world) and this uncovered another regress testchange. I also noticedthe statistics are sometimes giving different row count results so Iincreased the rowstatistics target to make sure the regress output is stable. Updatedpatch attached which
now successfully runs installcheck-world for v13 and master.

Kind regards,
Luc

________________________________________
From: Luc Vlaming <[email protected]>
Sent: Tuesday, October 13, 2020 10:57 AM
To: pgsql-hackers
Subject: allow partial union-all and improve parallel subquery costing

Hi,
While developing some improvements for TPC-DS queries I found out thatwithUNION ALL partial paths are not emitted. Whilst fixing that I alsocame acrossthe subquery costing which does not seem to consider parallelism whendoing
the costing.

I added a simplified testcase in pg-regress to show this goes wrong, and
attached also a before and after explain output of tpc-ds SF100 query 5
based on version 12.4.
I hope I followed all etiquette and these kind of improvements arewelcome.
Kind regards,
Luc
Swarm64
Hi,
Created a commitfest entry assuming this is the right thing to do sothat someone can potentially pick it up during the commitfest.
Kind regards,
Luc
Swarm64


Hi,

Providing an updated patch based on latest master.

Cheers,
Luc

>From 032c48b51ee0d436c91d9db81ce800d91168bd01 Mon Sep 17 00:00:00 2001
From: Luc Vlaming <[email protected]>
Date: Wed, 30 Dec 2020 14:49:48 +0100
Subject: [PATCH v3] Allow partial UNION ALL; improve parallel subquery costing

---
 src/backend/optimizer/path/costsize.c         | 11 ++++
 src/backend/optimizer/prep/prepunion.c        |  4 ++
 .../regress/expected/incremental_sort.out     | 10 ++--
 src/test/regress/expected/union.out           | 52 +++++++++++++++++++
 src/test/regress/sql/union.sql                | 37 +++++++++++++
 5 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 22d6935824..1c3b04c4d7 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1328,6 +1328,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
 	startup_cost += path->path.pathtarget->cost.startup;
 	run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
 
+	/* Adjust costing for parallelism, if used. */
+	if (path->path.parallel_workers > 0)
+	{
+    		double          parallel_divisor = get_parallel_divisor(&path->path);
+
+		path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+		/* The CPU cost is divided among all the workers. */
+		run_cost /= parallel_divisor;
+	}
+
 	path->path.startup_cost += startup_cost;
 	path->path.total_cost += startup_cost + run_cost;
 }
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 745f443e5c..4001eb87f3 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -678,6 +678,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 							   NIL, NULL,
 							   parallel_workers, enable_parallel_append,
 							   NIL, -1);
+
+		if (op->all && enable_parallel_append)
+			add_partial_path(result_rel, ppath);
+
 		ppath = (Path *)
 			create_gather_path(root, result_rel, ppath,
 							   result_rel->reltarget, NULL, NULL);
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index a8cbfd9f5f..40265674c4 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1459,14 +1459,12 @@ explain (costs off) select * from t union select * from t order by 1,3;
    ->  Unique
          ->  Sort
                Sort Key: t.a, t.b, t.c
-               ->  Append
-                     ->  Gather
-                           Workers Planned: 2
+               ->  Gather
+                     Workers Planned: 2
+                     ->  Parallel Append
                            ->  Parallel Seq Scan on t
-                     ->  Gather
-                           Workers Planned: 2
                            ->  Parallel Seq Scan on t t_1
-(13 rows)
+(11 rows)
 
 -- Full sort, not just incremental sort can be pushed below a gather merge path
 -- by generate_useful_gather_paths.
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f5..cf7660f524 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1420,3 +1420,55 @@ where (x = 0) or (q1 >= q2 and q1 <= q2);
  4567890123456789 |  4567890123456789 | 1
 (6 rows)
 
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain %s', $1)
+    loop
+        tmp := regexp_match(ln, 'rows=(\d*)');
+        return query select tmp[1]::int;
+    end loop;
+end;
+$$;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set default_statistics_target = 1000;
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+                   QUERY PLAN                   
+------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Parallel Seq Scan on tenk1 a
+         ->  Parallel Seq Scan on tenk1 b
+(6 rows)
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+ check_estimated_rows 
+----------------------
+                19990
+                     
+                 8330
+                 4165
+                 4165
+                 4165
+(6 rows)
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+reset default_statistics_target;
+drop function check_estimated_rows(text);
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index ce22f34c71..ec23991d3e 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -540,3 +540,40 @@ select * from
    union all
    select *, 1 as x from int8_tbl b) ss
 where (x = 0) or (q1 >= q2 and q1 <= q2);
+
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain %s', $1)
+    loop
+        tmp := regexp_match(ln, 'rows=(\d*)');
+        return query select tmp[1]::int;
+    end loop;
+end;
+$$;
+
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+set default_statistics_target = 1000;
+
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+reset default_statistics_target;
+drop function check_estimated_rows(text);
\ No newline at end of file
-- 
2.25.1

Re: allow partial union-all and improve parallel subquery costing

Reply via email to