Hello,

Currently, in case of alternative subplans that do hashed vs per-row lookups,
the per-row estimate is used when planning the rest of the query.
It's also coded in not quite an explicit way.

In [1] we found a situation where it leads to a suboptimal plan,
as it bloats the overall cost into large figures,
a decision related to an outer part of the plan look negligible to the planner,
and as a result it doesn't elaborate on choosing the optimal one.

The patch is to fix it. Our linear model for costs cannot quite accommodate
the piecewise linear matter of alternative subplans,
so it is based on ugly heuristics and still cannot be very precise,
but I think it's better than the current one.

Thoughts?

Best, Alex

[1] https://www.postgresql.org/message-id/flat/ff42b25b-ff03-27f8-ed11-b8255d658cd5%40imap.cc
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b976afb69d..5e2c5ec822 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -92,6 +92,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "parser/parsetree.h"
+#include "utils/float.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/spccache.h"
@@ -4283,15 +4284,57 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	else if (IsA(node, AlternativeSubPlan))
 	{
 		/*
-		 * Arbitrarily use the first alternative plan for costing.  (We should
-		 * certainly only include one alternative, and we don't yet have
-		 * enough information to know which one the executor is most likely to
-		 * use.)
+		 * Estimate the cost as some mean of the alternatives.
+		 * It's not uncommon for the alternative costs to be of different
+		 * orders of magnitude, so arithmetic mean would be too biased towards
+		 * the slower one. That's why for per-tuple coefficient we use geometric
+		 * mean.
+		 *
+		 * It's tempting to use geometric mean for startup costs too, but
+		 * one of them can be zero, which will result in substantial
+		 * underestimation. So instead we estimate the cost of returning *first*
+		 * tuple as geometric mean of single-tuple costs for the alternatives.
 		 */
-		AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+		List   *subplans = ((AlternativeSubPlan *) node)->subplans;
+		Cost 	per_tuple_mean = 1;
+		Cost 	first_tuple_mean = 1;
+		Cost 	startup_min = get_float8_infinity();
+		Cost 	startup_max = 0;
+		Cost 	startup_mean;
+		ListCell   *lc;
+
+		foreach(lc, subplans)
+		{
+			cost_qual_eval_context subplanContext;
+
+			subplanContext.root = context->root;
+			subplanContext.total.startup = 0;
+			subplanContext.total.per_tuple = 0;
 
-		return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
-									 context);
+			cost_qual_eval_walker((Node *) lfirst(lc), &subplanContext);
+
+			Assert(subplanContext.total.startup >= 0);
+			Assert(subplanContext.total.per_tuple >= 0);
+
+			per_tuple_mean *= subplanContext.total.per_tuple;
+			first_tuple_mean *= subplanContext.total.startup + subplanContext.total.per_tuple;
+			startup_min = float8_min(startup_min, subplanContext.total.startup);
+			startup_max = float8_max(startup_min, subplanContext.total.startup);
+		}
+		per_tuple_mean =
+			pow(per_tuple_mean, 1.0 / list_length(subplans));
+		first_tuple_mean =
+			pow(first_tuple_mean, 1.0 / list_length(subplans));
+		startup_mean = first_tuple_mean - per_tuple_mean;
+		/* make sure this calculation makes a sane value */
+		startup_mean = float8_max(startup_mean, startup_min);
+		startup_mean = float8_min(startup_mean, startup_max);
+
+		context->total.startup += startup_mean;
+		context->total.per_tuple += per_tuple_mean;
+
+		/* We've already recursed into children, skip the generic recursion */
+		return false;
 	}
 	else if (IsA(node, PlaceHolderVar))
 	{
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 5de53f2782..98787e75e0 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -2301,17 +2301,16 @@ SELECT * FROM v1 WHERE a=8;
 
 EXPLAIN (VERBOSE, COSTS OFF)
 UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a < 7 AND a != 6;
-                                                        QUERY PLAN                                                         
----------------------------------------------------------------------------------------------------------------------------
+                                                                         QUERY PLAN                                                                          
+-------------------------------------------------------------------------------------------------------------------------------------------------------------
  Update on public.t1
    Update on public.t1
    Update on public.t11 t1_1
    Update on public.t12 t1_2
    Update on public.t111 t1_3
-   ->  Index Scan using t1_a_idx on public.t1
+   ->  Seq Scan on public.t1
          Output: 100, t1.b, t1.c, t1.ctid
-         Index Cond: ((t1.a > 5) AND (t1.a < 7))
-         Filter: ((t1.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1.a) AND leakproof(t1.a))
+         Filter: ((t1.a > 5) AND (t1.a < 7) AND (t1.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1.a) AND leakproof(t1.a))
          SubPlan 1
            ->  Append
                  ->  Seq Scan on public.t12 t12_1
@@ -2324,19 +2323,16 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a < 7 AND a != 6;
                        Output: t12_4.a
                  ->  Seq Scan on public.t111 t12_5
                        Output: t12_5.a
-   ->  Index Scan using t11_a_idx on public.t11 t1_1
+   ->  Seq Scan on public.t11 t1_1
          Output: 100, t1_1.b, t1_1.c, t1_1.d, t1_1.ctid
-         Index Cond: ((t1_1.a > 5) AND (t1_1.a < 7))
-         Filter: ((t1_1.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_1.a) AND leakproof(t1_1.a))
-   ->  Index Scan using t12_a_idx on public.t12 t1_2
+         Filter: ((t1_1.a > 5) AND (t1_1.a < 7) AND (t1_1.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_1.a) AND leakproof(t1_1.a))
+   ->  Seq Scan on public.t12 t1_2
          Output: 100, t1_2.b, t1_2.c, t1_2.e, t1_2.ctid
-         Index Cond: ((t1_2.a > 5) AND (t1_2.a < 7))
-         Filter: ((t1_2.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_2.a) AND leakproof(t1_2.a))
-   ->  Index Scan using t111_a_idx on public.t111 t1_3
+         Filter: ((t1_2.a > 5) AND (t1_2.a < 7) AND (t1_2.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_2.a) AND leakproof(t1_2.a))
+   ->  Seq Scan on public.t111 t1_3
          Output: 100, t1_3.b, t1_3.c, t1_3.d, t1_3.e, t1_3.ctid
-         Index Cond: ((t1_3.a > 5) AND (t1_3.a < 7))
-         Filter: ((t1_3.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_3.a) AND leakproof(t1_3.a))
-(33 rows)
+         Filter: ((t1_3.a > 5) AND (t1_3.a < 7) AND (t1_3.a <> 6) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_3.a) AND leakproof(t1_3.a))
+(29 rows)
 
 UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a < 7 AND a != 6;
 SELECT * FROM v1 WHERE a=100; -- Nothing should have been changed to 100
@@ -2351,17 +2347,16 @@ SELECT * FROM t1 WHERE a=100; -- Nothing should have been changed to 100
 
 EXPLAIN (VERBOSE, COSTS OFF)
 UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8;
-                                               QUERY PLAN                                                
----------------------------------------------------------------------------------------------------------
+                                                                QUERY PLAN                                                                 
+-------------------------------------------------------------------------------------------------------------------------------------------
  Update on public.t1
    Update on public.t1
    Update on public.t11 t1_1
    Update on public.t12 t1_2
    Update on public.t111 t1_3
-   ->  Index Scan using t1_a_idx on public.t1
+   ->  Seq Scan on public.t1
          Output: (t1.a + 1), t1.b, t1.c, t1.ctid
-         Index Cond: ((t1.a > 5) AND (t1.a = 8))
-         Filter: ((alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1.a) AND leakproof(t1.a))
+         Filter: ((t1.a > 5) AND (t1.a = 8) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1.a) AND leakproof(t1.a))
          SubPlan 1
            ->  Append
                  ->  Seq Scan on public.t12 t12_1
@@ -2374,19 +2369,16 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8;
                        Output: t12_4.a
                  ->  Seq Scan on public.t111 t12_5
                        Output: t12_5.a
-   ->  Index Scan using t11_a_idx on public.t11 t1_1
+   ->  Seq Scan on public.t11 t1_1
          Output: (t1_1.a + 1), t1_1.b, t1_1.c, t1_1.d, t1_1.ctid
-         Index Cond: ((t1_1.a > 5) AND (t1_1.a = 8))
-         Filter: ((alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_1.a) AND leakproof(t1_1.a))
-   ->  Index Scan using t12_a_idx on public.t12 t1_2
+         Filter: ((t1_1.a > 5) AND (t1_1.a = 8) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_1.a) AND leakproof(t1_1.a))
+   ->  Seq Scan on public.t12 t1_2
          Output: (t1_2.a + 1), t1_2.b, t1_2.c, t1_2.e, t1_2.ctid
-         Index Cond: ((t1_2.a > 5) AND (t1_2.a = 8))
-         Filter: ((alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_2.a) AND leakproof(t1_2.a))
-   ->  Index Scan using t111_a_idx on public.t111 t1_3
+         Filter: ((t1_2.a > 5) AND (t1_2.a = 8) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_2.a) AND leakproof(t1_2.a))
+   ->  Seq Scan on public.t111 t1_3
          Output: (t1_3.a + 1), t1_3.b, t1_3.c, t1_3.d, t1_3.e, t1_3.ctid
-         Index Cond: ((t1_3.a > 5) AND (t1_3.a = 8))
-         Filter: ((alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_3.a) AND leakproof(t1_3.a))
-(33 rows)
+         Filter: ((t1_3.a > 5) AND (t1_3.a = 8) AND (alternatives: SubPlan 1 or hashed SubPlan 2) AND snoop(t1_3.a) AND leakproof(t1_3.a))
+(29 rows)
 
 UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8;
 NOTICE:  snooped value: 8

Reply via email to