Re: Optimization outcome depends on the index order

Andrei Lepikhov Fri, 22 Dec 2023 09:25:10 -0800

On 22/12/2023 11:48, Alexander Korotkov wrote:

 > Because we must trust all predictions made by the planner, we just
 > choose the most trustworthy path. According to the planner logic, it is
 > a path with a smaller selectivity. We can make mistakes anyway just
 > because of the nature of estimation.
Even if we need to take selectivity into account here, it's still notclear why this should be on top of other logic later in add_path().

I got your point now, thanks for pointing it out. In the next version ofthe patch selectivity is used as a criteria only in the case of COSTS_EQUAL.


--
regards,
Andrei Lepikhov
Postgres Professional

From 45bda9784d28dc9cec90c5b33285023a49850800 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <[email protected]>
Date: Mon, 27 Nov 2023 11:23:48 +0700
Subject: [PATCH] Choose an index path with the best selectivity estimation.

In the case when optimizer predicts only one row prefer choosing UNIQUE indexes
In other cases, if optimizer treats indexes as equal, make a last attempt
selecting the index with less selectivity - this decision takes away dependency
on the order of indexes in an index list (good for reproduction of some issues)
and proposes one more objective argument to choose specific index.
---
 src/backend/optimizer/util/pathnode.c         | 32 +++++++++++++++
 src/test/regress/expected/functional_deps.out | 39 +++++++++++++++++++
 src/test/regress/sql/functional_deps.sql      | 32 +++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/src/backend/optimizer/util/pathnode.c 
b/src/backend/optimizer/util/pathnode.c
index 0b1d17b9d3..984c974b57 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -454,6 +454,38 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                costcmp = compare_path_costs_fuzzily(new_path, old_path,
                                                                                
         STD_FUZZ_FACTOR);
 
+               /*
+                * Apply some heuristics on index paths.
+                */
+               if (costcmp == COSTS_EQUAL)
+               {
+                       IndexPath *inp = (IndexPath *) new_path;
+                       IndexPath *iop = (IndexPath *) old_path;
+
+                       if  (IsA(new_path, IndexPath) && IsA(old_path, 
IndexPath))
+                       {
+                               /*
+                                * When both paths are predicted to produce 
only one tuple,
+                                * the optimiser should prefer choosing a 
unique index scan
+                                * in all cases.
+                                */
+                               if (inp->indexinfo->unique && 
!iop->indexinfo->unique)
+                                       costcmp = COSTS_BETTER1;
+                               else if (!inp->indexinfo->unique && 
iop->indexinfo->unique)
+                                       costcmp = COSTS_BETTER2;
+                               else if (costcmp != COSTS_DIFFERENT)
+                                       /*
+                                        * If the optimiser doesn't have an 
obviously stable choice
+                                        * of unique index, increase the chance 
of avoiding mistakes
+                                        * by choosing an index with smaller 
selectivity.
+                                        * This option makes decision more 
conservative and looks
+                                        * debatable.
+                                        */
+                                       costcmp = (inp->indexselectivity < 
iop->indexselectivity) ?
+                                                                               
                COSTS_BETTER1 : COSTS_BETTER2;
+                       }
+               }
+
                /*
                 * If the two paths compare differently for startup and total 
cost,
                 * then we want to keep both, and we can skip comparing 
pathkeys and
diff --git a/src/test/regress/expected/functional_deps.out 
b/src/test/regress/expected/functional_deps.out
index 32381b8ae7..7057254278 100644
--- a/src/test/regress/expected/functional_deps.out
+++ b/src/test/regress/expected/functional_deps.out
@@ -230,3 +230,42 @@ EXECUTE foo;
 ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
 EXECUTE foo;  -- fail
 ERROR:  column "articles.keywords" must appear in the GROUP BY clause or be 
used in an aggregate function
+/*
+ * Corner case of the PostgreSQL optimizer:
+ *
+ * ANDed clauses selectivity multiplication increases total selectivity error.
+ * If such non-true selectivity is so tiny that row estimation predicts the
+ * absolute minimum number of tuples (1), the optimizer can't choose between
+ * different indexes and picks a first from the index list (last created).
+ */
+CREATE TABLE t AS (  -- selectivity(c1)*selectivity(c2)*nrows <= 1
+    SELECT  gs AS c1,
+            gs AS c2,
+            (gs % 10) AS c3, -- not in the good index.
+            (gs % 100) AS c4 -- not in the bad index.
+    FROM generate_series(1,1000) AS gs
+);
+CREATE INDEX bad ON t (c1,c2,c3);
+CREATE INDEX good ON t (c1,c2,c4);
+ANALYZE t;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Scan using good on t
+   Index Cond: ((c1 = 1) AND (c2 = 1) AND (c4 = 1))
+   Filter: (c3 = 1)
+(3 rows)
+
+-- Hack: set the bad index to the first position in the index list.
+DROP INDEX bad;
+CREATE INDEX bad ON t (c1,c2,c3);
+ANALYZE t;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Scan using good on t
+   Index Cond: ((c1 = 1) AND (c2 = 1) AND (c4 = 1))
+   Filter: (c3 = 1)
+(3 rows)
+
+DROP TABLE t CASCADE;
diff --git a/src/test/regress/sql/functional_deps.sql 
b/src/test/regress/sql/functional_deps.sql
index 406490b995..1be009b1ff 100644
--- a/src/test/regress/sql/functional_deps.sql
+++ b/src/test/regress/sql/functional_deps.sql
@@ -208,3 +208,35 @@ EXECUTE foo;
 ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
 
 EXECUTE foo;  -- fail
+
+/*
+ * Corner case of the PostgreSQL optimizer:
+ *
+ * ANDed clauses selectivity multiplication increases total selectivity error.
+ * If such non-true selectivity is so tiny that row estimation predicts the
+ * absolute minimum number of tuples (1), the optimizer can't choose between
+ * different indexes and picks a first from the index list (last created).
+ */
+
+CREATE TABLE t AS (  -- selectivity(c1)*selectivity(c2)*nrows <= 1
+    SELECT  gs AS c1,
+            gs AS c2,
+            (gs % 10) AS c3, -- not in the good index.
+            (gs % 100) AS c4 -- not in the bad index.
+    FROM generate_series(1,1000) AS gs
+);
+
+CREATE INDEX bad ON t (c1,c2,c3);
+CREATE INDEX good ON t (c1,c2,c4);
+ANALYZE t;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+
+-- Hack: set the bad index to the first position in the index list.
+DROP INDEX bad;
+CREATE INDEX bad ON t (c1,c2,c3);
+ANALYZE t;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+
+DROP TABLE t CASCADE;
-- 
2.43.0

Re: Optimization outcome depends on the index order

Reply via email to