Hi,

While designing an improvement for the cost sort model, I discovered that the query plan can vary if we slightly change the query text without pushing semantic differences. See the example below:

CREATE TABLE test(x integer, y integer,z text);
INSERT INTO test (x,y) SELECT x, 1 FROM generate_series(1,1000000) AS x;
CREATE INDEX ON test(x);
CREATE INDEX ON test(y);
VACUUM ANALYZE;
SET max_parallel_workers_per_gather = 0;

First query:

EXPLAIN (ANALYZE, TIMING OFF) SELECT count(*) FROM test t1, test t2
WHERE t1.x=t2.y AND t1.y=t2.x GROUP BY t1.x,t1.y;

And the second one - just reverse the left and right sides of expressions in the WHERE condition:

EXPLAIN (ANALYZE, TIMING OFF) SELECT count(*) FROM test t1, test t2
WHERE t2.y=t1.x AND t2.x=t1.y GROUP BY t1.x,t1.y;

You can see two different plans here:

GroupAggregate  (cost=37824.89..37824.96 rows=1 width=16)
  Group Key: t1.y, t1.x
  ->  Incremental Sort  (cost=37824.89..37824.94 rows=2 width=8)
        Sort Key: t1.y, t1.x
        Presorted Key: t1.y
        ->  Merge Join  (cost=0.85..37824.88 rows=1 width=8)
              Merge Cond: (t1.y = t2.x)
              Join Filter: (t2.y = t1.x)
              ->  Index Scan using test_y_idx on test t1
              ->  Index Scan using test_x_idx on test t2

GroupAggregate  (cost=37824.89..37824.92 rows=1 width=16)
  Group Key: t1.x, t1.y
  ->  Sort  (cost=37824.89..37824.90 rows=1 width=8)
        Sort Key: t1.x, t1.y
        Sort Method: quicksort  Memory: 25kB
        ->  Merge Join  (cost=0.85..37824.88 rows=1 width=8)
              Merge Cond: (t1.y = t2.x)
              Join Filter: (t2.y = t1.x)
              ->  Index Scan using test_y_idx on test t1
              ->  Index Scan using test_x_idx on test t2

Don't mind for now that the best plan is to do IncrementalSort with presorted key t1.x. Just pay attention to the fact that the plan has altered without any valuable reason. The cost_incremental_sort() routine causes such behaviour: it chooses the expression to estimate the number of groups from the first EquivalenceClass member that relies on the syntax. I tried to invent a simple solution to fight this minor case. But the most clear and straightforward way here is to save a reference to the expression that triggered the PathKey creation inside the PathKey itself.
See the sketch of the patch in the attachment.
I'm not sure this instability is worth fixing this way, but the dependence of the optimisation outcome on the query text looks buggy.

--
regards, Andrei Lepikhov
From 8eb9998fe3306005d5067e53fef6a032515ebed5 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepi...@gmail.com>
Date: Wed, 26 Jun 2024 16:28:12 +0700
Subject: [PATCH] Add a reference to the expression which has triggered
 creation of this pathkey.

For the sake of consistency in number of groups estimation in incremental sort
we need to have an information about expression which caused the pathkey. It
doesn't guarantee correct estimation but at least estimation doesn't rely on
a SQL string.
---
 contrib/postgres_fdw/postgres_fdw.c   |  6 +++--
 src/backend/optimizer/path/costsize.c | 28 +++++++++++++++++++----
 src/backend/optimizer/path/pathkeys.c | 33 ++++++++++++++++++++++-----
 src/include/nodes/pathnodes.h         |  2 ++
 src/include/optimizer/paths.h         |  3 ++-
 5 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/contrib/postgres_fdw/postgres_fdw.c 
b/contrib/postgres_fdw/postgres_fdw.c
index 0bb9a5ae8f..da26d99fb1 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -976,6 +976,7 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, 
RelOptInfo *rel)
        foreach(lc, useful_eclass_list)
        {
                EquivalenceClass *cur_ec = lfirst(lc);
+               EquivalenceMember *em;
                PathKey    *pathkey;
 
                /* If redundant with what we did above, skip it. */
@@ -988,14 +989,15 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, 
RelOptInfo *rel)
                        continue;
 
                /* If no pushable expression for this rel, skip it. */
-               if (find_em_for_rel(root, cur_ec, rel) == NULL)
+               if ((em = find_em_for_rel(root, cur_ec, rel)) == NULL)
                        continue;
 
                /* Looks like we can generate a pathkey, so let's do it. */
                pathkey = make_canonical_pathkey(root, cur_ec,
                                                                                
 linitial_oid(cur_ec->ec_opfamilies),
                                                                                
 BTLessStrategyNumber,
-                                                                               
 false);
+                                                                               
 false,
+                                                                               
 em->em_expr); /* TODO */
                useful_pathkeys_list = lappend(useful_pathkeys_list,
                                                                           
list_make1(pathkey));
        }
diff --git a/src/backend/optimizer/path/costsize.c 
b/src/backend/optimizer/path/costsize.c
index ee23ed7835..a64f3e5797 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2038,21 +2038,41 @@ cost_incremental_sort(Path *path,
        foreach(l, pathkeys)
        {
                PathKey    *key = (PathKey *) lfirst(l);
-               EquivalenceMember *member = (EquivalenceMember *)
-                       linitial(key->pk_eclass->ec_members);
+
+#ifdef USE_ASSERT_CHECKING
+               ListCell *lc;
+
+               /* Be paranoid, but caring about performace don't check on prod 
*/
+               foreach(lc, key->pk_eclass->ec_members)
+               {
+                       if (find_ec_member_matching_expr(
+                                                               key->pk_eclass, 
key->source_expr,
+                                                               
pull_varnos(root, (Node *) key->source_expr)))
+                               break;
+               }
+               if (!lc)
+                       /*
+                        * XXX: Can we build PathKey over derived expression 
and does it
+                        * trigger this error?
+                        */
+                       elog(ERROR, "PathKey source expression must be a part 
of EQ class");
+#endif
+
+               /* Reassure ourselves no one created pathkeys alternative way */
+               Assert(key->source_expr != NULL);
 
                /*
                 * Check if the expression contains Var with "varno 0" so that 
we
                 * don't call estimate_num_groups in that case.
                 */
-               if (bms_is_member(0, pull_varnos(root, (Node *) 
member->em_expr)))
+               if (bms_is_member(0, pull_varnos(root, (Node *) 
key->source_expr)))
                {
                        unknown_varno = true;
                        break;
                }
 
                /* expression not containing any Vars with "varno 0" */
-               presortedExprs = lappend(presortedExprs, member->em_expr);
+               presortedExprs = lappend(presortedExprs, key->source_expr);
 
                if (foreach_current_index(l) + 1 >= presorted_keys)
                        break;
diff --git a/src/backend/optimizer/path/pathkeys.c 
b/src/backend/optimizer/path/pathkeys.c
index 416fc4e240..d859e79208 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -54,7 +54,7 @@ static bool right_merge_direction(PlannerInfo *root, PathKey 
*pathkey);
 PathKey *
 make_canonical_pathkey(PlannerInfo *root,
                                           EquivalenceClass *eclass, Oid 
opfamily,
-                                          int strategy, bool nulls_first)
+                                          int strategy, bool nulls_first, Expr 
*src_expr)
 {
        PathKey    *pk;
        ListCell   *lc;
@@ -89,6 +89,7 @@ make_canonical_pathkey(PlannerInfo *root,
        pk->pk_opfamily = opfamily;
        pk->pk_strategy = strategy;
        pk->pk_nulls_first = nulls_first;
+       pk->source_expr = src_expr;
 
        root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
 
@@ -241,7 +242,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
 
        /* And finally we can find or create a PathKey node */
        return make_canonical_pathkey(root, eclass, opfamily,
-                                                                 strategy, 
nulls_first);
+                                                                 strategy, 
nulls_first, expr);
 }
 
 /*
@@ -1117,7 +1118,8 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo 
*rel,
                                                                                
           outer_ec,
                                                                                
           sub_pathkey->pk_opfamily,
                                                                                
           sub_pathkey->pk_strategy,
-                                                                               
           sub_pathkey->pk_nulls_first);
+                                                                               
           sub_pathkey->pk_nulls_first,
+                                                                               
           (Expr *) outer_var);
                        }
                }
                else
@@ -1199,7 +1201,8 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo 
*rel,
                                                                                
                          outer_ec,
                                                                                
                          sub_pathkey->pk_opfamily,
                                                                                
                          sub_pathkey->pk_strategy,
-                                                                               
                          sub_pathkey->pk_nulls_first);
+                                                                               
                          sub_pathkey->pk_nulls_first,
+                                                                               
                          (Expr *) outer_var);
                                        /* score = # of equivalence peers */
                                        score = 
list_length(outer_ec->ec_members) - 1;
                                        /* +1 if it matches the proper 
query_pathkeys item */
@@ -1643,6 +1646,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
        List       *pathkeys = NIL;
        int                     nClauses = list_length(mergeclauses);
        EquivalenceClass **ecs;
+       Expr                     **exprs;
        int                *scores;
        int                     necs;
        ListCell   *lc;
@@ -1657,6 +1661,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
         * duplicates) and their "popularity" scores.
         */
        ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass 
*));
+       exprs = (Expr **) palloc(nClauses * sizeof(Expr *));
        scores = (int *) palloc(nClauses * sizeof(int));
        necs = 0;
 
@@ -1666,14 +1671,21 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
                EquivalenceClass *oeclass;
                int                     score;
                ListCell   *lc2;
+               Expr       *expr;
 
                /* get the outer eclass */
                update_mergeclause_eclasses(root, rinfo);
 
                if (rinfo->outer_is_left)
+               {
                        oeclass = rinfo->left_ec;
+                       expr = (Expr *) linitial(((OpExpr *) 
rinfo->clause)->args);
+               }
                else
+               {
                        oeclass = rinfo->right_ec;
+                       expr = (Expr *) lsecond(((OpExpr *) 
rinfo->clause)->args);
+               }
 
                /* reject duplicates */
                for (j = 0; j < necs; j++)
@@ -1697,6 +1709,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
                }
 
                ecs[necs] = oeclass;
+               exprs[necs] = expr;
+
                scores[necs] = score;
                necs++;
        }
@@ -1798,7 +1812,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
                                                                                
 ec,
                                                                                
 linitial_oid(ec->ec_opfamilies),
                                                                                
 BTLessStrategyNumber,
-                                                                               
 false);
+                                                                               
 false,
+                                                                               
 exprs[best_j]);
                /* can't be redundant because no duplicate ECs */
                Assert(!pathkey_is_redundant(pathkey, pathkeys));
                pathkeys = lappend(pathkeys, pathkey);
@@ -1852,18 +1867,23 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
                EquivalenceClass *oeclass;
                EquivalenceClass *ieclass;
                PathKey    *pathkey;
+               Expr       *src_expr;
 
                update_mergeclause_eclasses(root, rinfo);
 
+               Assert(IsA(rinfo->clause, OpExpr) && rinfo->orclause == NULL);
+
                if (rinfo->outer_is_left)
                {
                        oeclass = rinfo->left_ec;
                        ieclass = rinfo->right_ec;
+                       src_expr = (Expr *) lsecond(((OpExpr *) 
rinfo->clause)->args);
                }
                else
                {
                        oeclass = rinfo->right_ec;
                        ieclass = rinfo->left_ec;
+                       src_expr = (Expr *) linitial(((OpExpr *) 
rinfo->clause)->args);
                }
 
                /* outer eclass should match current or next pathkeys */
@@ -1891,7 +1911,8 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
                                                                                
         ieclass,
                                                                                
         opathkey->pk_opfamily,
                                                                                
         opathkey->pk_strategy,
-                                                                               
         opathkey->pk_nulls_first);
+                                                                               
         opathkey->pk_nulls_first,
+                                                                               
         src_expr);
 
                /*
                 * Don't generate redundant pathkeys (which can happen if 
multiple
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 2ba297c117..7e99725e00 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1469,6 +1469,8 @@ typedef struct PathKey
        Oid                     pk_opfamily;    /* btree opfamily defining the 
ordering */
        int                     pk_strategy;    /* sort direction (ASC or DESC) 
*/
        bool            pk_nulls_first; /* do NULLs come before normal values? 
*/
+
+       Expr       *source_expr;        /* Expression, which triggered this 
creation */
 } PathKey;
 
 /*
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 5e88c0224a..4b99229ebf 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -264,7 +264,8 @@ extern bool has_useful_pathkeys(PlannerInfo *root, 
RelOptInfo *rel);
 extern List *append_pathkeys(List *target, List *source);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
                                                                           
EquivalenceClass *eclass, Oid opfamily,
-                                                                          int 
strategy, bool nulls_first);
+                                                                          int 
strategy, bool nulls_first,
+                                                                          Expr 
*src_expr);
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
                                                                        List 
*live_childrels);
 
-- 
2.45.2

Reply via email to