This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch REL_2_STABLE
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit b80b084ad455691d176a9b1d601109091ed552c6
Author: Tom Lane <[email protected]>
AuthorDate: Fri Aug 5 13:58:37 2022 -0400

    Fix non-bulletproof ScalarArrayOpExpr code for extended statistics.
    
    statext_is_compatible_clause_internal() checked that the arguments
    of a ScalarArrayOpExpr are one Var and one Const, but it would allow
    cases where the Const was on the left.  Subsequent uses of the clause
    are not expecting that and would suffer assertion failures or core
    dumps.  mcv.c also had not bothered to cope with the case of a NULL
    array constant, which seems really unacceptably sloppy of somebody.
    (Although our tools failed us there too, since AFAIK neither Coverity
    nor any compiler warned of the obvious use-of-uninitialized-variable
    condition.)  It seems best to handle that by having
    statext_is_compatible_clause_internal() reject it.
    
    Noted while fixing bug #17570.  Back-patch to v13 where the
    extended stats code grew some awareness of ScalarArrayOpExpr.
---
 src/backend/statistics/extended_stats.c           | 12 +++++++++---
 src/backend/statistics/mcv.c                      | 23 ++++++++++-------------
 src/test/regress/expected/stats_ext.out           | 22 ++++++++++++++++++----
 src/test/regress/expected/stats_ext_optimizer.out | 22 ++++++++++++++++++----
 src/test/regress/sql/stats_ext.sql                | 14 ++++++++++----
 5 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c 
b/src/backend/statistics/extended_stats.c
index ee1c25416bd..d91d80357ff 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1333,8 +1333,8 @@ choose_best_statistics(List *stats, char requiredkind,
  *
  * (c) combinations using AND/OR/NOT
  *
- * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (array)) or (Var/Expr
- * op ALL (array))
+ * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
+ * (Var/Expr op ALL (Const))
  *
  * In the future, the range of supported clauses may be expanded to more
  * complex cases, for example (Var op Var).
@@ -1454,13 +1454,19 @@ statext_is_compatible_clause_internal(PlannerInfo 
*root, Node *clause,
                RangeTblEntry *rte = root->simple_rte_array[relid];
                ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
                Node       *clause_expr;
+               Const      *cst;
+               bool            expronleft;
 
                /* Only expressions with two arguments are considered 
compatible. */
                if (list_length(expr->args) != 2)
                        return false;
 
                /* Check if the expression has the right shape (one Var, one 
Const) */
-               if (!examine_opclause_args(expr->args, &clause_expr, NULL, 
NULL))
+               if (!examine_opclause_args(expr->args, &clause_expr, &cst, 
&expronleft))
+                       return false;
+
+               /* We only support Var on left and non-null array constants */
+               if (!expronleft || cst->constisnull)
                        return false;
 
                /*
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index e6a60865282..9cbd093fce7 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1746,20 +1746,17 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                        if (!examine_opclause_args(expr->args, &clause_expr, 
&cst, &expronleft))
                                elog(ERROR, "incompatible clause");
 
-                       /* ScalarArrayOpExpr has the Var always on the left */
-                       Assert(expronleft);
+                       /* We expect Var on left and non-null constant on right 
*/
+                       if (!expronleft || cst->constisnull)
+                               elog(ERROR, "incompatible clause");
 
-                       /* XXX what if (cst->constisnull == NULL)? */
-                       if (!cst->constisnull)
-                       {
-                               arrayval = DatumGetArrayTypeP(cst->constvalue);
-                               get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
-                                                                        
&elmlen, &elmbyval, &elmalign);
-                               deconstruct_array(arrayval,
-                                                                 
ARR_ELEMTYPE(arrayval),
-                                                                 elmlen, 
elmbyval, elmalign,
-                                                                 &elem_values, 
&elem_nulls, &num_elems);
-                       }
+                       arrayval = DatumGetArrayTypeP(cst->constvalue);
+                       get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+                                                                &elmlen, 
&elmbyval, &elmalign);
+                       deconstruct_array(arrayval,
+                                                         
ARR_ELEMTYPE(arrayval),
+                                                         elmlen, elmbyval, 
elmalign,
+                                                         &elem_values, 
&elem_nulls, &num_elems);
 
                        /* match the attribute/expression to a dimension of the 
statistic */
                        idx = mcv_match_expression(clause_expr, keys, exprs, 
&collid);
diff --git a/src/test/regress/expected/stats_ext.out 
b/src/test/regress/expected/stats_ext.out
index 06aff4d5bd0..60c7915eff0 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -1835,7 +1835,8 @@ CREATE TABLE mcv_lists (
     b VARCHAR,
     filler3 DATE,
     c INT,
-    d TEXT
+    d TEXT,
+    ia INT[]
 )
 WITH (autovacuum_enabled = off);
 -- random data (no MCV list)
@@ -1905,8 +1906,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE mod(a,7) = 1 A
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
-INSERT INTO mcv_lists (a, b, c, filler1)
-     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) 
s(i);
+INSERT INTO mcv_lists (a, b, c, ia, filler1)
+     SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
+       FROM generate_series(1,5000) s(i);
 ANALYZE mcv_lists;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b 
= ''1''');
  estimated | actual 
@@ -2046,8 +2048,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
          1 |    100
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+ estimated | actual 
+-----------+--------
+         4 |     50
+(1 row)
+
 -- create statistics
-CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
 ANALYZE mcv_lists;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b 
= ''1''');
  estimated | actual 
@@ -2193,6 +2201,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
        100 |    100
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+ estimated | actual 
+-----------+--------
+         4 |     50
+(1 row)
+
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 SELECT d.stxdmcv IS NOT NULL
diff --git a/src/test/regress/expected/stats_ext_optimizer.out 
b/src/test/regress/expected/stats_ext_optimizer.out
index dafbf0a28b4..9ad26109b59 100644
--- a/src/test/regress/expected/stats_ext_optimizer.out
+++ b/src/test/regress/expected/stats_ext_optimizer.out
@@ -1857,7 +1857,8 @@ CREATE TABLE mcv_lists (
     b VARCHAR,
     filler3 DATE,
     c INT,
-    d TEXT
+    d TEXT,
+    ia INT[]
 )
 WITH (autovacuum_enabled = off);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 
'filler1' as the Apache Cloudberry data distribution key for this table.
@@ -1929,8 +1930,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE mod(a,7) = 1 A
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
-INSERT INTO mcv_lists (a, b, c, filler1)
-     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) 
s(i);
+INSERT INTO mcv_lists (a, b, c, ia, filler1)
+     SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
+       FROM generate_series(1,5000) s(i);
 ANALYZE mcv_lists;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b 
= ''1''');
  estimated | actual 
@@ -2070,8 +2072,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
         22 |    100
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+ estimated | actual 
+-----------+--------
+         4 |     50
+(1 row)
+
 -- create statistics
-CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
 ANALYZE mcv_lists;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b 
= ''1''');
  estimated | actual 
@@ -2217,6 +2225,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
         22 |    100
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+ estimated | actual 
+-----------+--------
+         4 |     50
+(1 row)
+
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 SELECT d.stxdmcv IS NOT NULL
diff --git a/src/test/regress/sql/stats_ext.sql 
b/src/test/regress/sql/stats_ext.sql
index 744bb00c161..abd1d0b8608 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -923,7 +923,8 @@ CREATE TABLE mcv_lists (
     b VARCHAR,
     filler3 DATE,
     c INT,
-    d TEXT
+    d TEXT,
+    ia INT[]
 )
 WITH (autovacuum_enabled = off);
 
@@ -972,8 +973,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists 
WHERE mod(a,7) = 1 A
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
 
-INSERT INTO mcv_lists (a, b, c, filler1)
-     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) 
s(i);
+INSERT INTO mcv_lists (a, b, c, ia, filler1)
+     SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i
+       FROM generate_series(1,5000) s(i);
 
 ANALYZE mcv_lists;
 
@@ -1023,8 +1025,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL 
(ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, 
NULL, 3])');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+
 -- create statistics
-CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists;
 
 ANALYZE mcv_lists;
 
@@ -1076,6 +1080,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a < ALL (ARRAY
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL 
(ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, 
NULL, 3])');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY 
(ARRAY[4,5]) AND 4 = ANY(ia)');
+
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to