Rebased the v7-patches to fix git apply failure.
-- Best regards, Ilia Evdokimov, Tantor Labs LLC, https://tantorlabs.com/
From 33094d8e110152c5c9af119373699be2ceb42abb Mon Sep 17 00:00:00 2001 From: Ilia Evdokimov <[email protected]> Date: Tue, 17 Mar 2026 13:40:57 +0300 Subject: [PATCH v7 2/2] Reduce planning time for large NOT IN lists containing NULL For x <> ALL (...), the presence of a NULL makes the selectivity 0.0. The planner currently still iterates over all elements and computes per-element selectivity, even though the final result is known. Add an early NULL check for constant arrays and immediately return 0.0 under ALL semantics. This reduces planning time for large <> ALL lists without changing semantics. --- src/backend/utils/adt/selfuncs.c | 17 +++++++++++ src/test/regress/expected/selectivity_est.out | 30 +++++++++++++++++++ src/test/regress/sql/selectivity_est.sql | 22 ++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d4da0e8dea9..073d93f4f3d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root, if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); + + /* + * For ALL semantics, if the array contains NULL, assume operator is + * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return + * zero. + */ + if (!useOr && array_contains_nulls(arrayval)) + return (Selectivity) 0.0; + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, @@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root, List *args; Selectivity s2; + /* + * For ALL semantics, if the array contains NULL, assume operator + * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so + * return zero. + */ + if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull) + return (Selectivity) 0.0; + /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator diff --git a/src/test/regress/expected/selectivity_est.out b/src/test/regress/expected/selectivity_est.out index 8fc5c9c9e07..d482f2ae7a0 100644 --- a/src/test/regress/expected/selectivity_est.out +++ b/src/test/regress/expected/selectivity_est.out @@ -175,4 +175,34 @@ false, true, false, true); Function Scan on generate_series g (cost=N..N rows=1000 width=N) (1 row) +-- +-- Test <> ALL when array initially contained NULL but no longer does +-- +CREATE FUNCTION replace_elem(arr int[], idx int, val int) +RETURNS int[] AS $$ +BEGIN + arr[idx] := val; + RETURN arr; +end; +$$ language plpgsql IMMUTABLE; +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])', +false, true, false, true ); + explain_mask_costs +---------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=9997 width=N) + Filter: (unique1 <> ALL ('{1,99,3}'::integer[])) +(2 rows) + +-- same array, constructed from an array with a NULL +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))', +false, true, false, true ); + explain_mask_costs +---------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=9997 width=N) + Filter: (unique1 <> ALL ('{1,99,3}'::integer[])) +(2 rows) + +DROP FUNCTION replace_elem; DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/sql/selectivity_est.sql b/src/test/regress/sql/selectivity_est.sql index 416d5ea1f75..3ffec43907f 100644 --- a/src/test/regress/sql/selectivity_est.sql +++ b/src/test/regress/sql/selectivity_est.sql @@ -122,5 +122,27 @@ SELECT explain_mask_costs($$ SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, false, true, false, true); +-- +-- Test <> ALL when array initially contained NULL but no longer does +-- +CREATE FUNCTION replace_elem(arr int[], idx int, val int) +RETURNS int[] AS $$ +BEGIN + arr[idx] := val; + RETURN arr; +end; +$$ language plpgsql IMMUTABLE; + +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])', +false, true, false, true ); + +-- same array, constructed from an array with a NULL +SELECT explain_mask_costs( + 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))', +false, true, false, true ); + +DROP FUNCTION replace_elem; + DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- 2.34.1
From 644fa9a077aaf7d77c3a58add925d2e01cbcae20 Mon Sep 17 00:00:00 2001 From: Ilia Evdokimov <[email protected]> Date: Tue, 17 Mar 2026 13:39:38 +0300 Subject: [PATCH v7 1/2] Move planner row-estimation tests to selectivity.sql Move explain_mask_costs() and the associated planner row-estimation tests from misc_functions.sql to a new regression test file, selectivity.sql. The tests exercise SupportRequestRows support functions and other planner selectivity estimation behavior, so they do not logically belong in misc_functions.sql. --- src/test/regress/expected/misc_functions.out | 178 ------------------ src/test/regress/expected/selectivity_est.out | 178 ++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/misc_functions.sql | 125 ------------ src/test/regress/sql/selectivity_est.sql | 126 +++++++++++++ 5 files changed, 305 insertions(+), 304 deletions(-) create mode 100644 src/test/regress/expected/selectivity_est.out create mode 100644 src/test/regress/sql/selectivity_est.sql diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 6c03b1a79d7..cf55cdf3688 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -2,46 +2,6 @@ \getenv libdir PG_LIBDIR \getenv dlsuffix PG_DLSUFFIX \set regresslib :libdir '/regress' :dlsuffix --- Function to assist with verifying EXPLAIN which includes costs. A series --- of bool flags allows control over which portions are masked out -CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, - hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text -LANGUAGE plpgsql AS -$$ -DECLARE - ln text; - analyze_str text; -BEGIN - IF do_analyze = true THEN - analyze_str := 'on'; - ELSE - analyze_str := 'off'; - END IF; - - -- avoid jit related output by disabling it - SET LOCAL jit = 0; - - FOR ln IN - EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', - analyze_str, query) - LOOP - IF hide_costs = true THEN - ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); - END IF; - - IF hide_row_est = true THEN - -- don't use 'g' so that we leave the actual rows intact - ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); - END IF; - - IF hide_width = true THEN - ln := regexp_replace(ln, 'width=\d+', 'width=N'); - END IF; - - RETURN NEXT ln; - END LOOP; -END; -$$; -- -- num_nulls() -- @@ -671,143 +631,6 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; Index Cond: (unique1 = g.g) (4 rows) --- --- Test the SupportRequestRows support function for generate_series_timestamp() --- --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- As above but with generate_series_timestamp -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- As above but with generate_series_timestamptz_at_zone() -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- Ensure the estimated and actual row counts match when the range isn't --- evenly divisible by the step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, -true, true, false, true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1) -(1 row) - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) -(1 row) - --- Ensure we get the default row estimate for infinity values -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - --- Ensure the row estimate behaves correctly when step size is zero. --- We expect generate_series_timestamp() to throw the error rather than in --- the support function. -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); -ERROR: step size cannot equal zero --- --- Test the SupportRequestRows support function for generate_series_numeric() --- --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) -(1 row) - --- As above but with non-default step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1) -(1 row) - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) -(1 row) - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, -true, true, false, true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) -(1 row) - --- Ensure we get the default row estimate for error cases (infinity/NaN values --- and zero step size) -SELECT explain_mask_costs($$ -SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -- -- Test SupportRequestInlineInFrom request -- @@ -970,7 +793,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, (1 row) DROP TABLE test_chunk_id; -DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- test stratnum translation support functions SELECT gist_translate_cmptype_common(7); gist_translate_cmptype_common diff --git a/src/test/regress/expected/selectivity_est.out b/src/test/regress/expected/selectivity_est.out new file mode 100644 index 00000000000..8fc5c9c9e07 --- /dev/null +++ b/src/test/regress/expected/selectivity_est.out @@ -0,0 +1,178 @@ +-- Function to assist with verifying EXPLAIN which includes costs. A series +-- of bool flags allows control over which portions are masked out +CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, + hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text +LANGUAGE plpgsql AS +$$ +DECLARE + ln text; + analyze_str text; +BEGIN + IF do_analyze = true THEN + analyze_str := 'on'; + ELSE + analyze_str := 'off'; + END IF; + + -- avoid jit related output by disabling it + SET LOCAL jit = 0; + + FOR ln IN + EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', + analyze_str, query) + LOOP + IF hide_costs = true THEN + ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); + END IF; + + IF hide_row_est = true THEN + -- don't use 'g' so that we leave the actual rows intact + ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); + END IF; + + IF hide_width = true THEN + ln := regexp_replace(ln, 'width=\d+', 'width=N'); + END IF; + + RETURN NEXT ln; + END LOOP; +END; +$$; +-- +-- Test the SupportRequestRows support function for generate_series_timestamp() +-- +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- As above but with generate_series_timestamp +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- As above but with generate_series_timestamptz_at_zone() +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- Ensure the estimated and actual row counts match when the range isn't +-- evenly divisible by the step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1) +(1 row) + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) +(1 row) + +-- Ensure we get the default row estimate for infinity values +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +-- Ensure the row estimate behaves correctly when step size is zero. +-- We expect generate_series_timestamp() to throw the error rather than in +-- the support function. +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); +ERROR: step size cannot equal zero +-- +-- Test the SupportRequestRows support function for generate_series_numeric() +-- +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) +(1 row) + +-- As above but with non-default step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1) +(1 row) + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) +(1 row) + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) +(1 row) + +-- Ensure we get the default row estimate for error cases (infinity/NaN values +-- and zero step size) +SELECT explain_mask_costs($$ +SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e779ada70cb..645f493319b 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls +test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls selectivity_est # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 35b7983996c..c8226652f2c 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -4,47 +4,6 @@ \set regresslib :libdir '/regress' :dlsuffix --- Function to assist with verifying EXPLAIN which includes costs. A series --- of bool flags allows control over which portions are masked out -CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, - hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text -LANGUAGE plpgsql AS -$$ -DECLARE - ln text; - analyze_str text; -BEGIN - IF do_analyze = true THEN - analyze_str := 'on'; - ELSE - analyze_str := 'off'; - END IF; - - -- avoid jit related output by disabling it - SET LOCAL jit = 0; - - FOR ln IN - EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', - analyze_str, query) - LOOP - IF hide_costs = true THEN - ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); - END IF; - - IF hide_row_est = true THEN - -- don't use 'g' so that we leave the actual rows intact - ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); - END IF; - - IF hide_width = true THEN - ln := regexp_replace(ln, 'width=\d+', 'width=N'); - END IF; - - RETURN NEXT ln; - END LOOP; -END; -$$; - -- -- num_nulls() -- @@ -277,89 +236,6 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; --- --- Test the SupportRequestRows support function for generate_series_timestamp() --- - --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- As above but with generate_series_timestamp -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- As above but with generate_series_timestamptz_at_zone() -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, -true, true, false, true); - --- Ensure the estimated and actual row counts match when the range isn't --- evenly divisible by the step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, -true, true, false, true); - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, -true, true, false, true); - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- Ensure we get the default row estimate for infinity values -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, -false, true, false, true); - --- Ensure the row estimate behaves correctly when step size is zero. --- We expect generate_series_timestamp() to throw the error rather than in --- the support function. -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); - --- --- Test the SupportRequestRows support function for generate_series_numeric() --- - --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0) g(s);$$, -true, true, false, true); - --- As above but with non-default step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, -true, true, false, true); - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, -true, true, false, true); - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, -true, true, false, true); - --- Ensure we get the default row estimate for error cases (infinity/NaN values --- and zero step size) -SELECT explain_mask_costs($$ -SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, -false, true, false, true); - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, -false, true, false, true); - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, -false, true, false, true); - -- -- Test SupportRequestInlineInFrom request -- @@ -443,7 +319,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel) FROM test_chunk_id; DROP TABLE test_chunk_id; -DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- test stratnum translation support functions SELECT gist_translate_cmptype_common(7); diff --git a/src/test/regress/sql/selectivity_est.sql b/src/test/regress/sql/selectivity_est.sql new file mode 100644 index 00000000000..416d5ea1f75 --- /dev/null +++ b/src/test/regress/sql/selectivity_est.sql @@ -0,0 +1,126 @@ +-- Function to assist with verifying EXPLAIN which includes costs. A series +-- of bool flags allows control over which portions are masked out +CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, + hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text +LANGUAGE plpgsql AS +$$ +DECLARE + ln text; + analyze_str text; +BEGIN + IF do_analyze = true THEN + analyze_str := 'on'; + ELSE + analyze_str := 'off'; + END IF; + + -- avoid jit related output by disabling it + SET LOCAL jit = 0; + + FOR ln IN + EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', + analyze_str, query) + LOOP + IF hide_costs = true THEN + ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); + END IF; + + IF hide_row_est = true THEN + -- don't use 'g' so that we leave the actual rows intact + ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); + END IF; + + IF hide_width = true THEN + ln := regexp_replace(ln, 'width=\d+', 'width=N'); + END IF; + + RETURN NEXT ln; + END LOOP; +END; +$$; + +-- +-- Test the SupportRequestRows support function for generate_series_timestamp() +-- + +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + +-- As above but with generate_series_timestamp +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + +-- As above but with generate_series_timestamptz_at_zone() +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, +true, true, false, true); + +-- Ensure the estimated and actual row counts match when the range isn't +-- evenly divisible by the step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, +true, true, false, true); + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, +true, true, false, true); + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + +-- Ensure we get the default row estimate for infinity values +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, +false, true, false, true); + +-- Ensure the row estimate behaves correctly when step size is zero. +-- We expect generate_series_timestamp() to throw the error rather than in +-- the support function. +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); + +-- +-- Test the SupportRequestRows support function for generate_series_numeric() +-- + +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0) g(s);$$, +true, true, false, true); + +-- As above but with non-default step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, +true, true, false, true); + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, +true, true, false, true); + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, +true, true, false, true); + +-- Ensure we get the default row estimate for error cases (infinity/NaN values +-- and zero step size) +SELECT explain_mask_costs($$ +SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, +false, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, +false, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, +false, true, false, true); + + +DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- 2.34.1
