public inbox for [email protected]
help / color / mirror / Atom feedFrom: Ilia Evdokimov <[email protected]>
To: David Rowley <[email protected]>
Cc: Zsolt Parragi <[email protected]>
Cc: David Geier <[email protected]>
Cc: PostgreSQL Developers <[email protected]>
Subject: Re: Reduce planning time for large NOT IN lists containing NULL
Date: Tue, 17 Mar 2026 13:50:59 +0300
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<CAN4CZFNJHro_pEVTHggBX43f+AebwKLZdp39+V4pV0EB4BZuZw@mail.gmail.com>
<[email protected]>
<[email protected]>
<[email protected]>
<CAN4CZFOTAyVk9srBp560049Xu655xUDZv5NBTvj7RYND-xKymA@mail.gmail.com>
<[email protected]>
<CAApHDvphShGABn-3AoE36dTvGHW7gUpFSw0_ZZnH84wGCW3hHw@mail.gmail.com>
<[email protected]>
Rebased the v7-patches to fix git apply failure.
--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC,
https://tantorlabs.com/
Attachments:
[text/x-patch] v7-0002-Reduce-planning-time-for-large-NOT-IN-lists-conta.patch (4.6K, 2-v7-0002-Reduce-planning-time-for-large-NOT-IN-lists-conta.patch)
download | inline diff:
From 33094d8e110152c5c9af119373699be2ceb42abb Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <[email protected]>
Date: Tue, 17 Mar 2026 13:40:57 +0300
Subject: [PATCH v7 2/2] Reduce planning time for large NOT IN lists containing
NULL
For x <> ALL (...), the presence of a NULL makes the selectivity 0.0.
The planner currently still iterates over all elements and computes
per-element selectivity, even though the final result is known.
Add an early NULL check for constant arrays and immediately return
0.0 under ALL semantics.
This reduces planning time for large <> ALL lists without
changing semantics.
---
src/backend/utils/adt/selfuncs.c | 17 +++++++++++
src/test/regress/expected/selectivity_est.out | 30 +++++++++++++++++++
src/test/regress/sql/selectivity_est.sql | 22 ++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d4da0e8dea9..073d93f4f3d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root,
if (arrayisnull) /* qual can't succeed if null array */
return (Selectivity) 0.0;
arrayval = DatumGetArrayTypeP(arraydatum);
+
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator is
+ * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return
+ * zero.
+ */
+ if (!useOr && array_contains_nulls(arrayval))
+ return (Selectivity) 0.0;
+
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
&elmlen, &elmbyval, &elmalign);
deconstruct_array(arrayval,
@@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root,
List *args;
Selectivity s2;
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator
+ * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so
+ * return zero.
+ */
+ if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull)
+ return (Selectivity) 0.0;
+
/*
* Theoretically, if elem isn't of nominal_element_type we should
* insert a RelabelType, but it seems unlikely that any operator
diff --git a/src/test/regress/expected/selectivity_est.out b/src/test/regress/expected/selectivity_est.out
index 8fc5c9c9e07..d482f2ae7a0 100644
--- a/src/test/regress/expected/selectivity_est.out
+++ b/src/test/regress/expected/selectivity_est.out
@@ -175,4 +175,34 @@ false, true, false, true);
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
(1 row)
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+DROP FUNCTION replace_elem;
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/selectivity_est.sql b/src/test/regress/sql/selectivity_est.sql
index 416d5ea1f75..3ffec43907f 100644
--- a/src/test/regress/sql/selectivity_est.sql
+++ b/src/test/regress/sql/selectivity_est.sql
@@ -122,5 +122,27 @@ SELECT explain_mask_costs($$
SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
false, true, false, true);
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+
+DROP FUNCTION replace_elem;
+
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.34.1
[text/x-patch] v7-0001-Move-planner-row-estimation-tests-to-selectivity..patch (29.0K, 3-v7-0001-Move-planner-row-estimation-tests-to-selectivity..patch)
download | inline diff:
From 644fa9a077aaf7d77c3a58add925d2e01cbcae20 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <[email protected]>
Date: Tue, 17 Mar 2026 13:39:38 +0300
Subject: [PATCH v7 1/2] Move planner row-estimation tests to selectivity.sql
Move explain_mask_costs() and the associated planner row-estimation
tests from misc_functions.sql to a new regression test file,
selectivity.sql.
The tests exercise SupportRequestRows support functions and other
planner selectivity estimation behavior, so they do not logically
belong in misc_functions.sql.
---
src/test/regress/expected/misc_functions.out | 178 ------------------
src/test/regress/expected/selectivity_est.out | 178 ++++++++++++++++++
src/test/regress/parallel_schedule | 2 +-
src/test/regress/sql/misc_functions.sql | 125 ------------
src/test/regress/sql/selectivity_est.sql | 126 +++++++++++++
5 files changed, 305 insertions(+), 304 deletions(-)
create mode 100644 src/test/regress/expected/selectivity_est.out
create mode 100644 src/test/regress/sql/selectivity_est.sql
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 6c03b1a79d7..cf55cdf3688 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -2,46 +2,6 @@
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
--- Function to assist with verifying EXPLAIN which includes costs. A series
--- of bool flags allows control over which portions are masked out
-CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
- hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
-LANGUAGE plpgsql AS
-$$
-DECLARE
- ln text;
- analyze_str text;
-BEGIN
- IF do_analyze = true THEN
- analyze_str := 'on';
- ELSE
- analyze_str := 'off';
- END IF;
-
- -- avoid jit related output by disabling it
- SET LOCAL jit = 0;
-
- FOR ln IN
- EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s',
- analyze_str, query)
- LOOP
- IF hide_costs = true THEN
- ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
- END IF;
-
- IF hide_row_est = true THEN
- -- don't use 'g' so that we leave the actual rows intact
- ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
- END IF;
-
- IF hide_width = true THEN
- ln := regexp_replace(ln, 'width=\d+', 'width=N');
- END IF;
-
- RETURN NEXT ln;
- END LOOP;
-END;
-$$;
--
-- num_nulls()
--
@@ -671,143 +631,6 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
Index Cond: (unique1 = g.g)
(4 rows)
---
--- Test the SupportRequestRows support function for generate_series_timestamp()
---
--- Ensure the row estimate matches the actual rows
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
-(1 row)
-
--- As above but with generate_series_timestamp
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
-(1 row)
-
--- As above but with generate_series_timestamptz_at_zone()
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
-(1 row)
-
--- Ensure the estimated and actual row counts match when the range isn't
--- evenly divisible by the step
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
-true, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1)
-(1 row)
-
--- Ensure the estimates match when step is decreasing
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
-(1 row)
-
--- Ensure an empty range estimates 1 row
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
-(1 row)
-
--- Ensure we get the default row estimate for infinity values
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
-false, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1000 width=N)
-(1 row)
-
--- Ensure the row estimate behaves correctly when step size is zero.
--- We expect generate_series_timestamp() to throw the error rather than in
--- the support function.
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
-ERROR: step size cannot equal zero
---
--- Test the SupportRequestRows support function for generate_series_numeric()
---
--- Ensure the row estimate matches the actual rows
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1)
-(1 row)
-
--- As above but with non-default step
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1)
-(1 row)
-
--- Ensure the estimates match when step is decreasing
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
-true, true, false, true);
- explain_mask_costs
----------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1)
-(1 row)
-
--- Ensure an empty range estimates 1 row
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
-true, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
-(1 row)
-
--- Ensure we get the default row estimate for error cases (infinity/NaN values
--- and zero step size)
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
-false, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1000 width=N)
-(1 row)
-
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
-false, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1000 width=N)
-(1 row)
-
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
-false, true, false, true);
- explain_mask_costs
--------------------------------------------------------------------
- Function Scan on generate_series g (cost=N..N rows=1000 width=N)
-(1 row)
-
--
-- Test SupportRequestInlineInFrom request
--
@@ -970,7 +793,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
(1 row)
DROP TABLE test_chunk_id;
-DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
-- test stratnum translation support functions
SELECT gist_translate_cmptype_common(7);
gist_translate_cmptype_common
diff --git a/src/test/regress/expected/selectivity_est.out b/src/test/regress/expected/selectivity_est.out
new file mode 100644
index 00000000000..8fc5c9c9e07
--- /dev/null
+++ b/src/test/regress/expected/selectivity_est.out
@@ -0,0 +1,178 @@
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ -- avoid jit related output by disabling it
+ SET LOCAL jit = 0;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
+(1 row)
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+ERROR: step size cannot equal zero
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1)
+(1 row)
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+---------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index e779ada70cb..645f493319b 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# The stats test resets stats, so nothing else needing stats access can be in
# this group.
# ----------
-test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls
+test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls selectivity_est
# event_trigger depends on create_am and cannot run concurrently with
# any test that runs DDL
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 35b7983996c..c8226652f2c 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -4,47 +4,6 @@
\set regresslib :libdir '/regress' :dlsuffix
--- Function to assist with verifying EXPLAIN which includes costs. A series
--- of bool flags allows control over which portions are masked out
-CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
- hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
-LANGUAGE plpgsql AS
-$$
-DECLARE
- ln text;
- analyze_str text;
-BEGIN
- IF do_analyze = true THEN
- analyze_str := 'on';
- ELSE
- analyze_str := 'off';
- END IF;
-
- -- avoid jit related output by disabling it
- SET LOCAL jit = 0;
-
- FOR ln IN
- EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s',
- analyze_str, query)
- LOOP
- IF hide_costs = true THEN
- ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
- END IF;
-
- IF hide_row_est = true THEN
- -- don't use 'g' so that we leave the actual rows intact
- ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
- END IF;
-
- IF hide_width = true THEN
- ln := regexp_replace(ln, 'width=\d+', 'width=N');
- END IF;
-
- RETURN NEXT ln;
- END LOOP;
-END;
-$$;
-
--
-- num_nulls()
--
@@ -277,89 +236,6 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
---
--- Test the SupportRequestRows support function for generate_series_timestamp()
---
-
--- Ensure the row estimate matches the actual rows
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
-
--- As above but with generate_series_timestamp
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
-
--- As above but with generate_series_timestamptz_at_zone()
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
-true, true, false, true);
-
--- Ensure the estimated and actual row counts match when the range isn't
--- evenly divisible by the step
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
-true, true, false, true);
-
--- Ensure the estimates match when step is decreasing
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
-true, true, false, true);
-
--- Ensure an empty range estimates 1 row
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
-true, true, false, true);
-
--- Ensure we get the default row estimate for infinity values
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
-false, true, false, true);
-
--- Ensure the row estimate behaves correctly when step size is zero.
--- We expect generate_series_timestamp() to throw the error rather than in
--- the support function.
-SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
-
---
--- Test the SupportRequestRows support function for generate_series_numeric()
---
-
--- Ensure the row estimate matches the actual rows
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
-true, true, false, true);
-
--- As above but with non-default step
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
-true, true, false, true);
-
--- Ensure the estimates match when step is decreasing
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
-true, true, false, true);
-
--- Ensure an empty range estimates 1 row
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
-true, true, false, true);
-
--- Ensure we get the default row estimate for error cases (infinity/NaN values
--- and zero step size)
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
-false, true, false, true);
-
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
-false, true, false, true);
-
-SELECT explain_mask_costs($$
-SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
-false, true, false, true);
-
--
-- Test SupportRequestInlineInFrom request
--
@@ -443,7 +319,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
FROM test_chunk_id;
DROP TABLE test_chunk_id;
-DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
-- test stratnum translation support functions
SELECT gist_translate_cmptype_common(7);
diff --git a/src/test/regress/sql/selectivity_est.sql b/src/test/regress/sql/selectivity_est.sql
new file mode 100644
index 00000000000..416d5ea1f75
--- /dev/null
+++ b/src/test/regress/sql/selectivity_est.sql
@@ -0,0 +1,126 @@
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ -- avoid jit related output by disabling it
+ SET LOCAL jit = 0;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+
+
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.34.1
view thread (9+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected], [email protected]
Subject: Re: Reduce planning time for large NOT IN lists containing NULL
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox