public inbox for [email protected]
help / color / mirror / Atom feedFrom: David Rowley <[email protected]>
To: Ilia Evdokimov <[email protected]>
Cc: Zsolt Parragi <[email protected]>
Cc: David Geier <[email protected]>
Cc: PostgreSQL Developers <[email protected]>
Subject: Re: Reduce planning time for large NOT IN lists containing NULL
Date: Wed, 18 Mar 2026 17:32:47 +1300
Message-ID: <CAApHDvq+Tk=BUwzJZsPsm49_QxAXRe9Vd2iFroMsB_Mnt8LdHg@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<[email protected]>
<CAN4CZFNJHro_pEVTHggBX43f+AebwKLZdp39+V4pV0EB4BZuZw@mail.gmail.com>
<[email protected]>
<[email protected]>
<[email protected]>
<CAN4CZFOTAyVk9srBp560049Xu655xUDZv5NBTvj7RYND-xKymA@mail.gmail.com>
<[email protected]>
<CAApHDvphShGABn-3AoE36dTvGHW7gUpFSw0_ZZnH84wGCW3hHw@mail.gmail.com>
<[email protected]>
<[email protected]>
On Tue, 17 Mar 2026 at 23:51, Ilia Evdokimov
<[email protected]> wrote:
> Rebased the v7-patches to fix git apply failure.
Thanks. I've pushed 0001.
I ended up renaming the new file to planner_est.sql as the function
handles width estimate masking too, so I thought just calling it
selectivity_est was a bit too restrictive. I went with planner_est.
That means 0002 needed rebased. I've done that in the attached. Will
look more closely at that one later.
David
From ae698d22b40a665b4048f5db61b3f791be0d246e Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <[email protected]>
Date: Tue, 17 Mar 2026 13:40:57 +0300
Subject: [PATCH v8] Reduce planning time for large NOT IN lists containing
NULL
For x <> ALL (...), the presence of a NULL makes the selectivity 0.0.
The planner currently still iterates over all elements and computes
per-element selectivity, even though the final result is known.
Add an early NULL check for constant arrays and immediately return
0.0 under ALL semantics.
This reduces planning time for large <> ALL lists without
changing semantics.
---
src/backend/utils/adt/selfuncs.c | 17 +++++++++++++
src/test/regress/expected/planner_est.out | 30 +++++++++++++++++++++++
src/test/regress/sql/planner_est.sql | 22 +++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d4da0e8dea9..073d93f4f3d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root,
if (arrayisnull) /* qual can't succeed if null array */
return (Selectivity) 0.0;
arrayval = DatumGetArrayTypeP(arraydatum);
+
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator is
+ * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return
+ * zero.
+ */
+ if (!useOr && array_contains_nulls(arrayval))
+ return (Selectivity) 0.0;
+
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
&elmlen, &elmbyval, &elmalign);
deconstruct_array(arrayval,
@@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root,
List *args;
Selectivity s2;
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator
+ * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so
+ * return zero.
+ */
+ if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull)
+ return (Selectivity) 0.0;
+
/*
* Theoretically, if elem isn't of nominal_element_type we should
* insert a RelabelType, but it seems unlikely that any operator
diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out
index 3a47061800a..7718197283b 100644
--- a/src/test/regress/expected/planner_est.out
+++ b/src/test/regress/expected/planner_est.out
@@ -183,4 +183,34 @@ false, true, false, true);
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
(1 row)
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+DROP FUNCTION replace_elem;
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql
index 47d5ae679c7..391ec42a6e7 100644
--- a/src/test/regress/sql/planner_est.sql
+++ b/src/test/regress/sql/planner_est.sql
@@ -131,5 +131,27 @@ SELECT explain_mask_costs($$
SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
false, true, false, true);
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+
+DROP FUNCTION replace_elem;
+
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.51.0
Attachments:
[text/plain] v8-0001-Reduce-planning-time-for-large-NOT-IN-lists-conta.patch (4.5K, 2-v8-0001-Reduce-planning-time-for-large-NOT-IN-lists-conta.patch)
download | inline diff:
From ae698d22b40a665b4048f5db61b3f791be0d246e Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <[email protected]>
Date: Tue, 17 Mar 2026 13:40:57 +0300
Subject: [PATCH v8] Reduce planning time for large NOT IN lists containing
NULL
For x <> ALL (...), the presence of a NULL makes the selectivity 0.0.
The planner currently still iterates over all elements and computes
per-element selectivity, even though the final result is known.
Add an early NULL check for constant arrays and immediately return
0.0 under ALL semantics.
This reduces planning time for large <> ALL lists without
changing semantics.
---
src/backend/utils/adt/selfuncs.c | 17 +++++++++++++
src/test/regress/expected/planner_est.out | 30 +++++++++++++++++++++++
src/test/regress/sql/planner_est.sql | 22 +++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d4da0e8dea9..073d93f4f3d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root,
if (arrayisnull) /* qual can't succeed if null array */
return (Selectivity) 0.0;
arrayval = DatumGetArrayTypeP(arraydatum);
+
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator is
+ * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return
+ * zero.
+ */
+ if (!useOr && array_contains_nulls(arrayval))
+ return (Selectivity) 0.0;
+
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
&elmlen, &elmbyval, &elmalign);
deconstruct_array(arrayval,
@@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root,
List *args;
Selectivity s2;
+ /*
+ * For ALL semantics, if the array contains NULL, assume operator
+ * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so
+ * return zero.
+ */
+ if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull)
+ return (Selectivity) 0.0;
+
/*
* Theoretically, if elem isn't of nominal_element_type we should
* insert a RelabelType, but it seems unlikely that any operator
diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out
index 3a47061800a..7718197283b 100644
--- a/src/test/regress/expected/planner_est.out
+++ b/src/test/regress/expected/planner_est.out
@@ -183,4 +183,34 @@ false, true, false, true);
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
(1 row)
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+ explain_mask_costs
+----------------------------------------------------
+ Seq Scan on tenk1 (cost=N..N rows=9997 width=N)
+ Filter: (unique1 <> ALL ('{1,99,3}'::integer[]))
+(2 rows)
+
+DROP FUNCTION replace_elem;
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql
index 47d5ae679c7..391ec42a6e7 100644
--- a/src/test/regress/sql/planner_est.sql
+++ b/src/test/regress/sql/planner_est.sql
@@ -131,5 +131,27 @@ SELECT explain_mask_costs($$
SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
false, true, false, true);
+--
+-- Test <> ALL when array initially contained NULL but no longer does
+--
+CREATE FUNCTION replace_elem(arr int[], idx int, val int)
+RETURNS int[] AS $$
+BEGIN
+ arr[idx] := val;
+ RETURN arr;
+end;
+$$ language plpgsql IMMUTABLE;
+
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(ARRAY[1,99,3])',
+false, true, false, true );
+
+-- same array, constructed from an array with a NULL
+SELECT explain_mask_costs(
+ 'SELECT * FROM tenk1 WHERE unique1 <> ALL(replace_elem(ARRAY[1,NULL,3], 2, 99))',
+false, true, false, true );
+
+DROP FUNCTION replace_elem;
+
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.51.0
view thread (9+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected], [email protected]
Subject: Re: Reduce planning time for large NOT IN lists containing NULL
In-Reply-To: <CAApHDvq+Tk=BUwzJZsPsm49_QxAXRe9Vd2iFroMsB_Mnt8LdHg@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox