From c0ba98141c183090783653d882d206acb7c38a29 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdokimov@tantorlabs.ru>
Date: Wed, 18 Mar 2026 17:23:50 +0300
Subject: [PATCH v9] Reduce planning time for large NOT IN lists containing
 NULL

For x <> ALL (...) with a strict operator, a NULL element means the
clause can never be true, so its selectivity is 0.0.

The planner currently still iterates over all elements and computes
per-element selectivity, even though the final result is known.

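Add early NULL checks that immediately return 0.0 under ALL semantics:
for a constant array, when the array contains any NULL; for a
non-constant ARRAY[...] expression, when a NULL constant element is
encountered.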

This reduces planning time for large <> ALL lists without
changing semantics.
---
 src/backend/utils/adt/selfuncs.c          | 17 +++++++++++
 src/test/regress/expected/planner_est.out | 35 +++++++++++++++++++++++
 src/test/regress/sql/planner_est.sql      | 16 +++++++++++
 3 files changed, 68 insertions(+)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d4da0e8dea9..073d93f4f3d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root,
 		if (arrayisnull)		/* qual can't succeed if null array */
 			return (Selectivity) 0.0;
 		arrayval = DatumGetArrayTypeP(arraydatum);
+
+		/*
+		 * With ALL semantics, a NULL array element means the clause can never
+		 * be TRUE: assuming the operator is strict, that element's comparison
+		 * yields NULL, so the ALL result is NULL or FALSE.  Return zero.
+		 */
+		if (!useOr && array_contains_nulls(arrayval))
+			return (Selectivity) 0.0;
+
 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
 							 &elmlen, &elmbyval, &elmalign);
 		deconstruct_array(arrayval,
@@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root,
 			List	   *args;
 			Selectivity s2;
 
+			/*
+			 * With ALL semantics, a NULL constant element means the clause
+			 * can never be TRUE: assuming the operator is strict, that
+			 * element's comparison yields NULL.  Return zero.
+			 */
+			if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull)
+				return (Selectivity) 0.0;
+
 			/*
 			 * Theoretically, if elem isn't of nominal_element_type we should
 			 * insert a RelabelType, but it seems unlikely that any operator
diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out
index 3a47061800a..624d5858687 100644
--- a/src/test/regress/expected/planner_est.out
+++ b/src/test/regress/expected/planner_est.out
@@ -183,4 +183,39 @@ false, true, false, true);
  Function Scan on generate_series g  (cost=N..N rows=1000 width=N)
 (1 row)
 
+--
+-- Test <> ALL behavior when NULL appears in the array
+--
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[NULL]::integer[]);$$,
+true, true, false, true);
+                            explain_mask_costs                            
+--------------------------------------------------------------------------
+ Seq Scan on tenk1  (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
+   Filter: (unique1 <> ALL ('{NULL}'::integer[]))
+   Rows Removed by Filter: 10000
+(3 rows)
+
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$,
+true, true, false, true);
+                            explain_mask_costs                            
+--------------------------------------------------------------------------
+ Seq Scan on tenk1  (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
+   Filter: (unique1 <> ALL ('{1,2,99,NULL}'::integer[]))
+   Rows Removed by Filter: 10000
+(3 rows)
+
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$,
+true, true, false, true);
+                                 explain_mask_costs                                  
+-------------------------------------------------------------------------------------
+ Seq Scan on tenk1  (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1)
+   Filter: (unique1 <> ALL (ARRAY[1, 2, 98, (InitPlan expr_1).col1, NULL::integer]))
+   Rows Removed by Filter: 10000
+   InitPlan expr_1
+     ->  Result  (cost=N..N rows=1 width=N) (actual rows=1.00 loops=1)
+(5 rows)
+
 DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql
index 47d5ae679c7..61c9f640761 100644
--- a/src/test/regress/sql/planner_est.sql
+++ b/src/test/regress/sql/planner_est.sql
@@ -131,5 +131,21 @@ SELECT explain_mask_costs($$
 SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
 false, true, false, true);
 
+--
+-- Test <> ALL behavior when NULL appears in the array
+--
+
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[NULL]::integer[]);$$,
+true, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$,
+true, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$,
+true, true, false, true);
+
 
 DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
-- 
2.34.1

