public inbox for [email protected]  
help / color / mirror / Atom feed
From: Chengpeng Yan <[email protected]>
To: PostgreSQL-development <[email protected]>
Cc: David Rowley <[email protected]>
Subject: [PATCH] Fix hashed ScalarArrayOp semantics for NULL LHS with non-strict comparators
Date: Thu, 16 Apr 2026 13:01:38 +0000
Message-ID: <[email protected]> (raw)

Hi,

ExecEvalHashedScalarArrayOp() produces results that are not semantically
equivalent to ExecEvalScalarArrayOp() when the LHS is NULL and the
comparison function is non-strict.

With the attached setup, the following two queries disagree:

select (a in
        (0::myint,1::myint,2::myint,3::myint,4::myint,
         5::myint,6::myint,7::myint,8::myint,9::myint)) is null
from inttest where a is null;

This takes the hashed SAOP path and returns false.

select (a in
        (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint)) is null
from inttest where a is null;

This stays on the linear path and returns true.

This only occurs when:
* the planner selects the hashed SAOP path,
* the LHS evaluates to NULL at execution time, and
* the comparison function is non-strict.

The root cause is that ExecEvalHashedScalarArrayOp() only special-cases
NULL LHS for strict functions, and otherwise proceeds to probe the hash
table. This is incorrect for non-strict functions, and can also result
in probing with an undefined Datum.

The first attached patch fixes this by bypassing hash probing when the
LHS is NULL and the comparator is non-strict, falling back to a linear
evaluation consistent with ExecEvalScalarArrayOp(). For NOT IN, only
non-NULL results are inverted.

The second patch is a cleanup that factors out the common array scan and
boolean reduction logic shared by ExecEvalScalarArrayOp() and the new
fallback path. No functional change intended.

The patches include regression tests using a custom type with a
non-strict, hashable “=” operator. A standalone SQL reproducer is also
attached.

--
Best regards,
Chengpeng Yan





Attachments:

  [application/octet-stream] v1-0002-Factor-out-common-ScalarArrayOp-array-reduction-l.patch (6.9K, 3-v1-0002-Factor-out-common-ScalarArrayOp-array-reduction-l.patch)
  download | inline diff:
From a617677bbd71acf3413d59220b1180f175cafdaa Mon Sep 17 00:00:00 2001
From: Chengpeng Yan <[email protected]>
Date: Mon, 13 Apr 2026 12:31:05 +0800
Subject: [PATCH v1 2/2] Factor out common ScalarArrayOp array reduction logic

ExecEvalScalarArrayOp() and the NULL-lhs fallback in
ExecEvalHashedScalarArrayOp() both need the same array-element scan and
boolean reduction logic.  Move that logic into a shared helper so the
semantics live in one place.

No functional change intended.
---
 src/backend/executor/execExprInterp.c | 148 +++++++++++---------------
 1 file changed, 65 insertions(+), 83 deletions(-)

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 5437a8cd4b7..8f3d72d19e4 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -154,6 +154,12 @@ static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
 									bool *changed);
 static void ExecEvalRowNullInt(ExprState *state, ExprEvalStep *op,
 							   ExprContext *econtext, bool checkisnull);
+static void ExecEvalArrayCompareReduce(FunctionCallInfo fcinfo,
+									   ArrayType *arr, int16 typlen,
+									   bool typbyval, char typalign,
+									   bool useOr,
+									   bool invert_nonnull_result,
+									   Datum *result, bool *resultnull);
 
 /* fast-path evaluation functions */
 static Datum ExecJustInnerVar(ExprState *state, ExprContext *econtext, bool *isnull);
@@ -4031,13 +4037,6 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	int			nitems;
 	Datum		result;
 	bool		resultnull;
-	int16		typlen;
-	bool		typbyval;
-	char		typalign;
-	uint8		typalignby;
-	char	   *s;
-	uint8	   *bitmap;
-	int			bitmask;
 
 	/*
 	 * If the array is NULL then we return NULL --- it's not very meaningful
@@ -4086,14 +4085,49 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 		op->d.scalararrayop.element_type = ARR_ELEMTYPE(arr);
 	}
 
-	typlen = op->d.scalararrayop.typlen;
-	typbyval = op->d.scalararrayop.typbyval;
-	typalign = op->d.scalararrayop.typalign;
-	typalignby = typalign_to_alignby(typalign);
+	ExecEvalArrayCompareReduce(fcinfo,
+							   arr,
+							   op->d.scalararrayop.typlen,
+							   op->d.scalararrayop.typbyval,
+							   op->d.scalararrayop.typalign,
+							   useOr,
+							   false,
+							   &result,
+							   &resultnull);
+
+	*op->resvalue = result;
+	*op->resnull = resultnull;
+}
+
+/*
+ * Shared helper for ExecEvalScalarArrayOp() and the hashed NULL-lhs fallback.
+ * Callers must handle the scalar-NULL strict fast path before invoking this.
+ *
+ * The linear path passes the expression's comparison function and uses useOr
+ * to select OR or AND reduction.  The hashed NULL-lhs fallback instead passes
+ * the equality function used by hash probes, so NOT IN inverts non-NULL
+ * results after reduction.
+ */
+static void
+ExecEvalArrayCompareReduce(FunctionCallInfo fcinfo,
+						   ArrayType *arr, int16 typlen,
+						   bool typbyval, char typalign,
+						   bool useOr,
+						   bool invert_nonnull_result,
+						   Datum *result, bool *resultnull)
+{
+	uint8		typalignby = typalign_to_alignby(typalign);
+	int			nitems;
+	char	   *s;
+	uint8	   *bitmap;
+	int			bitmask;
+	bool		strictfunc = fcinfo->flinfo->fn_strict;
+
+	nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
 
 	/* Initialize result appropriately depending on useOr */
-	result = BoolGetDatum(!useOr);
-	resultnull = false;
+	*result = BoolGetDatum(!useOr);
+	*resultnull = false;
 
 	/* Loop over the array elements */
 	s = (char *) ARR_DATA_PTR(arr);
@@ -4129,18 +4163,18 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 		else
 		{
 			fcinfo->isnull = false;
-			thisresult = op->d.scalararrayop.fn_addr(fcinfo);
+			thisresult = fcinfo->flinfo->fn_addr(fcinfo);
 		}
 
 		/* Combine results per OR or AND semantics */
 		if (fcinfo->isnull)
-			resultnull = true;
+			*resultnull = true;
 		else if (useOr)
 		{
 			if (DatumGetBool(thisresult))
 			{
-				result = BoolGetDatum(true);
-				resultnull = false;
+				*result = BoolGetDatum(true);
+				*resultnull = false;
 				break;			/* needn't look at any more elements */
 			}
 		}
@@ -4148,8 +4182,8 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 		{
 			if (!DatumGetBool(thisresult))
 			{
-				result = BoolGetDatum(false);
-				resultnull = false;
+				*result = BoolGetDatum(false);
+				*resultnull = false;
 				break;			/* needn't look at any more elements */
 			}
 		}
@@ -4166,8 +4200,9 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 		}
 	}
 
-	*op->resvalue = result;
-	*op->resnull = resultnull;
+	/* Apply the caller-requested NOT IN inversion, preserving NULL. */
+	if (invert_nonnull_result && !*resultnull)
+		*result = BoolGetDatum(!DatumGetBool(*result));
 }
 
 /*
@@ -4254,11 +4289,6 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
 		int16		typlen;
 		bool		typbyval;
 		char		typalign;
-		uint8		typalignby;
-		int			nitems;
-		char	   *s;
-		uint8	   *bitmap;
-		int			bitmask;
 
 		if (strictfunc)
 		{
@@ -4272,67 +4302,19 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
 							 &typlen,
 							 &typbyval,
 							 &typalign);
-		typalignby = typalign_to_alignby(typalign);
-
-		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
-
-		/* Compute IN as an OR reduction of equality results. */
-		result = BoolGetDatum(false);
-		resultnull = false;
 
 		fcinfo->args[0].value = (Datum) 0;
 		fcinfo->args[0].isnull = true;
 
-		s = (char *) ARR_DATA_PTR(arr);
-		bitmap = ARR_NULLBITMAP(arr);
-		bitmask = 1;
-		for (int i = 0; i < nitems; i++)
-		{
-			Datum		elt;
-			Datum		thisresult;
-
-			/* Get array element, checking for NULL. */
-			if (bitmap && (*bitmap & bitmask) == 0)
-			{
-				fcinfo->args[1].value = (Datum) 0;
-				fcinfo->args[1].isnull = true;
-			}
-			else
-			{
-				elt = fetch_att(s, typbyval, typlen);
-				s = att_addlength_pointer(s, typlen, s);
-				s = (char *) att_nominal_alignby(s, typalignby);
-				fcinfo->args[1].value = elt;
-				fcinfo->args[1].isnull = false;
-			}
-
-			fcinfo->isnull = false;
-			thisresult = op->d.hashedscalararrayop.finfo->fn_addr(fcinfo);
-
-			if (fcinfo->isnull)
-				resultnull = true;
-			else if (DatumGetBool(thisresult))
-			{
-				result = BoolGetDatum(true);
-				resultnull = false;
-				break;
-			}
-
-			/* Advance bitmap pointer if any. */
-			if (bitmap)
-			{
-				bitmask <<= 1;
-				if (bitmask == 0x100)
-				{
-					bitmap++;
-					bitmask = 1;
-				}
-			}
-		}
-
-		/* Reverse for NOT IN; NULL stays NULL. */
-		if (!inclause && !resultnull)
-			result = BoolGetDatum(!DatumGetBool(result));
+		ExecEvalArrayCompareReduce(fcinfo,
+								   arr,
+								   typlen,
+								   typbyval,
+								   typalign,
+								   true,
+								   !inclause,
+								   &result,
+								   &resultnull);
 
 		*op->resvalue = result;
 		*op->resnull = resultnull;
-- 
2.50.1 (Apple Git-155)



  [application/octet-stream] v1-0001-Fix-hashed-ScalarArrayOp-NULL-handling-for-non-st.patch (6.5K, 4-v1-0001-Fix-hashed-ScalarArrayOp-NULL-handling-for-non-st.patch)
  download | inline diff:
From 1b2c8bb9bf44534d50f565acc4ea0bec2786cfa2 Mon Sep 17 00:00:00 2001
From: Chengpeng Yan <[email protected]>
Date: Fri, 13 Feb 2026 13:11:20 +0800
Subject: [PATCH v1 1/2] Fix hashed ScalarArrayOp NULL handling for non-strict
 operators

ExecEvalHashedScalarArrayOp() only short-circuits NULL lhs values for
strict operators.  For non-strict operators, a NULL lhs could still reach
the hash probe path, diverging from ExecEvalScalarArrayOp() and returning
a non-NULL result where SQL's three-valued logic requires NULL.  The hash
probe would also depend on a NULL lhs Datum payload, whose value is not
meaningful.

Fix this by bypassing the hash lookup for NULL lhs values with non-strict
operators and falling back to the same per-element reduction used by the
linear ScalarArrayOp path.  Preserve the existing strict fast path and add
regression coverage for hashed and non-hashed IN / NOT IN cases.
---
 src/backend/executor/execExprInterp.c     | 95 ++++++++++++++++++++++-
 src/test/regress/expected/expressions.out | 30 +++++++
 src/test/regress/sql/expressions.sql      | 11 +++
 3 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 3c4843cde86..5437a8cd4b7 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4243,12 +4243,99 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
 	Assert(!*op->resnull);
 
 	/*
-	 * If the scalar is NULL, and the function is strict, return NULL; no
-	 * point in executing the search.
+	 * If the scalar is NULL, we can only use the hash table with strict
+	 * functions.  For non-strict functions we must evaluate each element to
+	 * preserve SQL's three-valued logic; probing the hash table would also
+	 * depend on the NULL lhs Datum payload, whose value is not meaningful.
 	 */
-	if (fcinfo->args[0].isnull && strictfunc)
+	if (scalar_isnull)
 	{
-		*op->resnull = true;
+		ArrayType  *arr;
+		int16		typlen;
+		bool		typbyval;
+		char		typalign;
+		uint8		typalignby;
+		int			nitems;
+		char	   *s;
+		uint8	   *bitmap;
+		int			bitmask;
+
+		if (strictfunc)
+		{
+			*op->resnull = true;
+			return;
+		}
+
+		arr = DatumGetArrayTypeP(*op->resvalue);
+
+		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
+							 &typlen,
+							 &typbyval,
+							 &typalign);
+		typalignby = typalign_to_alignby(typalign);
+
+		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+
+		/* Compute IN as an OR reduction of equality results. */
+		result = BoolGetDatum(false);
+		resultnull = false;
+
+		fcinfo->args[0].value = (Datum) 0;
+		fcinfo->args[0].isnull = true;
+
+		s = (char *) ARR_DATA_PTR(arr);
+		bitmap = ARR_NULLBITMAP(arr);
+		bitmask = 1;
+		for (int i = 0; i < nitems; i++)
+		{
+			Datum		elt;
+			Datum		thisresult;
+
+			/* Get array element, checking for NULL. */
+			if (bitmap && (*bitmap & bitmask) == 0)
+			{
+				fcinfo->args[1].value = (Datum) 0;
+				fcinfo->args[1].isnull = true;
+			}
+			else
+			{
+				elt = fetch_att(s, typbyval, typlen);
+				s = att_addlength_pointer(s, typlen, s);
+				s = (char *) att_nominal_alignby(s, typalignby);
+				fcinfo->args[1].value = elt;
+				fcinfo->args[1].isnull = false;
+			}
+
+			fcinfo->isnull = false;
+			thisresult = op->d.hashedscalararrayop.finfo->fn_addr(fcinfo);
+
+			if (fcinfo->isnull)
+				resultnull = true;
+			else if (DatumGetBool(thisresult))
+			{
+				result = BoolGetDatum(true);
+				resultnull = false;
+				break;
+			}
+
+			/* Advance bitmap pointer if any. */
+			if (bitmap)
+			{
+				bitmask <<= 1;
+				if (bitmask == 0x100)
+				{
+					bitmap++;
+					bitmask = 1;
+				}
+			}
+		}
+
+		/* Reverse for NOT IN; NULL stays NULL. */
+		if (!inclause && !resultnull)
+			result = BoolGetDatum(!DatumGetBool(result));
+
+		*op->resvalue = result;
+		*op->resnull = resultnull;
 		return;
 	}
 
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 9a3c97b15a3..5ce0c0d11de 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -388,6 +388,36 @@ default for type myint using hash as
   function    1   myinthash(myint);
 create table inttest (a myint);
 insert into inttest values(1::myint),(null);
+-- scalar NULL with a non-strict operator still requires per-element checks.
+-- With no NULLs on the right side, IN and NOT IN should both yield NULL.
+select (a in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint)) is null
+from inttest where a is null;
+ ?column? 
+----------
+ t
+(1 row)
+
+select (a in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint)) is null
+from inttest where a is null;
+ ?column? 
+----------
+ t
+(1 row)
+
+select (a not in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint)) is null
+from inttest where a is null;
+ ?column? 
+----------
+ t
+(1 row)
+
+select (a not in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint)) is null
+from inttest where a is null;
+ ?column? 
+----------
+ t
+(1 row)
+
 -- try an array with enough elements to cause hashing
 select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
  a 
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index e02c21f3368..33af3a17128 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -198,6 +198,17 @@ default for type myint using hash as
 create table inttest (a myint);
 insert into inttest values(1::myint),(null);
 
+-- scalar NULL with a non-strict operator still requires per-element checks.
+-- With no NULLs on the right side, IN and NOT IN should both yield NULL.
+select (a in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint)) is null
+from inttest where a is null;
+select (a in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint)) is null
+from inttest where a is null;
+select (a not in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint)) is null
+from inttest where a is null;
+select (a not in (0::myint,1::myint,2::myint,3::myint,4::myint,5::myint)) is null
+from inttest where a is null;
+
 -- try an array with enough elements to cause hashing
 select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
 select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
-- 
2.50.1 (Apple Git-155)



  [application/octet-stream] repro.sql (2.3K, 5-repro.sql)
  download

view thread (11+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: [PATCH] Fix hashed ScalarArrayOp semantics for NULL LHS with non-strict comparators
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox