public inbox for [email protected]
help / color / mirror / Atom feedFrom: Chao Li <[email protected]>
To: PostgreSQL-development <[email protected]>
Cc: Tatsuo Ishii <[email protected]>
Cc: [email protected]
Subject: Should IGNORE NULLS cache nullness for volatile arguments?
Date: Thu, 14 May 2026 12:14:54 +0800
Message-ID: <[email protected]> (raw)
Hi,
I tested the new IGNORE NULLS support for window functions and noticed one behavior that looks strange to me.
To avoid repeated evaluation, the current code caches whether an argument value is NULL or NOT NULL. That is fine for stable expressions, but it looks unsafe for volatile arguments. For example, an argument may be evaluated as NOT NULL when its nullness is first checked, but when the value is needed later and the argument is evaluated again, the result may become NULL. That can lead to surprising results for volatile functions.
I do not have full confidence to call this a bug yet, but I think it is at least worth discussing. If the value of a NOT NULL argument were also cached, then I guess this behavior might be acceptable. But with the current implementation, the argument can be re-evaluated later and produce the opposite nullness result, which seems wrong to me.
The attached patch makes a small change in that direction. It only uses the IGNORE NULLS nullness cache when the argument is safe to reuse. For non-cacheable arguments, the nullness is treated as unknown and the argument is evaluated again.
See the attached patch for details.
Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/
Attachments:
[application/octet-stream] v1-0001-Fix-IGNORE-NULLS-nullness-cache-for-volatile-wind.patch (9.3K, 2-v1-0001-Fix-IGNORE-NULLS-nullness-cache-for-volatile-wind.patch)
download | inline diff:
From 502082a0fbbc597ed6e8048edbb7ab58e7f4e614 Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <[email protected]>
Date: Thu, 14 May 2026 11:59:34 +0800
Subject: [PATCH v1] Fix IGNORE NULLS nullness cache for volatile window
arguments
The IGNORE NULLS implementation caches whether a window function argument
evaluated to NULL or NOT NULL for a given partition row. That is safe for
ordinary expressions, but not for volatile expressions, where evaluating the
same argument on the same row can produce a different NULL/NOT NULL result
later.
This could produce wrong results in two ways. A row previously cached as
NULL could be skipped even though a later evaluation would return NOT NULL.
Conversely, a row cached as NOT NULL could be chosen as the target row, then
re-evaluated to fetch the actual value and return NULL.
Make the nullness cache conditional per argument. Do not use it for
arguments containing volatile functions or subplans, following the same
conservative approach used for moving window aggregates. Also avoid
re-evaluating non-cacheable partition arguments after the scan has already
found the target row.
Add regression tests covering volatile arguments and subplan arguments with
IGNORE NULLS.
Author: Chao Li <[email protected]>
---
src/backend/executor/nodeWindowAgg.c | 60 ++++++++++++++++++-------
src/test/regress/expected/window.out | 66 ++++++++++++++++++++++++++++
src/test/regress/sql/window.sql | 28 ++++++++++++
3 files changed, 139 insertions(+), 15 deletions(-)
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 5cc39bd9086..42cc3a1f3c3 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -76,6 +76,7 @@ typedef struct WindowObjectData
int64 *num_notnull_info; /* track size (number of tuples in
* partition) of the notnull_info array
* for each func args */
+ bool *notnull_info_cacheable; /* can we cache notnull_info? */
/*
* Null treatment options. One of: NO_NULLTREATMENT, PARSER_IGNORE_NULLS,
@@ -3454,7 +3455,10 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno,
if (isout)
*isout = false;
- v = get_notnull_info(winobj, abs_pos, argno);
+ if (winobj->notnull_info_cacheable[argno])
+ v = get_notnull_info(winobj, abs_pos, argno);
+ else
+ v = NN_UNKNOWN;
if (v == NN_NULL) /* this row is known to be NULL */
goto advance;
@@ -3471,8 +3475,9 @@ ignorenulls_getfuncarginframe(WindowObject winobj, int argno,
if (!*isnull)
notnull_offset++;
- /* record the row status */
- put_notnull_info(winobj, abs_pos, argno, *isnull);
+ /* record the row status if it is safe to reuse */
+ if (winobj->notnull_info_cacheable[argno])
+ put_notnull_info(winobj, abs_pos, argno, *isnull);
}
else /* this row is known to be NOT NULL */
{
@@ -3518,8 +3523,23 @@ init_notnull_info(WindowObject winobj, WindowStatePerFunc perfuncstate)
if (winobj->ignore_nulls == PARSER_IGNORE_NULLS)
{
+ int argno = 0;
+ ListCell *lc;
+
winobj->notnull_info = palloc0_array(uint8 *, numargs);
winobj->num_notnull_info = palloc0_array(int64, numargs);
+ winobj->notnull_info_cacheable = palloc_array(bool, numargs);
+
+ foreach(lc, perfuncstate->wfunc->args)
+ {
+ Node *arg = (Node *) lfirst(lc);
+
+ winobj->notnull_info_cacheable[argno] =
+ !contain_volatile_functions(arg) &&
+ !contain_subplans(arg);
+
+ argno++;
+ }
}
}
@@ -3812,6 +3832,7 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
int notnull_relpos;
int forward;
bool myisout;
+ bool got_datum;
Assert(WindowObjectIsValid(winobj));
winstate = winobj->winstate;
@@ -3860,6 +3881,7 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
notnull_relpos = abs(relpos);
forward = relpos > 0 ? 1 : -1;
myisout = false;
+ got_datum = false;
datum = 0;
/*
@@ -3895,8 +3917,11 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
if (abs_pos < 0) /* clearly out of partition */
break;
- /* check NOT NULL cached info */
- nn_info = get_notnull_info(winobj, abs_pos, argno);
+ /* check NOT NULL cached info if it is safe to reuse */
+ if (winobj->notnull_info_cacheable[argno])
+ nn_info = get_notnull_info(winobj, abs_pos, argno);
+ else
+ nn_info = NN_UNKNOWN;
if (nn_info == NN_NOTNULL) /* this row is known to be NOT NULL */
notnull_offset++;
else if (nn_info == NN_NULL) /* this row is known to be NULL */
@@ -3905,25 +3930,30 @@ WinGetFuncArgInPartition(WindowObject winobj, int argno,
{
/*
* NOT NULL info does not exist yet. Get tuple and evaluate func
- * arg in partition. We ignore the return value from
- * gettuple_eval_partition because we are just interested in
- * whether we are inside or outside of partition, NULL or NOT
- * NULL.
+ * arg in partition. Keep the return value in case this row is the
+ * target; re-evaluating a volatile argument could give a
+ * different nullness status.
*/
- (void) gettuple_eval_partition(winobj, argno,
- abs_pos, isnull, &myisout);
+ datum = gettuple_eval_partition(winobj, argno,
+ abs_pos, isnull, &myisout);
if (myisout) /* out of partition? */
break;
if (!*isnull)
+ {
notnull_offset++;
- /* record the row status */
- put_notnull_info(winobj, abs_pos, argno, *isnull);
+ if (notnull_offset >= notnull_relpos)
+ got_datum = true;
+ }
+ /* record the row status if it is safe to reuse */
+ if (winobj->notnull_info_cacheable[argno])
+ put_notnull_info(winobj, abs_pos, argno, *isnull);
}
} while (notnull_offset < notnull_relpos);
/* get tuple and evaluate func arg in partition */
- datum = gettuple_eval_partition(winobj, argno,
- abs_pos, isnull, &myisout);
+ if (!got_datum)
+ datum = gettuple_eval_partition(winobj, argno,
+ abs_pos, isnull, &myisout);
if (!myisout && set_mark)
WinSetMarkPosition(winobj, mark_pos);
if (isout)
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index e6aac27a2a9..de0e14a686e 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -5964,6 +5964,72 @@ WINDOW w AS (ORDER BY x ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING);
5 | 4
(5 rows)
+-- volatile arguments cannot use the IGNORE NULLS nullness cache
+CREATE TEMPORARY SEQUENCE null_treatment_seq;
+CREATE FUNCTION pg_temp.volatile_null(i int) RETURNS int
+LANGUAGE sql VOLATILE AS
+$$
+ SELECT CASE WHEN nextval('null_treatment_seq') % 2 = 0 THEN i ELSE NULL END;
+$$;
+SELECT x,
+ first_value(pg_temp.volatile_null(x)) IGNORE NULLS
+ OVER (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
+FROM generate_series(1,5) g(x);
+ x | first_value
+---+-------------
+ 1 |
+ 2 | 1
+ 3 | 2
+ 4 | 2
+ 5 | 2
+(5 rows)
+
+SELECT last_value FROM null_treatment_seq;
+ last_value
+------------
+ 8
+(1 row)
+
+ALTER SEQUENCE null_treatment_seq RESTART WITH 1;
+SELECT x,
+ lead(pg_temp.volatile_null(x), 1) IGNORE NULLS OVER (ORDER BY x)
+FROM generate_series(1,5) g(x);
+ x | lead
+---+------
+ 1 | 3
+ 2 | 4
+ 3 | 5
+ 4 |
+ 5 |
+(5 rows)
+
+SELECT last_value FROM null_treatment_seq;
+ last_value
+------------
+ 7
+(1 row)
+
+ALTER SEQUENCE null_treatment_seq RESTART WITH 1;
+SELECT x,
+ first_value((SELECT CASE WHEN nextval('null_treatment_seq') % 2 = 0
+ THEN x ELSE NULL END)) IGNORE NULLS
+ OVER (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
+FROM generate_series(1,5) g(x);
+ x | first_value
+---+-------------
+ 1 |
+ 2 | 1
+ 3 | 2
+ 4 | 2
+ 5 | 2
+(5 rows)
+
+SELECT last_value FROM null_treatment_seq;
+ last_value
+------------
+ 8
+(1 row)
+
--cleanup
DROP TABLE planets CASCADE;
NOTICE: drop cascades to view planets_view
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index 305549b104d..17261135dc3 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -2157,5 +2157,33 @@ SELECT x,
FROM generate_series(1,5) g(x)
WINDOW w AS (ORDER BY x ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING);
+-- volatile arguments cannot use the IGNORE NULLS nullness cache
+CREATE TEMPORARY SEQUENCE null_treatment_seq;
+CREATE FUNCTION pg_temp.volatile_null(i int) RETURNS int
+LANGUAGE sql VOLATILE AS
+$$
+ SELECT CASE WHEN nextval('null_treatment_seq') % 2 = 0 THEN i ELSE NULL END;
+$$;
+
+SELECT x,
+ first_value(pg_temp.volatile_null(x)) IGNORE NULLS
+ OVER (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
+FROM generate_series(1,5) g(x);
+SELECT last_value FROM null_treatment_seq;
+
+ALTER SEQUENCE null_treatment_seq RESTART WITH 1;
+SELECT x,
+ lead(pg_temp.volatile_null(x), 1) IGNORE NULLS OVER (ORDER BY x)
+FROM generate_series(1,5) g(x);
+SELECT last_value FROM null_treatment_seq;
+
+ALTER SEQUENCE null_treatment_seq RESTART WITH 1;
+SELECT x,
+ first_value((SELECT CASE WHEN nextval('null_treatment_seq') % 2 = 0
+ THEN x ELSE NULL END)) IGNORE NULLS
+ OVER (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
+FROM generate_series(1,5) g(x);
+SELECT last_value FROM null_treatment_seq;
+
--cleanup
DROP TABLE planets CASCADE;
--
2.50.1 (Apple Git-155)
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected]
Subject: Re: Should IGNORE NULLS cache nullness for volatile arguments?
In-Reply-To: <[email protected]>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox