From ee7ca6d6a27b1872c7fa0d43d2df90631b5df1b0 Mon Sep 17 00:00:00 2001 From: Haibo Yan Date: Tue, 7 Apr 2026 11:57:50 -0700 Subject: [PATCH v1 2/5] Implement executor support for DISTINCT in whole-partition window aggregates Patch 1 taught PostgreSQL to parse and deparse DISTINCT in window aggregate calls, while still rejecting execution with a temporary FEATURE_NOT_SUPPORTED error. This patch implements the first executor support for that feature, but keeps the scope intentionally narrow: DISTINCT is now supported for plain aggregate window functions only when the effective window frame is equivalent to the entire partition. Examples now supported include: count(DISTINCT x) OVER (PARTITION BY p) sum(DISTINCT x) OVER (PARTITION BY p) avg(DISTINCT x) OVER () count(DISTINCT x) OVER ( PARTITION BY p ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) The implementation does not yet support: count(DISTINCT x) OVER (PARTITION BY p ORDER BY o) count(DISTINCT x) OVER ( PARTITION BY p ORDER BY o ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) count(DISTINCT x) OVER ( PARTITION BY p ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) nor any other moving/grow-only/sliding frame cases, EXCLUDE clauses, aggregate ORDER BY within window functions, or multi-argument DISTINCT. The key design choice is to cut scope by frame shape rather than by aggregate kind. The hard part of DISTINCT in window aggregates is not whether the aggregate is count(), sum(), or avg(), but whether the window frame changes from row to row. Once the frame can move, DISTINCT needs incremental membership tracking and, for true sliding frames, correct handling of values leaving the frame. That is a much larger executor problem. For frames equivalent to the whole partition, the semantics are much cleaner: the DISTINCT aggregate result is constant for every row in the partition. The executor can therefore: 1. collect the aggregate input values for the partition, 2. apply FILTER during collection, 3. 
sort the collected values, 4. deduplicate them, 5. invoke the aggregate transition function once per distinct value, 6. finalize once, and 7. reuse the cached final result for all rows in the partition. This patch implements that model inside WindowAgg using a dedicated sort-and-dedup path for DISTINCT aggregates in scope. Existing finalization and result caching logic is reused. Ordinary non-DISTINCT window aggregates continue to use the existing row-by-row accumulation path, so mixed queries containing both DISTINCT and non-DISTINCT window aggregates are supported. The implementation currently supports only single-argument DISTINCT. That keeps the first executor patch small and avoids introducing tuple-sorting and multi-column equality machinery into this step. Multi-argument DISTINCT can be added later as a follow-up. Out-of-scope cases continue to fail at executor init time with FEATURE_NOT_SUPPORTED, replacing the temporary blanket rejection from Patch 1 with a narrower scope check. Regression tests cover: * basic count/sum/avg DISTINCT window aggregate execution; * default no-ORDER-BY whole-partition frames; * explicit whole-partition frames; * NULL and duplicate-heavy inputs; * mixed DISTINCT and non-DISTINCT window aggregates; * multiple DISTINCT window aggregates in the same query; and * rejection of ORDER BY default-frame, ROWS CURRENT ROW, sliding, EXCLUDE, and multi-argument DISTINCT cases. This is intended as the first executor step for DISTINCT in window aggregates. More complex frame classes can be added in later patches. 
--- src/backend/executor/nodeWindowAgg.c | 393 ++++++++++++++++++++++++++- src/test/regress/expected/window.out | 166 ++++++++++- src/test/regress/sql/window.sql | 64 ++++- 3 files changed, 609 insertions(+), 14 deletions(-) diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 9431cae9ae0..9e9115141b7 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -46,6 +46,7 @@ #include "optimizer/optimizer.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" +#include "parser/parse_oper.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" @@ -54,6 +55,7 @@ #include "utils/memutils.h" #include "utils/regproc.h" #include "utils/syscache.h" +#include "utils/tuplesort.h" #include "utils/tuplestore.h" #include "windowapi.h" @@ -170,6 +172,14 @@ typedef struct WindowStatePerAggData /* Data local to eval_windowaggregates() */ bool restart; /* need to restart this agg in this cycle? */ + + /* DISTINCT support */ + bool windistinct; /* DISTINCT specified on this aggregate */ + Oid inputtypeOid; /* OID of the single DISTINCT argument type */ + Oid sortOperator; /* btree < operator for sorting */ + Oid sortCollation; /* collation for sort/equality */ + bool sortNullsFirst; /* NULLS FIRST? 
*/ + FmgrInfo equalfn; /* equality comparison function */ } WindowStatePerAggData; static void initialize_windowaggregate(WindowAggState *winstate, @@ -206,6 +216,11 @@ static WindowStatePerAggData *initialize_peragg(WindowAggState *winstate, WindowStatePerAgg peraggstate); static Datum GetAggInitVal(Datum textInitVal, Oid transtype); +static bool is_whole_partition_frame(WindowAggState *winstate); +static void eval_windowaggregate_distinct(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate); + static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1, TupleTableSlot *slot2); static bool window_gettupleslot(WindowObject winobj, int64 pos, @@ -693,6 +708,297 @@ finalize_windowaggregate(WindowAggState *winstate, MemoryContextSwitchTo(oldContext); } +/* + * is_whole_partition_frame + * + * Returns true if the window frame is guaranteed to cover the entire + * partition. This is the case when the start is UNBOUNDED PRECEDING, + * there is no EXCLUSION clause, and the end is either UNBOUNDED FOLLOWING + * or CURRENT ROW with no ORDER BY in RANGE or GROUPS mode (which means + * all rows are peers, so CURRENT ROW extends to the partition boundary). + * In ROWS mode, CURRENT ROW always means exactly the current row. + */ +static bool +is_whole_partition_frame(WindowAggState *winstate) +{ + WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan; + int frameOptions = winstate->frameOptions; + + /* Must start at UNBOUNDED PRECEDING */ + if (!(frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)) + return false; + + /* Must not have an EXCLUSION clause */ + if (frameOptions & FRAMEOPTION_EXCLUSION) + return false; + + /* End must be UNBOUNDED FOLLOWING ... */ + if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + return true; + + /* + * ... or CURRENT ROW with no ORDER BY (all rows are peers), but only + * for RANGE or GROUPS mode. In ROWS mode, CURRENT ROW means exactly + * the current row regardless of peers. 
+ */ + if ((frameOptions & FRAMEOPTION_END_CURRENT_ROW) && + !(frameOptions & FRAMEOPTION_ROWS) && + node->ordNumCols == 0) + return true; + + return false; +} + +/* + * eval_windowaggregate_distinct + * + * Compute a single-argument DISTINCT window aggregate over the whole + * partition. We collect all argument values (applying any FILTER clause), + * sort them, skip duplicates, and feed the distinct values into the + * aggregate's transition function. + * + * This follows the pattern of process_ordered_aggregate_single() in + * nodeAgg.c. + */ +static void +eval_windowaggregate_distinct(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate) +{ + WindowObject agg_winobj = winstate->agg_winobj; + TupleTableSlot *temp_slot = winstate->temp_slot_1; + ExprContext *econtext = winstate->tmpcontext; + WindowFuncExprState *wfuncstate = perfuncstate->wfuncstate; + ExprState *filter = wfuncstate->aggfilter; + int numArguments = perfuncstate->numArguments; + LOCAL_FCINFO(fcinfo, FUNC_MAX_ARGS); + Tuplesortstate *sortstate; + Datum newVal; + bool newIsNull; + Datum newAbbrevVal; + Datum oldVal = (Datum) 0; + bool oldIsNull = true; + bool haveOldVal = false; + Datum oldAbbrevVal = (Datum) 0; + MemoryContext oldContext; + int64 total_rows; + int64 row; + + /* Ensure all partition rows are spooled */ + spool_tuples(winstate, -1); + total_rows = winstate->spooled_rows; + + /* Create a tuplesort for the single DISTINCT argument */ + sortstate = tuplesort_begin_datum(peraggstate->inputtypeOid, + peraggstate->sortOperator, + peraggstate->sortCollation, + peraggstate->sortNullsFirst, + work_mem, NULL, TUPLESORT_NONE); + + /* + * Loop over all rows in the partition, evaluate FILTER and the argument, + * and feed values into the sort. 
+ */ + for (row = 0; row < total_rows; row++) + { + if (!window_gettupleslot(agg_winobj, row, temp_slot)) + break; + + econtext->ecxt_outertuple = temp_slot; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + /* Skip anything FILTERed out */ + if (filter) + { + bool isnull; + Datum res = ExecEvalExpr(filter, econtext, &isnull); + + if (isnull || !DatumGetBool(res)) + { + MemoryContextSwitchTo(oldContext); + ResetExprContext(econtext); + ExecClearTuple(temp_slot); + continue; + } + } + + /* Evaluate the single argument */ + { + ExprState *argstate = (ExprState *) linitial(wfuncstate->args); + Datum val; + bool isnull; + + val = ExecEvalExpr(argstate, econtext, &isnull); + + MemoryContextSwitchTo(oldContext); + + /* Feed into sort */ + tuplesort_putdatum(sortstate, val, isnull); + } + + ResetExprContext(econtext); + ExecClearTuple(temp_slot); + } + + /* Sort */ + tuplesort_performsort(sortstate); + + /* + * Read back sorted values, skip duplicates, and feed distinct values + * into the transition function. This mirrors + * process_ordered_aggregate_single() in nodeAgg.c. + */ + while (tuplesort_getdatum(sortstate, true, false, + &newVal, &newIsNull, &newAbbrevVal)) + { + ResetExprContext(econtext); + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + /* + * If DISTINCT mode, skip if not distinct from prior value. + */ + if (haveOldVal && + ((oldIsNull && newIsNull) || + (!oldIsNull && !newIsNull && + oldAbbrevVal == newAbbrevVal && + DatumGetBool(FunctionCall2Coll(&peraggstate->equalfn, + peraggstate->sortCollation, + oldVal, newVal))))) + { + MemoryContextSwitchTo(oldContext); + continue; + } + + /* + * Advance the transition function with this distinct value. + * This replicates the strict-function handling from + * advance_windowaggregate(). 
+ */ + if (peraggstate->transfn.fn_strict) + { + /* For strict transfn, skip NULL inputs */ + if (newIsNull) + { + MemoryContextSwitchTo(oldContext); + goto remember_value; + } + + /* + * For strict transition functions with initial value NULL, + * use the first non-NULL input as the initial state. + */ + if (peraggstate->transValueCount == 0 && + peraggstate->transValueIsNull) + { + MemoryContextSwitchTo(peraggstate->aggcontext); + peraggstate->transValue = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + peraggstate->transValueIsNull = false; + peraggstate->transValueCount = 1; + MemoryContextSwitchTo(oldContext); + goto remember_value; + } + + if (peraggstate->transValueIsNull) + { + /* + * Don't call a strict function with NULL inputs. + */ + MemoryContextSwitchTo(oldContext); + goto remember_value; + } + } + + /* OK to call the transition function */ + InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn), + numArguments + 1, + perfuncstate->winCollation, + (Node *) winstate, NULL); + fcinfo->args[0].value = peraggstate->transValue; + fcinfo->args[0].isnull = peraggstate->transValueIsNull; + fcinfo->args[1].value = newVal; + fcinfo->args[1].isnull = newIsNull; + winstate->curaggcontext = peraggstate->aggcontext; + + { + Datum result; + + result = FunctionCallInvoke(fcinfo); + winstate->curaggcontext = NULL; + + peraggstate->transValueCount++; + + /* + * If pass-by-ref datatype, must copy the new value into + * aggcontext and free the prior transValue. But if transfn + * returned a pointer to its first input, we don't need to do + * anything. Also, if transfn returned a pointer to a R/W + * expanded object that is already a child of the aggcontext, + * assume we can adopt that value without copying it. + * + * This must match advance_windowaggregate's logic exactly. 
+ */ + if (!peraggstate->transtypeByVal && + DatumGetPointer(result) != DatumGetPointer(peraggstate->transValue)) + { + if (!fcinfo->isnull) + { + MemoryContextSwitchTo(peraggstate->aggcontext); + if (DatumIsReadWriteExpandedObject(result, + false, + peraggstate->transtypeLen) && + MemoryContextGetParent(DatumGetEOHP(result)->eoh_context) == CurrentMemoryContext) + /* do nothing */ ; + else + result = datumCopy(result, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + if (!peraggstate->transValueIsNull) + { + if (DatumIsReadWriteExpandedObject(peraggstate->transValue, + false, + peraggstate->transtypeLen)) + DeleteExpandedObject(peraggstate->transValue); + else + pfree(DatumGetPointer(peraggstate->transValue)); + } + } + + MemoryContextSwitchTo(oldContext); + peraggstate->transValue = result; + peraggstate->transValueIsNull = fcinfo->isnull; + } + +remember_value: + /* + * Remember the current value for subsequent duplicate checks. + */ + if (!peraggstate->inputtypeByVal) + { + if (!oldIsNull) + pfree(DatumGetPointer(oldVal)); + if (!newIsNull) + oldVal = datumCopy(newVal, peraggstate->inputtypeByVal, + peraggstate->inputtypeLen); + else + oldVal = (Datum) 0; + } + else + oldVal = newVal; + oldAbbrevVal = newAbbrevVal; + oldIsNull = newIsNull; + haveOldVal = true; + } + + if (!oldIsNull && !peraggstate->inputtypeByVal) + pfree(DatumGetPointer(oldVal)); + + tuplesort_end(sortstate); +} + /* * eval_windowaggregates * evaluate plain aggregates being used as window functions @@ -946,6 +1252,22 @@ eval_windowaggregates(WindowAggState *winstate) } } + /* + * Compute DISTINCT aggregates for the whole partition. These are handled + * separately via sort-based deduplication rather than the main + * accumulation loop below. 
+ */ + for (i = 0; i < numaggs; i++) + { + peraggstate = &winstate->peragg[i]; + if (!peraggstate->windistinct || !peraggstate->restart) + continue; + wfuncno = peraggstate->wfuncno; + eval_windowaggregate_distinct(winstate, + &winstate->perfunc[wfuncno], + peraggstate); + } + /* * Non-restarted aggregates now contain the rows between aggregatedbase * (i.e., frameheadpos) and aggregatedupto, while restarted aggregates @@ -1002,6 +1324,10 @@ eval_windowaggregates(WindowAggState *winstate) { peraggstate = &winstate->peragg[i]; + /* DISTINCT aggregates are handled separately */ + if (peraggstate->windistinct) + continue; + /* Non-restarted aggs skip until aggregatedupto_nonrestarted */ if (!peraggstate->restart && winstate->aggregatedupto < aggregatedupto_nonrestarted) @@ -1169,6 +1495,26 @@ prepare_tuplestore(WindowAggState *winstate) readptr_flags |= EXEC_FLAG_BACKWARD; } + /* + * If any aggregate uses DISTINCT, the read pointer also needs + * BACKWARD capability. The DISTINCT helper reads through the + * entire partition to collect values for sorting, which advances + * the read pointer to the end. The main accumulation loop (for + * non-DISTINCT aggregates in the same WindowAgg node) then needs + * to rewind back to the frame head. + */ + if (!(readptr_flags & EXEC_FLAG_BACKWARD)) + { + for (int i = 0; i < winstate->numaggs; i++) + { + if (winstate->peragg[i].windistinct) + { + readptr_flags |= EXEC_FLAG_BACKWARD; + break; + } + } + } + agg_winobj->readptr = tuplestore_alloc_read_pointer(winstate->buffer, readptr_flags); } @@ -2882,7 +3228,8 @@ ExecReScanWindowAgg(WindowAggState *node) /* * initialize_peragg * - * Almost same as in nodeAgg.c, except we don't support DISTINCT currently. + * Almost same as in nodeAgg.c, except we only support DISTINCT for + * whole-partition frames and single-argument aggregates. 
*/ static WindowStatePerAggData * initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, @@ -2909,13 +3256,20 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, ListCell *lc; /* - * Temporary: reject DISTINCT window aggregates until executor support - * lands. Patch 2 will replace this with actual DISTINCT handling. + * Validate DISTINCT usage. Currently we only support DISTINCT for + * whole-partition frames (where the result is constant across the + * partition) and single-argument aggregates only. */ - if (wfunc->windistinct) + if (wfunc->windistinct && !is_whole_partition_frame(winstate)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("DISTINCT is not yet implemented for window aggregates"))); + errmsg("DISTINCT is only supported for window functions with a frame that covers the entire partition"), + errhint("Remove ORDER BY from the window definition, or use ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING."))); + + if (wfunc->windistinct && list_length(wfunc->args) != 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("DISTINCT is not supported for window aggregate functions with more than one argument"))); numArguments = list_length(wfunc->args); @@ -3161,6 +3515,35 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, else peraggstate->aggcontext = winstate->aggcontext; + /* + * Set up DISTINCT state if needed. We need sort and equality operators + * for the single argument type, plus its type length and by-value info + * for datum copying during the dedup loop. 
+ */ + if (wfunc->windistinct) + { + Oid ltOpr, + eqOpr; + Oid inputType = inputTypes[0]; + + peraggstate->windistinct = true; + peraggstate->inputtypeOid = inputType; + + get_sort_group_operators(inputType, + true, true, false, + &ltOpr, &eqOpr, NULL, + NULL); + + peraggstate->sortOperator = ltOpr; + peraggstate->sortCollation = wfunc->inputcollid; + peraggstate->sortNullsFirst = false; + fmgr_info(get_opcode(eqOpr), &peraggstate->equalfn); + + get_typlenbyval(inputType, + &peraggstate->inputtypeLen, + &peraggstate->inputtypeByVal); + } + ReleaseSysCache(aggTuple); return peraggstate; diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 0f4dc2fe96f..d2db09f83b8 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -5877,8 +5877,7 @@ WINDOW w AS (ORDER BY x ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING); DROP TABLE planets CASCADE; NOTICE: drop cascades to view planets_view -- --- Test DISTINCT in window aggregates (parse/deparse plumbing only; --- execution support is not yet implemented) +-- Test DISTINCT in window aggregates -- -- Should parse successfully and round-trip through a view definition CREATE TEMP VIEW window_distinct_view AS @@ -5896,6 +5895,163 @@ SELECT ntile(DISTINCT 4) OVER () FROM tenk1; -- error ERROR: DISTINCT is not implemented for non-aggregate window functions LINE 1: SELECT ntile(DISTINCT 4) OVER () FROM tenk1; ^ --- Execution fails with a clear executor-side error -SELECT count(DISTINCT four) OVER (PARTITION BY ten) FROM tenk1; -- error -ERROR: DISTINCT is not yet implemented for window aggregates +-- Basic DISTINCT whole-partition cases (should succeed) +SELECT count(DISTINCT four) OVER (PARTITION BY ten) +FROM tenk1 LIMIT 20; + count +------- + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 +(20 rows) + +-- DISTINCT with no PARTITION BY (whole single partition) +SELECT x, sum(DISTINCT x % 3) OVER () +FROM generate_series(1, 9) 
g(x); + x | sum +---+----- + 1 | 3 + 2 | 3 + 3 | 3 + 4 | 3 + 5 | 3 + 6 | 3 + 7 | 3 + 8 | 3 + 9 | 3 +(9 rows) + +-- DISTINCT with explicit UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING +SELECT x, avg(DISTINCT x % 4) OVER (PARTITION BY x > 5 + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +FROM generate_series(1, 10) g(x); + x | avg +----+-------------------- + 1 | 1.5000000000000000 + 2 | 1.5000000000000000 + 3 | 1.5000000000000000 + 4 | 1.5000000000000000 + 5 | 1.5000000000000000 + 6 | 1.5000000000000000 + 7 | 1.5000000000000000 + 8 | 1.5000000000000000 + 9 | 1.5000000000000000 + 10 | 1.5000000000000000 +(10 rows) + +-- DISTINCT with FILTER +SELECT x, + count(DISTINCT x % 3) FILTER (WHERE x > 3) OVER (PARTITION BY x > 5) +FROM generate_series(1, 10) g(x); + x | count +----+------- + 1 | 2 + 2 | 2 + 3 | 2 + 4 | 2 + 5 | 2 + 6 | 3 + 7 | 3 + 8 | 3 + 9 | 3 + 10 | 3 +(10 rows) + +-- NULL handling +SELECT x, + count(DISTINCT x) OVER () +FROM (VALUES (1),(2),(NULL),(2),(NULL),(1),(3)) v(x); + x | count +---+------- + 1 | 3 + 2 | 3 + | 3 + 2 | 3 + | 3 + 1 | 3 + 3 | 3 +(7 rows) + +-- Mixed DISTINCT and non-DISTINCT aggregates in same window +SELECT x, + count(DISTINCT x % 3) OVER w, + sum(x) OVER w +FROM generate_series(1, 9) g(x) +WINDOW w AS (PARTITION BY x > 5); + x | count | sum +---+-------+----- + 1 | 3 | 15 + 2 | 3 | 15 + 3 | 3 | 15 + 4 | 3 | 15 + 5 | 3 | 15 + 6 | 3 | 30 + 7 | 3 | 30 + 8 | 3 | 30 + 9 | 3 | 30 +(9 rows) + +-- Multiple DISTINCT aggregates in same query +SELECT x, + count(DISTINCT x % 2) OVER (PARTITION BY x > 5), + sum(DISTINCT x % 3) OVER (PARTITION BY x > 5) +FROM generate_series(1, 10) g(x); + x | count | sum +----+-------+----- + 1 | 2 | 3 + 2 | 2 | 3 + 3 | 2 | 3 + 4 | 2 | 3 + 5 | 2 | 3 + 6 | 2 | 3 + 7 | 2 | 3 + 8 | 2 | 3 + 9 | 2 | 3 + 10 | 2 | 3 +(10 rows) + +-- Error: non-whole-partition frame (has ORDER BY -> RANGE UNBOUNDED PRECEDING to CURRENT ROW) +SELECT count(DISTINCT x) OVER (PARTITION BY x > 5 ORDER BY x) +FROM generate_series(1, 
10) g(x); -- error +ERROR: DISTINCT is only supported for window functions with a frame that covers the entire partition +HINT: Remove ORDER BY from the window definition, or use ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. +-- Error: partial ROWS frame +SELECT count(DISTINCT x) OVER (ROWS 3 PRECEDING) +FROM generate_series(1, 10) g(x); -- error +ERROR: DISTINCT is only supported for window functions with a frame that covers the entire partition +HINT: Remove ORDER BY from the window definition, or use ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. +-- Error: EXCLUDE clause +SELECT count(DISTINCT x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING EXCLUDE CURRENT ROW) +FROM generate_series(1, 10) g(x); -- error +ERROR: DISTINCT is only supported for window functions with a frame that covers the entire partition +HINT: Remove ORDER BY from the window definition, or use ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. +-- Error: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (not whole-partition +-- even without ORDER BY, because ROWS CURRENT ROW means exactly one row) +SELECT x, + count(DISTINCT x % 3) OVER ( + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +FROM generate_series(1, 10) g(x); -- error +ERROR: DISTINCT is only supported for window functions with a frame that covers the entire partition +HINT: Remove ORDER BY from the window definition, or use ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. 
+-- Error: multi-argument DISTINCT window aggregate (not yet supported) +SELECT string_agg(DISTINCT four::text, ',') OVER (PARTITION BY ten) +FROM tenk1; -- error +ERROR: DISTINCT is not supported for window aggregate functions with more than one argument diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index be45bd5f14f..9c05c778108 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -2135,8 +2135,7 @@ WINDOW w AS (ORDER BY x ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING); DROP TABLE planets CASCADE; -- --- Test DISTINCT in window aggregates (parse/deparse plumbing only; --- execution support is not yet implemented) +-- Test DISTINCT in window aggregates -- -- Should parse successfully and round-trip through a view definition @@ -2149,5 +2148,62 @@ DROP VIEW window_distinct_view; -- DISTINCT on a non-aggregate window function is still a parse error SELECT ntile(DISTINCT 4) OVER () FROM tenk1; -- error --- Execution fails with a clear executor-side error -SELECT count(DISTINCT four) OVER (PARTITION BY ten) FROM tenk1; -- error +-- Basic DISTINCT whole-partition cases (should succeed) +SELECT count(DISTINCT four) OVER (PARTITION BY ten) +FROM tenk1 LIMIT 20; + +-- DISTINCT with no PARTITION BY (whole single partition) +SELECT x, sum(DISTINCT x % 3) OVER () +FROM generate_series(1, 9) g(x); + +-- DISTINCT with explicit UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING +SELECT x, avg(DISTINCT x % 4) OVER (PARTITION BY x > 5 + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +FROM generate_series(1, 10) g(x); + +-- DISTINCT with FILTER +SELECT x, + count(DISTINCT x % 3) FILTER (WHERE x > 3) OVER (PARTITION BY x > 5) +FROM generate_series(1, 10) g(x); + +-- NULL handling +SELECT x, + count(DISTINCT x) OVER () +FROM (VALUES (1),(2),(NULL),(2),(NULL),(1),(3)) v(x); + +-- Mixed DISTINCT and non-DISTINCT aggregates in same window +SELECT x, + count(DISTINCT x % 3) OVER w, + sum(x) OVER w +FROM generate_series(1, 
9) g(x) +WINDOW w AS (PARTITION BY x > 5); + +-- Multiple DISTINCT aggregates in same query +SELECT x, + count(DISTINCT x % 2) OVER (PARTITION BY x > 5), + sum(DISTINCT x % 3) OVER (PARTITION BY x > 5) +FROM generate_series(1, 10) g(x); + +-- Error: non-whole-partition frame (has ORDER BY -> RANGE UNBOUNDED PRECEDING to CURRENT ROW) +SELECT count(DISTINCT x) OVER (PARTITION BY x > 5 ORDER BY x) +FROM generate_series(1, 10) g(x); -- error + +-- Error: partial ROWS frame +SELECT count(DISTINCT x) OVER (ROWS 3 PRECEDING) +FROM generate_series(1, 10) g(x); -- error + +-- Error: EXCLUDE clause +SELECT count(DISTINCT x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING EXCLUDE CURRENT ROW) +FROM generate_series(1, 10) g(x); -- error + +-- Error: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (not whole-partition +-- even without ORDER BY, because ROWS CURRENT ROW means exactly one row) +SELECT x, + count(DISTINCT x % 3) OVER ( + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) +FROM generate_series(1, 10) g(x); -- error + +-- Error: multi-argument DISTINCT window aggregate (not yet supported) +SELECT string_agg(DISTINCT four::text, ',') OVER (PARTITION BY ten) +FROM tenk1; -- error -- 2.52.0