public inbox for [email protected]
help / color / mirror / Atom feedGROUP BY ALL
42+ messages / 11 participants
[nested] [flat]
* GROUP BY ALL
@ 2022-12-19 04:19 Andrey Borodin <[email protected]>
0 siblings, 2 replies; 42+ messages in thread
From: Andrey Borodin @ 2022-12-19 04:19 UTC (permalink / raw)
To: pgsql-hackers
Hi hackers!
I saw a thread in a social network[0] about GROUP BY ALL. The idea seems useful.
I always was writing something like
select datname, usename, count(*) from pg_stat_activity group by 1,2;
and then rewriting to
select datname, usename, query, count(*) from pg_stat_activity group by 1,2;
and then "aaahhhh, add a number at the end".
With the proposed feature I can write just
select datname, usename, count(*) from pg_stat_activity group by all;
PFA very dummy implementation just for a discussion. I think we can
add all non-aggregating targets.
What do you think?
Best regards, Andrey Borodin.
[0] https://www.linkedin.com/posts/mosha_duckdb-firebolt-snowflake-activity-7009615821006131200-VQ0o/
Attachments:
[application/octet-stream] v1-0001-Implement-GROUP-BY-ALL.patch (5.6K, 2-v1-0001-Implement-GROUP-BY-ALL.patch)
download | inline diff:
From e5f9ca89d577926155cc94e0ea5b5bbfefbd331d Mon Sep 17 00:00:00 2001
From: Andrey Borodin <[email protected]>
Date: Sun, 18 Dec 2022 19:52:48 -0800
Subject: [PATCH v1] Implement GROUP BY ALL
---
src/backend/parser/analyze.c | 1 +
src/backend/parser/gram.y | 14 ++++++++++++++
src/backend/parser/parse_agg.c | 23 ++++++++++++++++++++++-
src/backend/utils/adt/ruleutils.c | 3 +++
src/backend/utils/misc/queryjumble.c | 1 +
src/include/nodes/parsenodes.h | 2 ++
6 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 6688c2a865..71d12ead79 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1347,6 +1347,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index b1ae5f834c..84f8a4146a 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -135,6 +135,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12580,6 +12581,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -12597,6 +12599,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13074,14 +13077,25 @@ group_clause:
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->all = false;
n->list = $4;
$$ = n;
}
+ | GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+
+ n->all = true;
+ n->distinct = false;
+ n->list = NIL;
+ $$ = n;
+ }
| /*EMPTY*/
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = false;
+ n->all = false;
n->list = NIL;
$$ = n;
}
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 3ef9e8ee5e..8826829dbc 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -20,6 +20,7 @@
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
+#include "parser/analyze.h"
#include "parser/parse_agg.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
@@ -1072,7 +1073,27 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
Node *clause;
/* This should only be called if we found aggregates or grouping */
- Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual || qry->groupingSets);
+ Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual || qry->groupingSets || qry->groupAll);
+
+ Assert((!qry->groupAll) || (qry->groupClause == NULL));
+
+ if (qry->groupAll)
+ {
+ Index idx = 1;
+ Index sge_idx = 1;
+ foreach(l, qry->targetList)
+ {
+ TargetEntry *tle = lfirst(l);
+ if (IsA(tle->expr, Var))
+ {
+ Oid restype = exprType((Node *) tle->expr);
+ SortGroupClause *sgc = makeSortGroupClauseForSetOp(restype, false);
+ sgc->tleSortGroupRef = sge_idx++;
+ qry->groupClause = lappend(qry->groupClause, sgc);
+ tle->ressortgroupref = idx++;
+ }
+ }
+ }
/*
* If we have grouping sets, expand them and find the intersection of all
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index a20a1b069b..d0d4711c53 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5959,6 +5959,9 @@ get_basic_select_query(Query *query, deparse_context *context,
if (query->groupDistinct)
appendStringInfoString(buf, "DISTINCT ");
+ if (query->groupAll)
+ appendStringInfoString(buf, "ALL ");
+
save_exprkind = context->special_exprkind;
context->special_exprkind = EXPR_KIND_GROUP_BY;
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index 0ace74de78..feac9aa8b2 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -254,6 +254,7 @@ JumbleQueryInternal(JumbleState *jstate, Query *query)
JumbleExpr(jstate, (Node *) query->returningList);
JumbleExpr(jstate, (Node *) query->groupClause);
APP_JUMB(query->groupDistinct);
+ APP_JUMB(query->groupAll);
JumbleExpr(jstate, (Node *) query->groupingSets);
JumbleExpr(jstate, query->havingQual);
JumbleExpr(jstate, (Node *) query->windowClause);
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 6112cd85c8..5a0b1a43cf 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -170,6 +170,7 @@ typedef struct Query
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
+ bool groupAll;
List *groupingSets; /* a list of GroupingSet's if present */
@@ -1737,6 +1738,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll;
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
--
2.37.0 (Apple Git-136)
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: GROUP BY ALL
@ 2022-12-19 04:30 Tom Lane <[email protected]>
parent: Andrey Borodin <[email protected]>
1 sibling, 3 replies; 42+ messages in thread
From: Tom Lane @ 2022-12-19 04:30 UTC (permalink / raw)
To: Andrey Borodin <[email protected]>; +Cc: pgsql-hackers
Andrey Borodin <[email protected]> writes:
> I saw a thread in a social network[0] about GROUP BY ALL. The idea seems useful.
Isn't that just a nonstandard spelling of SELECT DISTINCT?
What would happen if there are aggregate functions in the tlist?
I'm not especially on board with "ALL" meaning "ALL (oh, but not
aggregates)".
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: GROUP BY ALL
@ 2022-12-19 04:40 Andrey Borodin <[email protected]>
parent: Tom Lane <[email protected]>
2 siblings, 0 replies; 42+ messages in thread
From: Andrey Borodin @ 2022-12-19 04:40 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: pgsql-hackers
On Sun, Dec 18, 2022 at 8:30 PM Tom Lane <[email protected]> wrote:
>
> I'm not especially on board with "ALL" meaning "ALL (oh, but not
> aggregates)".
Yes, that's the weak part of the proposal. I even thought about
renaming it to "GROUP BY SOMEHOW" or even "GROUP BY SURPRISE ME".
I mean I see some cases when it's useful and much less cases when it's
dangerously ambiguous. E.g. grouping by result of a subquery looks way
too complex and unpredictable. But with simple Vars... what could go
wrong?
Best regards, Andrey Borodin.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: GROUP BY ALL
@ 2022-12-19 04:45 David G. Johnston <[email protected]>
parent: Tom Lane <[email protected]>
2 siblings, 0 replies; 42+ messages in thread
From: David G. Johnston @ 2022-12-19 04:45 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Andrey Borodin <[email protected]>; pgsql-hackers
On Sunday, December 18, 2022, Tom Lane <[email protected]> wrote:
> Andrey Borodin <[email protected]> writes:
> > I saw a thread in a social network[0] about GROUP BY ALL. The idea seems
> useful.
>
> Isn't that just a nonstandard spelling of SELECT DISTINCT?
>
> What would happen if there are aggregate functions in the tlist?
> I'm not especially on board with "ALL" meaning "ALL (oh, but not
> aggregates)".
>
>
IIUC some systems treat any non-aggregated column as an implicit group by
column. This proposal is an explicit way to enable that implicit behavior
in PostgreSQL. It is, as you note, an odd meaning for the word ALL.
We tend to not accept non-standard usability syntax extensions even if
others systems implement them. I don’t see this one ending up being an
exception…
David J.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: GROUP BY ALL
@ 2022-12-19 13:44 Isaac Morland <[email protected]>
parent: Tom Lane <[email protected]>
2 siblings, 0 replies; 42+ messages in thread
From: Isaac Morland @ 2022-12-19 13:44 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Andrey Borodin <[email protected]>; pgsql-hackers
On Sun, 18 Dec 2022 at 23:30, Tom Lane <[email protected]> wrote:
> Andrey Borodin <[email protected]> writes:
> > I saw a thread in a social network[0] about GROUP BY ALL. The idea seems
> useful.
>
> Isn't that just a nonstandard spelling of SELECT DISTINCT?
>
In a pure relational system, yes; but since Postgres allows duplicate rows,
both in actual table data and in intermediate and final result sets, no.
Although I'm pretty sure no aggregates other than count() are useful - any
other aggregate would always just combine count() copies of the duplicated
value in some way.
What would happen if there are aggregate functions in the tlist?
> I'm not especially on board with "ALL" meaning "ALL (oh, but not
> aggregates)".
>
The requested behaviour can be accomplished by an invocation something like:
select (t).*, count(*) from (select (…field1, field2, …) as t from
…tables…) s group by t;
So we collect all the required fields as a tuple, group by the tuple, and
then unpack it into separate columns in the outer query.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: GROUP BY ALL
@ 2022-12-19 16:53 Vik Fearing <[email protected]>
parent: Andrey Borodin <[email protected]>
1 sibling, 0 replies; 42+ messages in thread
From: Vik Fearing @ 2022-12-19 16:53 UTC (permalink / raw)
To: Andrey Borodin <[email protected]>; pgsql-hackers
On 12/19/22 05:19, Andrey Borodin wrote:
> Hi hackers!
>
> I saw a thread in a social network[0] about GROUP BY ALL. The idea seems useful.
> I always was writing something like
> select datname, usename, count(*) from pg_stat_activity group by 1,2;
> and then rewriting to
> select datname, usename, query, count(*) from pg_stat_activity group by 1,2;
> and then "aaahhhh, add a number at the end".
>
> With the proposed feature I can write just
> select datname, usename, count(*) from pg_stat_activity group by all;
We already have GROUP BY ALL, but it doesn't do this.
> PFA very dummy implementation just for a discussion. I think we can
> add all non-aggregating targets.
>
> What do you think?
I think this is a pretty terrible idea. If we want that kind of
behavior, we should just allow the GROUP BY to be omitted since without
grouping sets, it is kind of redundant anyway.
I don't know what my opinion is on that.
--
Vik Fearing
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-08-17 17:12 Jelte Fennema-Nio <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: Jelte Fennema-Nio @ 2025-08-17 17:12 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: Tom Lane <[email protected]>; David G. Johnston <[email protected]>; David Christensen <[email protected]>; pgsql-hackers
On Tue, 23 Jul 2024 at 22:02, Peter Eisentraut <[email protected]> wrote:
> Looks like the main existing implementations take it to mean all entries
> in the SELECT list that are not aggregate functions.
>
> https://duckdb.org/docs/sql/query_syntax/groupby.html#group-by-all
> https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html#parameters
> https://docs.snowflake.com/en/sql-reference/constructs/group-by#parameters
Oracle added support for GROUP BY ALL too now:
https://danischnider.wordpress.com/2025/08/05/oracle-23-9-supports-group-by-all/
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 07:02 Peter Eisentraut <[email protected]>
parent: Jelte Fennema-Nio <[email protected]>
0 siblings, 2 replies; 42+ messages in thread
From: Peter Eisentraut @ 2025-09-26 07:02 UTC (permalink / raw)
To: pgsql-hackers; +Cc: Tom Lane <[email protected]>; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 17.08.25 19:12, Jelte Fennema-Nio wrote:
> On Tue, 23 Jul 2024 at 22:02, Peter Eisentraut <[email protected]> wrote:
>> Looks like the main existing implementations take it to mean all entries
>> in the SELECT list that are not aggregate functions.
>>
>> https://duckdb.org/docs/sql/query_syntax/groupby.html#group-by-all
>> https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html#parameters
>> https://docs.snowflake.com/en/sql-reference/constructs/group-by#parameters
>
> Oracle added support for GROUP BY ALL too now:
> https://danischnider.wordpress.com/2025/08/05/oracle-23-9-supports-group-by-all/
The proposal for GROUP BY ALL was accepted into the SQL standard draft
yesterday. So maybe someone wants to take this up again.
The initially proposed patch appears to have the right idea overall.
But it does not handle more complex cases like
SELECT a, SUM(b)+a FROM t1 GROUP BY ALL;
correctly. The piece of code that does
if (!IsA(n->expr,Aggref))
should be generalized to check for aggregates not only at the top level.
(For explanation: GROUP BY ALL expands to all select list entries that
do not contain aggregates. So the above would expand to
SELECT a, SUM(b)+a FROM t1 GROUP BY a;
which should then be rejected based on the existing rules.)
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 08:45 Andrey Borodin <[email protected]>
parent: Peter Eisentraut <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: Andrey Borodin @ 2025-09-26 08:45 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
> On 26 Sep 2025, at 12:02, Peter Eisentraut <[email protected]> wrote:
>
> maybe someone wants to take this up again.
I've compared my old patch with David's, and his version looks better.
If David is not going to work on this in nearest future - I'l like to pick up the work.
Either way I'll join as reviewer.
Thanks!
Best regards, Andrey Borodin.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 11:16 David Christensen <[email protected]>
parent: Andrey Borodin <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-26 11:16 UTC (permalink / raw)
To: Andrey Borodin <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
> On Sep 26, 2025, at 3:45 AM, Andrey Borodin <[email protected]> wrote:
>
>
>
>> On 26 Sep 2025, at 12:02, Peter Eisentraut <[email protected]> wrote:
>>
>> maybe someone wants to take this up again.
>
> I've compared my old patch with David's, and his version looks better.
> If David is not going to work on this in nearest future - I'l like to pick up the work.
> Either way I'll join as reviewer.
>
> Thanks!
I’m interested in picking it up again but would appreciate the review.
Thanks,
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 14:11 Tom Lane <[email protected]>
parent: Peter Eisentraut <[email protected]>
1 sibling, 2 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-26 14:11 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: pgsql-hackers; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
Peter Eisentraut <[email protected]> writes:
> The initially proposed patch appears to have the right idea overall.
> But it does not handle more complex cases like
> SELECT a, SUM(b)+a FROM t1 GROUP BY ALL;
> (For explanation: GROUP BY ALL expands to all select list entries that
> do not contain aggregates. So the above would expand to
> SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> which should then be rejected based on the existing rules.)
I thought I understood this definition, up till your last
comment. What's invalid about that expanded query?
regression=# create table t1 (a int, b int);
CREATE TABLE
regression=# SELECT a, SUM(b)+a FROM t1 GROUP BY a;
a | ?column?
---+----------
(0 rows)
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 15:45 David Christensen <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 3 replies; 42+ messages in thread
From: David Christensen @ 2025-09-26 15:45 UTC (permalink / raw)
To: Andrey Borodin <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 6:16 AM David Christensen <[email protected]> wrote:
>
>
>
> > On Sep 26, 2025, at 3:45 AM, Andrey Borodin <[email protected]> wrote:
> >
> >
> >
> >> On 26 Sep 2025, at 12:02, Peter Eisentraut <[email protected]> wrote:
> >>
> >> maybe someone wants to take this up again.
> >
> > I've compared my old patch with David's, and his version looks better.
> > If David is not going to work on this in nearest future - I'l like to pick up the work.
> > Either way I'll join as reviewer.
> >
> > Thanks!
>
> I’m interested in picking it up again but would appreciate the review.
Here is a rebased version with a few more tests. I also changed the
main check here to using `!contain_agg_clause` instead of
`!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
but we already are pulling in `optimizer.h`, so it felt valid to me.)
David
Attachments:
[application/octet-stream] v2-0001-Add-GROUP-BY-ALL.patch (11.7K, 2-v2-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
From 0cb474123cb206fac53004b38b83fa6b05c18426 Mon Sep 17 00:00:00 2001
From: David Christensen <[email protected]>
Date: Mon, 11 Mar 2024 10:37:05 -0400
Subject: [PATCH v2] Add GROUP BY ALL
GROUP BY ALL is a form of GROUP BY which adds any TargetExpr that does not
contain an Aggref into the groupClause of the query, effectively making it
exactly equivalent to specifying those same expressions in an explicit GROUP BY list.
Since this is exclusive with any other GROUP BY form, this is fairly simple to
add into the grammar and handle without needing to get into grouping sets or
other more complicated forms.
This greatly improves data exploration in particular, as well as making it so
you don't need to trivially wrap more complicated queries in a subquery or
reproduce long, complicated expressions in the literal GROUP BY.
---
src/backend/parser/analyze.c | 40 ++++++++++--
src/backend/parser/gram.y | 16 ++++-
src/backend/parser/parse_clause.c | 7 +-
src/backend/utils/adt/ruleutils.c | 3 +
src/include/nodes/parsenodes.h | 2 +
src/include/parser/parse_clause.h | 2 +
src/test/regress/expected/aggregates.out | 82 ++++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 47 ++++++++++++++
8 files changed, 188 insertions(+), 11 deletions(-)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..4a633bb9337 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -32,6 +32,7 @@
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
@@ -1415,6 +1416,29 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->targetList = transformTargetList(pstate, stmt->targetList,
EXPR_KIND_SELECT_TARGET);
+ /*
+ * If groupByAll, expand targetList into groupClause. In this case, we
+ * cannot have any other group clauses, so this is safe.
+ */
+
+ if (stmt->groupAll)
+ {
+ ListCell *l1;
+ /*
+ * Iterate over targets, any non-aggregate gets added as a Target.
+ * Note that it's not enough to check for a top-level Aggref; we need
+ * to ensure that any sub-expression here does not include an Aggref
+ * (for instance an expression such as `sum(col) + 4` should not be
+ * added as a grouping target.
+ */
+ foreach (l1,qry->targetList)
+ {
+ TargetEntry *n = (TargetEntry*)lfirst(l1);
+ if (!contain_agg_clause((Node *)n->expr))
+ qry->groupClause = addTargetToGroupList(pstate, n, qry->groupClause, qry->targetList, 0);
+ }
+ }
+
/* mark column origins */
markTargetListOrigins(pstate, qry->targetList);
@@ -1438,14 +1462,16 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
EXPR_KIND_ORDER_BY,
false /* allow SQL92 rules */ );
- qry->groupClause = transformGroupClause(pstate,
- stmt->groupClause,
- &qry->groupingSets,
- &qry->targetList,
- qry->sortClause,
- EXPR_KIND_GROUP_BY,
- false /* allow SQL92 rules */ );
+ if (!stmt->groupAll)
+ qry->groupClause = transformGroupClause(pstate,
+ stmt->groupClause,
+ &qry->groupingSets,
+ &qry->targetList,
+ qry->sortClause,
+ EXPR_KIND_GROUP_BY,
+ false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..3d97ceddc1e 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13502,12 +13505,21 @@ first_or_next: FIRST_P { $$ = 0; }
* GroupingSet node of some type.
*/
group_clause:
- GROUP_P BY set_quantifier group_by_list
+ GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->list = NIL;
+ n->all = true;
+ $$ = n;
+ }
+ | GROUP_P BY set_quantifier group_by_list
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
n->list = $4;
+ n->all = false;
$$ = n;
}
| /*EMPTY*/
@@ -13516,6 +13528,7 @@ group_clause:
n->distinct = false;
n->list = NIL;
+ n->all = false;
$$ = n;
}
;
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..9c26afd1f3c 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -90,8 +90,6 @@ static int get_matching_location(int sortgroupref,
List *sortgrouprefs, List *exprs);
static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
Relation heapRel);
-static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
- List *grouplist, List *targetlist, int location);
static WindowClause *findWindowClause(List *wclist, const char *name);
static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
@@ -2598,6 +2596,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetList.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -3533,7 +3534,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
*
* Returns the updated SortGroupClause list.
*/
-static List *
+List *
addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist, int location)
{
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..0827b1e2625 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6199,6 +6199,9 @@ get_basic_select_query(Query *query, deparse_context *context)
sep = ", ";
}
}
+ if (query->groupAll)
+ appendContextKeyword(context, " ALL ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
else
{
sep = "";
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4ed14fc5b78..ae1a954afeb 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -215,6 +215,7 @@ typedef struct Query
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
+ bool groupAll; /* is the group by clause distinct? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..a8a8f752287 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -50,6 +50,8 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist, SortBy *sortby);
extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
+extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
+ List *grouplist, List *targetlist, int location);
/* functions in parse_jsontable.c */
extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..0f04e899365 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,88 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+CREATE TEMP TABLE t1 (
+ a int,
+ b int
+);
+COPY t1 FROM STDIN;
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ b | count
+---+-------
+ 3 | 2
+ 2 | 2
+ 1 | 1
+(3 rows)
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ a | count
+---+-------
+ | 0
+ 1 | 4
+(2 rows)
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ a | sum | b
+---+-----+---
+ 1 | 1 | 1
+ 1 | 6 | 3
+ 1 | 2 | 2
+ | 2 | 2
+(4 rows)
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+ ?column?
+----------
+
+ 3
+ 4
+ 2
+(4 rows)
+
+-- non-top-level expression
+SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ a | ?column?
+---+----------
+ | 6
+ 1 | 13
+(2 rows)
+
+-- including grouped column
+SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ a | ?column?
+---+----------
+ |
+ 1 | 10
+(2 rows)
+
+-- oops all aggregates
+SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ count | sum
+-------+-----
+ 4 | 11
+(1 row)
+
+-- empty column list
+SELECT FROM t1 GROUP BY ALL;
+--
+(5 rows)
+
+-- filter
+SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+ a | count
+---+-------
+ | 0
+ 1 | 1
+(2 rows)
+
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..1f4c282dc62 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,53 @@ drop table t2;
drop table t3;
drop table p_t1;
+
+--
+-- Test GROUP BY ALL
+--
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int
+);
+
+COPY t1 FROM STDIN;
+1 1
+1 2
+1 3
+\N 2
+1 3
+\.
+
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level expression
+SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
--
2.49.0
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 15:49 David Christensen <[email protected]>
parent: David Christensen <[email protected]>
2 siblings, 0 replies; 42+ messages in thread
From: David Christensen @ 2025-09-26 15:49 UTC (permalink / raw)
To: Andrey Borodin <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
Added to commitfest as https://commitfest.postgresql.org/patch/6085/
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 15:52 David Christensen <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 0 replies; 42+ messages in thread
From: David Christensen @ 2025-09-26 15:52 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 9:12 AM Tom Lane <[email protected]> wrote:
>
> Peter Eisentraut <[email protected]> writes:
> > The initially proposed patch appears to have the right idea overall.
> > But it does not handle more complex cases like
> > SELECT a, SUM(b)+a FROM t1 GROUP BY ALL;
>
> > (For explanation: GROUP BY ALL expands to all select list entries that
> > do not contain aggregates. So the above would expand to
> > SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> > which should then be rejected based on the existing rules.)
>
> I thought I understood this definition, up till your last
> comment. What's invalid about that expanded query?
>
> regression=# create table t1 (a int, b int);
> CREATE TABLE
> regression=# SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> a | ?column?
> ---+----------
> (0 rows)
Agreed that this shouldn't be an error; added a similar test case to
v2 of this patch.
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 15:54 Peter Eisentraut <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 2 replies; 42+ messages in thread
From: Peter Eisentraut @ 2025-09-26 15:54 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: pgsql-hackers; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 26.09.25 16:11, Tom Lane wrote:
> Peter Eisentraut <[email protected]> writes:
>> The initially proposed patch appears to have the right idea overall.
>> But it does not handle more complex cases like
>> SELECT a, SUM(b)+a FROM t1 GROUP BY ALL;
>
>> (For explanation: GROUP BY ALL expands to all select list entries that
>> do not contain aggregates. So the above would expand to
>> SELECT a, SUM(b)+a FROM t1 GROUP BY a;
>> which should then be rejected based on the existing rules.)
>
> I thought I understood this definition, up till your last
> comment. What's invalid about that expanded query?
>
> regression=# create table t1 (a int, b int);
> CREATE TABLE
> regression=# SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> a | ?column?
> ---+----------
> (0 rows)
This was a sloppy example. Here is a better one:
create table t1 (a int, b int, c int);
select a, sum(b)+c from t1 group by all;
This is equivalent to
select a, sum(b)+c from t1 group by a;
which would be rejected as
ERROR: column "t1.c" must appear in the GROUP BY clause or be used
in an aggregate function
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 15:59 David Christensen <[email protected]>
parent: Peter Eisentraut <[email protected]>
1 sibling, 0 replies; 42+ messages in thread
From: David Christensen @ 2025-09-26 15:59 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: Tom Lane <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 10:54 AM Peter Eisentraut <[email protected]> wrote:
>
> On 26.09.25 16:11, Tom Lane wrote:
> > Peter Eisentraut <[email protected]> writes:
> >> The initially proposed patch appears to have the right idea overall.
> >> But it does not handle more complex cases like
> >> SELECT a, SUM(b)+a FROM t1 GROUP BY ALL;
> >
> >> (For explanation: GROUP BY ALL expands to all select list entries that
> >> do not contain aggregates. So the above would expand to
> >> SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> >> which should then be rejected based on the existing rules.)
> >
> > I thought I understood this definition, up till your last
> > comment. What's invalid about that expanded query?
> >
> > regression=# create table t1 (a int, b int);
> > CREATE TABLE
> > regression=# SELECT a, SUM(b)+a FROM t1 GROUP BY a;
> > a | ?column?
> > ---+----------
> > (0 rows)
>
> This was a sloppy example. Here is a better one:
>
> create table t1 (a int, b int, c int);
>
> select a, sum(b)+c from t1 group by all;
>
> This is equivalent to
>
> select a, sum(b)+c from t1 group by a;
>
> which would be rejected as
>
> ERROR: column "t1.c" must appear in the GROUP BY clause or be used
> in an aggregate function
Verified that with v2 that this is what happens in this case; will
include this and whatever other feedback there is in a v3.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:04 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
2 siblings, 1 reply; 42+ messages in thread
From: Tom Lane @ 2025-09-26 16:04 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
David Christensen <[email protected]> writes:
> Here is a rebased version with a few more tests. I also changed the
> main check here to using `!contain_agg_clause` instead of
> `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> but we already are pulling in `optimizer.h`, so it felt valid to me.)
contain_agg_clause will blow up on a SubLink, so I doubt this is
gonna be robust.
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:13 David Christensen <[email protected]>
parent: Tom Lane <[email protected]>
0 siblings, 2 replies; 42+ messages in thread
From: David Christensen @ 2025-09-26 16:13 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 11:05 AM Tom Lane <[email protected]> wrote:
>
> David Christensen <[email protected]> writes:
> > Here is a rebased version with a few more tests. I also changed the
> > main check here to using `!contain_agg_clause` instead of
> > `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> > but we already are pulling in `optimizer.h`, so it felt valid to me.)
>
> contain_agg_clause will blow up on a SubLink, so I doubt this is
> gonna be robust.
Fair enough, see that Assert now; easy enough to make a new
expression_tree_walker that only looks for Aggref and short-circuits
SubLink (which I assume is the right behavior here, but might have to
add some more tests/play around with subqueries in the GROUP BY ALL
part).
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:17 David Christensen <[email protected]>
parent: David Christensen <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-26 16:17 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 11:13 AM David Christensen <[email protected]> wrote:
>
> On Fri, Sep 26, 2025 at 11:05 AM Tom Lane <[email protected]> wrote:
> >
> > David Christensen <[email protected]> writes:
> > > Here is a rebased version with a few more tests. I also changed the
> > > main check here to using `!contain_agg_clause` instead of
> > > `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> > > but we already are pulling in `optimizer.h`, so it felt valid to me.)
> >
> > contain_agg_clause will blow up on a SubLink, so I doubt this is
> > gonna be robust.
>
> Fair enough, see that Assert now; easy enough to make a new
> expression_tree_walker that only looks for Aggref and short-circuits
> SubLink (which I assume is the right behavior here, but might have to
> add some more tests/play around with subqueries in the GROUP BY ALL
> part).
Or contain_aggs_of_level(), assuming I can suss out how to get the
current level... :D Anyway, will mess with this for a bit.
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:23 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: Tom Lane @ 2025-09-26 16:23 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
David Christensen <[email protected]> writes:
> On Fri, Sep 26, 2025 at 11:05 AM Tom Lane <[email protected]> wrote:
>> contain_agg_clause will blow up on a SubLink, so I doubt this is
>> gonna be robust.
> Fair enough, see that Assert now; easy enough to make a new
> expression_tree_walker that only looks for Aggref and short-circuits
> SubLink (which I assume is the right behavior here, but might have to
> add some more tests/play around with subqueries in the GROUP BY ALL
> part).
No, I think the correct behavior would have to be to descend into
SubLinks to see if they contain any aggregates belonging to the
outer query level.
However (looks around) we do already have that code.
See contain_aggs_of_level. (contain_agg_clause is essentially
a simplified version that is okay to use in the planner because
it's already gotten rid of sublinks.)
What mainly concerns me at this point is whether we've identified
aggregate levels at the point in parsing where you want to run this.
I have a bit of a worry that that might interact with grouping.
Presumably the SQL committee thought about that, so it's probably
soluble, but ...
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:24 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 0 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-26 16:24 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
David Christensen <[email protected]> writes:
> Or contain_aggs_of_level(), assuming I can suss out how to get the
> current level... :D Anyway, will mess with this for a bit.
Current level is zero by definition ...
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:37 jian he <[email protected]>
parent: David Christensen <[email protected]>
2 siblings, 1 reply; 42+ messages in thread
From: jian he @ 2025-09-26 16:37 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 11:46 PM David Christensen <[email protected]> wrote:
>
> >
> > I’m interested in picking it up again but would appreciate the review.
>
> Here is a rebased version with a few more tests. I also changed the
> main check here to using `!contain_agg_clause` instead of
> `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> but we already are pulling in `optimizer.h`, so it felt valid to me.)
>
hi.
I only briefly browse the patch text file, so forgive me.
seems missing deparse regress tests
i think you may need one test like:
create view v1 as SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
\sv v1
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 16:56 David Christensen <[email protected]>
parent: jian he <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-26 16:56 UTC (permalink / raw)
To: jian he <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 11:38 AM jian he <[email protected]> wrote:
>
> On Fri, Sep 26, 2025 at 11:46 PM David Christensen <[email protected]> wrote:
> >
> > >
> > > I’m interested in picking it up again but would appreciate the review.
> >
> > Here is a rebased version with a few more tests. I also changed the
> > main check here to using `!contain_agg_clause` instead of
> > `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> > but we already are pulling in `optimizer.h`, so it felt valid to me.)
> >
>
> hi.
> I only briefly browse the patch text file, so forgive me.
> seems missing deparse regress tests
>
> i think you may need one test like:
>
> create view v1 as SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
> \sv v1
Thanks, good suggestion; not sure the appropriate final location for
this, but it did show a bug in the current patch.
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 17:00 David Christensen <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-26 17:00 UTC (permalink / raw)
To: jian he <[email protected]>; +Cc: Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; Tom Lane <[email protected]>; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
Version 3 of this patch, incorporating some of the feedback thus far:
On Fri, Sep 26, 2025 at 11:56 AM David Christensen <[email protected]> wrote:
>
> On Fri, Sep 26, 2025 at 11:38 AM jian he <[email protected]> wrote:
> >
> > On Fri, Sep 26, 2025 at 11:46 PM David Christensen <[email protected]> wrote:
> > >
> > > >
> > > > I’m interested in picking it up again but would appreciate the review.
> > >
> > > Here is a rebased version with a few more tests. I also changed the
> > > main check here to using `!contain_agg_clause` instead of
> > > `!IsA(Aggref))` directly. (This was defined in `optimizer/clauses.h`,
> > > but we already are pulling in `optimizer.h`, so it felt valid to me.)
> > >
> >
> > hi.
> > I only briefly browse the patch text file, so forgive me.
> > seems missing deparse regress tests
> >
> > i think you may need one test like:
> >
> > create view v1 as SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
> > \sv v1
>
> Thanks, good suggestion; not sure the appropriate final location for
> this, but it did show a bug in the current patch.
>
> David
Attachments:
[application/octet-stream] v3-0001-Add-GROUP-BY-ALL.patch (13.5K, 2-v3-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
From c24d5e870993bebc1d6fcb35e7d8367b9fca6ddf Mon Sep 17 00:00:00 2001
From: David Christensen <[email protected]>
Date: Mon, 11 Mar 2024 10:37:05 -0400
Subject: [PATCH v3] Add GROUP BY ALL
GROUP BY ALL is a form of GROUP BY which adds any TargetExpr that does not
contain an Aggref into the groupClause of the query, effectively making it
exactly equivalent to specifying those same expressions in an explicit GROUP BY list.
Since this is exclusive with any other GROUP BY form, this is fairly simple to
add into the grammar and handle without needing to get into grouping sets or
other more complicated forms.
This greatly improves data exploration in particular, as well as making it so
you don't need to trivially wrap more complicated queries in a subquery or
reproduce long, complicated expressions in the literal GROUP BY.
---
src/backend/parser/analyze.c | 40 +++++--
src/backend/parser/gram.y | 16 ++-
src/backend/parser/parse_clause.c | 7 +-
src/backend/utils/adt/ruleutils.c | 5 +-
src/include/nodes/parsenodes.h | 2 +
src/include/parser/parse_clause.h | 2 +
src/test/regress/expected/aggregates.out | 133 +++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 66 +++++++++++
8 files changed, 259 insertions(+), 12 deletions(-)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..2262c0cad08 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -48,6 +48,7 @@
#include "parser/parse_target.h"
#include "parser/parse_type.h"
#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
#include "utils/backend_status.h"
#include "utils/builtins.h"
#include "utils/guc.h"
@@ -1415,6 +1416,29 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->targetList = transformTargetList(pstate, stmt->targetList,
EXPR_KIND_SELECT_TARGET);
+ /*
+ * If groupByAll, expand targetList into groupClause. In this case, we
+ * cannot have any other group clauses, so this is safe.
+ */
+
+ if (stmt->groupAll)
+ {
+ ListCell *l1;
+ /*
+ * Iterate over targets, any non-aggregate gets added as a Target.
+ * Note that it's not enough to check for a top-level Aggref; we need
+ * to ensure that any sub-expression here does not include an Aggref
+ * (for instance an expression such as `sum(col) + 4` should not be
+ * added as a grouping target.
+ */
+ foreach (l1,qry->targetList)
+ {
+ TargetEntry *n = (TargetEntry*)lfirst(l1);
+ if (!contain_aggs_of_level((Node *)n->expr, 0))
+ qry->groupClause = addTargetToGroupList(pstate, n, qry->groupClause, qry->targetList, 0);
+ }
+ }
+
/* mark column origins */
markTargetListOrigins(pstate, qry->targetList);
@@ -1438,14 +1462,16 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
EXPR_KIND_ORDER_BY,
false /* allow SQL92 rules */ );
- qry->groupClause = transformGroupClause(pstate,
- stmt->groupClause,
- &qry->groupingSets,
- &qry->targetList,
- qry->sortClause,
- EXPR_KIND_GROUP_BY,
- false /* allow SQL92 rules */ );
+ if (!stmt->groupAll)
+ qry->groupClause = transformGroupClause(pstate,
+ stmt->groupClause,
+ &qry->groupingSets,
+ &qry->targetList,
+ qry->sortClause,
+ EXPR_KIND_GROUP_BY,
+ false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..3d97ceddc1e 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13502,12 +13505,21 @@ first_or_next: FIRST_P { $$ = 0; }
* GroupingSet node of some type.
*/
group_clause:
- GROUP_P BY set_quantifier group_by_list
+ GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->list = NIL;
+ n->all = true;
+ $$ = n;
+ }
+ | GROUP_P BY set_quantifier group_by_list
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
n->list = $4;
+ n->all = false;
$$ = n;
}
| /*EMPTY*/
@@ -13516,6 +13528,7 @@ group_clause:
n->distinct = false;
n->list = NIL;
+ n->all = false;
$$ = n;
}
;
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..9c26afd1f3c 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -90,8 +90,6 @@ static int get_matching_location(int sortgroupref,
List *sortgrouprefs, List *exprs);
static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
Relation heapRel);
-static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
- List *grouplist, List *targetlist, int location);
static WindowClause *findWindowClause(List *wclist, const char *name);
static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
@@ -2598,6 +2596,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetList.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -3533,7 +3534,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
*
* Returns the updated SortGroupClause list.
*/
-static List *
+List *
addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist, int location)
{
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..aa97cc11c78 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,10 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupAll)
+ appendContextKeyword(context, " ALL ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f1706df58fd..b0c1292d16a 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -215,6 +215,7 @@ typedef struct Query
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
+ bool groupAll; /* is the group by clause distinct? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..a8a8f752287 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -50,6 +50,8 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist, SortBy *sortby);
extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
+extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
+ List *grouplist, List *targetlist, int location);
/* functions in parse_jsontable.c */
extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..74bdfc24bdc 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,139 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+COPY t1 FROM STDIN;
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ b | count
+---+-------
+ 3 | 2
+ 2 | 2
+ 1 | 1
+(3 rows)
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ a | count
+---+-------
+ | 0
+ 1 | 4
+(2 rows)
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ a | sum | b
+---+-----+---
+ 1 | 1 | 1
+ 1 | 6 | 3
+ 1 | 2 | 2
+ | 2 | 2
+(4 rows)
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+ ?column?
+----------
+
+ 3
+ 4
+ 2
+(4 rows)
+
+-- non-top-level expression
+SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ a | ?column?
+---+----------
+ | 6
+ 1 | 13
+(2 rows)
+
+-- including grouped column
+SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ a | ?column?
+---+----------
+ |
+ 1 | 10
+(2 rows)
+
+-- oops all aggregates
+SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ count | sum
+-------+-----
+ 4 | 11
+(1 row)
+
+-- empty column list
+SELECT FROM t1 GROUP BY ALL;
+--
+(5 rows)
+
+-- filter
+SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+ a | count
+---+-------
+ | 0
+ 1 | 1
+(2 rows)
+
+-- all cols
+SELECT * FROM t1 GROUP BY ALL;
+ a | b | c
+---+---+---
+ | 2 | 2
+ 1 | 3 | 1
+ 1 | 2 | 2
+ 1 | 1 | 1
+(4 rows)
+
+SELECT *, count(*) FROM t1 GROUP BY ALL;
+ a | b | c | count
+---+---+---+-------
+ | 2 | 2 | 1
+ 1 | 3 | 1 | 2
+ 1 | 2 | 2 | 1
+ 1 | 1 | 1 | 1
+(4 rows)
+
+-- expression without including aggregate columns
+SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ ^
+-- subquery
+SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+ a | sum
+---+-----
+ 1 | 11
+(1 row)
+
+-- cte
+WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+ avg | sum_b
+------------------------+-------
+ 1.00000000000000000000 | 9
+ | 2
+(2 rows)
+
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE: view "v1" will be a temporary view
+\sv v1
+CREATE OR REPLACE VIEW pg_temp_93.v1 AS
+ SELECT b,
+ count(*) AS count
+ FROM t1
+ GROUP BY
+ ALL
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..11d6161cdf4 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,72 @@ drop table t2;
drop table t3;
drop table p_t1;
+
+--
+-- Test GROUP BY ALL
+--
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+COPY t1 FROM STDIN;
+1 1 1
+1 2 2
+1 3 1
+\N 2 2
+1 3 1
+\.
+
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level expression
+SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+-- all cols
+SELECT * FROM t1 GROUP BY ALL;
+SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+
+-- cte
+WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+\sv v1
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
--
2.49.0
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 17:46 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: Tom Lane @ 2025-09-26 17:46 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: jian he <[email protected]>; Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
David Christensen <[email protected]> writes:
> Version 3 of this patch, incorporating some of the feedback thus far:
Some random comments:
I don't love where you put the parsing code. Instead of
exposing addTargetToGroupList, I think you should have given
transformGroupClause the responsibility of expanding GROUP BY ALL.
One reason for that is that GROUP BY processing depends on the
results of transformSortClause. This means that in v3, the
behavior of
SELECT x FROM ... GROUP BY x ORDER BY x;
will be subtly different from
SELECT x FROM ... GROUP BY ALL ORDER BY x;
which seems like a bug, or at least not desirable. (You might need a
DESC or USING decoration in the ORDER BY to expose this clearly.)
The parsing code itself is not great:
+ TargetEntry *n = (TargetEntry*)lfirst(l1);
+ if (!contain_aggs_of_level((Node *)n->expr, 0))
+ qry->groupClause = addTargetToGroupList(pstate, n, qry->groupClause, qry->targetList, 0);
You should be skipping resjunk entries, and please use "tle" or some
such name less generic than "n", and "0" is not the correct location
to pass to addTargetToGroupList. Probably the location of the ALL
keyword would be the ideal thing, but if we don't have that, the
notation for "no location known" is -1 not 0. We could also
consider using exprLocation(tle->expr), despite the comment on
addTargetToGroupList that that's not the right thing. This might
actually be better than pointing at the ALL anyway, since that would
give no hint which targetentry caused the error.
The test cases seem poorly designed, because it's very hard to be
sure whether the code expanded the ALL as-expected. I think you
could improve them by not executing the queries but just EXPLAINing
them, so that the expanded group-by list is directly visible.
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 18:50 David Christensen <[email protected]>
parent: Tom Lane <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-26 18:50 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: jian he <[email protected]>; Andrey Borodin <[email protected]>; Peter Eisentraut <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Fri, Sep 26, 2025 at 12:46 PM Tom Lane <[email protected]> wrote:
> Some random comments:
[snip]
Great feedback, thanks; attached is v4 which should address these comments.
David
Attachments:
[application/octet-stream] v4-0001-Add-GROUP-BY-ALL.patch (16.3K, 2-v4-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
From 842bba651d6fd184402bcc404aad6a709c988614 Mon Sep 17 00:00:00 2001
From: David Christensen <[email protected]>
Date: Mon, 11 Mar 2024 10:37:05 -0400
Subject: [PATCH v4] Add GROUP BY ALL
GROUP BY ALL is a form of GROUP BY which adds any TargetExpr that does not
contain an Aggref into the groupClause of the query, effectively making it
exactly equivalent to specifying those same expressions in an explicit GROUP BY list.
Since this is exclusive with any other GROUP BY form, this is fairly simple to
add into the grammar and handle without needing to get into grouping sets or
other more complicated forms.
This greatly improves data exploration in particular, as well as making it so
you don't need to trivially wrap more complicated queries in a subquery or
reproduce long, complicated expressions in the literal GROUP BY.
---
src/backend/parser/analyze.c | 8 +-
src/backend/parser/gram.y | 16 ++-
src/backend/parser/parse_clause.c | 41 ++++++-
src/backend/utils/adt/ruleutils.c | 5 +-
src/include/nodes/parsenodes.h | 3 +
src/include/parser/parse_clause.h | 2 +-
src/test/regress/expected/aggregates.out | 148 +++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 62 ++++++++++
8 files changed, 278 insertions(+), 7 deletions(-)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..991a5919ab1 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -48,6 +48,7 @@
#include "parser/parse_target.h"
#include "parser/parse_type.h"
#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
#include "utils/backend_status.h"
#include "utils/builtins.h"
#include "utils/guc.h"
@@ -1444,8 +1445,10 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
- false /* allow SQL92 rules */ );
+ false /* allow SQL92 rules */,
+ stmt->groupAll);
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
@@ -2934,7 +2937,8 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt)
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
- false /* allow SQL92 rules */ );
+ false /* allow SQL92 rules */,
+ qry->groupAll);
if (sstmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..3d97ceddc1e 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13502,12 +13505,21 @@ first_or_next: FIRST_P { $$ = 0; }
* GroupingSet node of some type.
*/
group_clause:
- GROUP_P BY set_quantifier group_by_list
+ GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->list = NIL;
+ n->all = true;
+ $$ = n;
+ }
+ | GROUP_P BY set_quantifier group_by_list
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
n->list = $4;
+ n->all = false;
$$ = n;
}
| /*EMPTY*/
@@ -13516,6 +13528,7 @@ group_clause:
n->distinct = false;
n->list = NIL;
+ n->all = false;
$$ = n;
}
;
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..7371a98e1f3 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetlist.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -2627,11 +2630,12 @@ transformGroupingSet(List **flatresult,
* sortClause ORDER BY clause (SortGroupClause nodes)
* exprKind expression kind
* useSQL99 SQL99 rather than SQL92 syntax
+ * groupByAll is this a GROUP BY ALL statement?
*/
List *
transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
List **targetlist, List *sortClause,
- ParseExprKind exprKind, bool useSQL99)
+ ParseExprKind exprKind, bool useSQL99, bool groupByAll)
{
List *result = NIL;
List *flat_grouplist;
@@ -2640,6 +2644,38 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
bool hasGroupingSets = false;
Bitmapset *seen_local = NULL;
+
+ /*
+ * If we have GROUP BY ALL, we cannot (by definition) have other GROUP BY
+ * options, including grouping sets.
+ */
+
+ if (groupByAll)
+ {
+ Assert(grouplist == NULL);
+
+ /*
+ * Iterate over targets, any non-aggregate gets added as a Target.
+ * Note that it's not enough to check for a top-level Aggref; we need
+ * to ensure that any sub-expression here does not include an Aggref
+ * (for instance an expression such as `sum(col) + 4` should not be
+ * added as a grouping target).
+ *
+ * We also need to skip any resjunk columns, since we only want to
+ * group by real TLEs.
+ */
+ foreach_ptr (TargetEntry, tle, *targetlist)
+ {
+ if (tle->resjunk)
+ continue;
+
+ if (!contain_aggs_of_level((Node *)tle->expr, 0))
+ result = addTargetToGroupList(pstate, tle, result, *targetlist, exprLocation((Node *)tle));
+ }
+
+ return result;
+ }
+
/*
* Recursively flatten implicit RowExprs. (Technically this is only needed
* for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2822,7 +2858,8 @@ transformWindowDefinitions(ParseState *pstate,
targetlist,
orderClause,
EXPR_KIND_WINDOW_PARTITION,
- true /* force SQL99 rules */ );
+ true /* force SQL99 rules */,
+ false /* group by all */ );
/*
* And prepare the new WindowClause.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..aa97cc11c78 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,10 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupAll)
+ appendContextKeyword(context, " ALL ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f1706df58fd..fa38d303d2c 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -215,6 +215,8 @@ typedef struct Query
List *groupClause; /* a list of SortGroupClause's */
bool groupDistinct; /* is the group by clause distinct? */
+ bool groupAll; /* is the group by for all non-aggregate
+ * columns? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2194,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..aa8b54ab5c4 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -28,7 +28,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
extern List *transformGroupClause(ParseState *pstate, List *grouplist,
List **groupingSets,
List **targetlist, List *sortClause,
- ParseExprKind exprKind, bool useSQL99);
+ ParseExprKind exprKind, bool useSQL99, bool groupByAll);
extern List *transformSortClause(ParseState *pstate, List *orderlist,
List **targetlist, ParseExprKind exprKind,
bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..76e7728c678 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,154 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- results, so test some queries and verify the EXPLAIN plans.
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+-- basic field check
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b
+ -> Seq Scan on t1
+(3 rows)
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: (a + b)
+ -> Seq Scan on t1
+(3 rows)
+
+-- non-top-level expression
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including grouped column
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ -> Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------
+ Seq Scan on t1
+(1 row)
+
+-- filter
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: ...OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + c FROM t1 ...
+ ^
+-- subquery
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+-----------------------------------
+ GroupAggregate
+ InitPlan 1
+ -> Limit
+ -> Seq Scan on t1 t1_1
+ -> Seq Scan on t1
+(5 rows)
+
+-- cte
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+ QUERY PLAN
+----------------------------
+ HashAggregate
+ Group Key: sum(t1.b)
+ -> HashAggregate
+ Group Key: t1.a
+ -> Seq Scan on t1
+(5 rows)
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE: view "v1" will be a temporary view
+SELECT pg_get_viewdef('v1'::regclass);
+ pg_get_viewdef
+-----------------------
+ SELECT b, +
+ count(*) AS count+
+ FROM t1 +
+ GROUP BY +
+ ALL ;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..10c6d4dd1a4 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,68 @@ drop table t2;
drop table t3;
drop table p_t1;
+
+--
+-- Test GROUP BY ALL
+--
+
+-- We don't care about the data here, just the proper transformation of the
+-- results, so test some queries and verify the EXPLAIN plans.
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+-- basic field check
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level expression
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM t1 GROUP BY ALL;
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+
+-- cte
+EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
--
2.49.0
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 20:18 Tom Lane <[email protected]>
parent: Peter Eisentraut <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: Tom Lane @ 2025-09-26 20:18 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: pgsql-hackers; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
Peter Eisentraut <[email protected]> writes:
> On 26.09.25 16:11, Tom Lane wrote:
>> I thought I understood this definition, up till your last
>> comment. What's invalid about that expanded query?
> This was a sloppy example. Here is a better one:
> create table t1 (a int, b int, c int);
> select a, sum(b)+c from t1 group by all;
> This is equivalent to
> select a, sum(b)+c from t1 group by a;
> which would be rejected as
> ERROR: column "t1.c" must appear in the GROUP BY clause or be used
> in an aggregate function
Got it, mostly. There is an edge case, though: what if there are no
candidate grouping items? I see these test cases in David's patch:
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ -> Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------
+ Seq Scan on t1
+(1 row)
That is, in such cases the patch behaves as if there were no GROUP BY
clause at all, which seems kinda dubious. Should this be an error,
and if not what's it supposed to do? The second case is outside the
SQL spec so I'm not expecting guidance on that, but surely the
committee thought about the first case.
Also, what about window functions in the tlist? If you do
regression=# explain select sum(q1) over(partition by q2) from int8_tbl group by 1;
you get
ERROR: window functions are not allowed in GROUP BY
LINE 1: explain select sum(q1) over(partition by q2) from int8_tbl g...
^
but that's not what is happening with
regression=# explain select sum(q1) over(partition by q2) from int8_tbl group by all;
ERROR: column "int8_tbl.q2" must appear in the GROUP BY clause or be used in an aggregate function
LINE 1: explain select sum(q1) over(partition by q2) from int8_tbl g...
^
(I didn't stop to figure out why this isn't giving the same error, but
maybe it's an order-of-checks thing.) In any case: should this give
"window functions are not allowed in GROUP BY", or should the
window-function-containing tlist item be silently skipped by GROUP BY
ALL? Trying to make it work is surely not the right answer.
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-26 20:37 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 0 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-26 20:37 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; pgsql-hackers
[ trimming the cc: list because gmail decided my last was spam ]
David Christensen <[email protected]> writes:
> Great feedback, thanks; attached is v4 which should address these comments.
I did a pass of cleanup over this --- mostly cosmetic, but not
entirely. Along the way I discovered a pre-existing bug:
transformSelectStmt does
qry->groupDistinct = stmt->groupDistinct;
but transformPLAssignStmt fails to, with the result that GROUP BY
DISTINCT would misbehave if used in a plpgsql "expression" context.
I'm not hugely surprised that no one has reported that from the
field, but nonetheless it's broken. In the attached v5 I just
quickly added the missing line and moved on, but we'll need to
back-patch that bit. (Maybe we'd be well advised to refactor to
reduce the amount of duplicated code that needs to be kept in sync?)
I have not attempted to address the definitional issues I just
queried Peter about. Other open items:
* Documentation
* The test cases deserve reconsideration now that we think their
charter only goes as far as EXPLAIN'ing the results; some of them
seem pretty redundant in this context.
regards, tom lane
Attachments:
[text/x-diff] v5-0001-Add-GROUP-BY-ALL.patch (14.1K, 2-v5-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..fe632d3dabf 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
+ stmt->groupAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
@@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt)
qry->groupClause = transformGroupClause(pstate,
sstmt->groupClause,
+ sstmt->groupAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
+ qry->groupDistinct = sstmt->groupDistinct;
+ qry->groupAll = sstmt->groupAll;
if (sstmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->all = false;
n->list = $4;
$$ = n;
}
+ | GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->all = true;
+ n->list = NIL;
+ $$ = n;
+ }
| /*EMPTY*/
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = false;
+ n->all = false;
n->list = NIL;
$$ = n;
}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..562e3e90e34 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetlist.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
*
* pstate ParseState
* grouplist clause to transform
+ * groupByAll is this a GROUP BY ALL statement?
* groupingSets reference to list to contain the grouping set tree
* targetlist reference to TargetEntry list
* sortClause ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
* useSQL99 SQL99 rather than SQL92 syntax
*/
List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+ List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99)
{
@@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
bool hasGroupingSets = false;
Bitmapset *seen_local = NULL;
+ /* Handle GROUP BY ALL */
+ if (groupByAll)
+ {
+ /* There cannot have been any explicit list items */
+ Assert(grouplist == NIL);
+
+ /*
+ * Iterate over targets, any non-aggregate gets added as a Target.
+ * Note that it's not enough to check for a top-level Aggref; we need
+ * to ensure that any sub-expression here does not include an Aggref
+ * (for instance an expression such as `sum(col) + 4` should not be
+ * added as a grouping target).
+ *
+ * We specify the parse location as the TLE's location, despite the
+ * comment for addTargetToGroupList discouraging that. The only other
+ * thing we could point to is the ALL keyword, which seems unhelpful
+ * when there are multiple TLEs.
+ */
+ foreach_ptr(TargetEntry, tle, *targetlist)
+ {
+ /* Ignore junk TLEs */
+ if (tle->resjunk)
+ continue;
+
+ /* Use contain_aggs_of_level to detect Aggrefs in SubLinks */
+ if (!(pstate->p_hasAggs &&
+ contain_aggs_of_level((Node *) tle->expr, 0)))
+ result = addTargetToGroupList(pstate, tle,
+ result, *targetlist,
+ exprLocation((Node *) tle->expr));
+ }
+
+ return result;
+ }
+
/*
* Recursively flatten implicit RowExprs. (Technically this is only needed
* for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate,
true /* force SQL99 rules */ );
partitionClause = transformGroupClause(pstate,
windef->partitionClause,
+ false /* not GROUP BY ALL */ ,
NULL,
targetlist,
orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupAll)
+ appendStringInfoString(buf, "ALL");
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4ed14fc5b78..d379879ae80 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
- bool groupDistinct; /* is the group by clause distinct? */
+ bool groupDistinct; /* was GROUP BY DISTINCT used? */
+ bool groupAll; /* was GROUP BY ALL used? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
ParseExprKind exprKind, const char *constructName,
LimitOption limitOption);
extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+ bool groupByAll,
List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..aeeb452ec26 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,153 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b
+ -> Seq Scan on t1
+(3 rows)
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: (a + b)
+ -> Seq Scan on t1
+(3 rows)
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ -> Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------
+ Seq Scan on t1
+(1 row)
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+ ^
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+-----------------------------------
+ GroupAggregate
+ InitPlan 1
+ -> Limit
+ -> Seq Scan on t1 t1_1
+ -> Seq Scan on t1
+(5 rows)
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+ QUERY PLAN
+----------------------------
+ HashAggregate
+ Group Key: sum(t1.b)
+ -> HashAggregate
+ Group Key: t1.a
+ -> Seq Scan on t1
+(5 rows)
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE: view "v1" will be a temporary view
+SELECT pg_get_viewdef('v1'::regclass);
+ pg_get_viewdef
+-----------------------
+ SELECT b, +
+ count(*) AS count+
+ FROM t1 +
+ GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..6b2516cd9b1 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,68 @@ drop table t2;
drop table t3;
drop table p_t1;
+
+--
+-- Test GROUP BY ALL
+--
+
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL;
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL)
+SELECT AVG(a), sum_b FROM a_query GROUP BY ALL;
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-27 13:30 Peter Eisentraut <[email protected]>
parent: Tom Lane <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: Peter Eisentraut @ 2025-09-27 13:30 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: pgsql-hackers; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 26.09.25 22:18, Tom Lane wrote:
> Got it, mostly. There is an edge case, though: what if there are no
> candidate grouping items? I see these test cases in David's patch:
>
> +-- oops all aggregates
> +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
> + QUERY PLAN
> +----------------------
> + Aggregate
> + -> Seq Scan on t1
> +(2 rows)
> +
> +-- empty column list
> +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
> + QUERY PLAN
> +----------------
> + Seq Scan on t1
> +(1 row)
>
> That is, in such cases the patch behaves as if there were no GROUP BY
> clause at all, which seems kinda dubious. Should this be an error,
> and if not what's it supposed to do?
These should resolve to GROUP BY ().
> Also, what about window functions in the tlist?
> (I didn't stop to figure out why this isn't giving the same error, but
> maybe it's an order-of-checks thing.) In any case: should this give
> "window functions are not allowed in GROUP BY", or should the
> window-function-containing tlist item be silently skipped by GROUP BY
> ALL? Trying to make it work is surely not the right answer.
Hmm, I don't know. The syntactic transformation talks about select list
elements that "do not directly contain an <aggregate function>", but
that can also appear as part of <window function>, so the syntactic
transformation might appear to apply only to some types of window
functions, which doesn't make sense to me.
I don't know what a sensible behavior should be here. Maybe in this
first patch version just reject use of GROUP BY ALL if you find any
window functions in the select list.
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-27 13:40 Peter Eisentraut <[email protected]>
parent: Tom Lane <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: Peter Eisentraut @ 2025-09-27 13:40 UTC (permalink / raw)
To: Tom Lane <[email protected]>; David Christensen <[email protected]>; +Cc: Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 26.09.25 18:23, Tom Lane wrote:
> No, I think the correct behavior would have to be to descend into
> SubLinks to see if they contain any aggregates belonging to the
> outer query level.
>
> However (looks around) we do already have that code.
> See contain_aggs_of_level. (contain_agg_clause is essentially
> a simplified version that is okay to use in the planner because
> it's already gotten rid of sublinks.)
>
> What mainly concerns me at this point is whether we've identified
> aggregate levels at the point in parsing where you want to run this.
> I have a bit of a worry that that might interact with grouping.
> Presumably the SQL committee thought about that, so it's probably
> soluble, but ...
The language used in the standard at the moment is the select list
elements that "do not directly contain an <aggregate function>", where
"directly contain" is a term of art that means "contains without an
intervening instance of <subquery>, <within group specification>, or
<set function specification> that is not an <ordered set function>". So
it means not to look into subqueries.
Note that in standard SQL, the GROUP BY clause can only contain plain
column references, not expressions, so this question is kind of moot in
that context, because the query would be invalid no matter whether you
transform the GROUP BY ALL to group by the subquery or not.
For this first patch version, I suggest you reject the use of GROUP BY
ALL if you find a subquery in the select list, unless you have an
unambiguous better solution.
(It was discussed to relax this restriction, so this discussion is
useful to collect some questions related to that.)
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-27 16:03 Tom Lane <[email protected]>
parent: Peter Eisentraut <[email protected]>
0 siblings, 2 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-27 16:03 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: David Christensen <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
Peter Eisentraut <[email protected]> writes:
> The language used in the standard at the moment is the select list
> elements that "do not directly contain an <aggregate function>", where
> "directly contain" is a term of art that means "contains without an
> intervening instance of <subquery>, <within group specification>, or
> <set function specification> that is not an <ordered set function>". So
> it means not to look into subqueries.
TBH, that is obvious nonsense. A subquery could contain an aggregate
function that we've already identified as being of the current query
level. Putting such a construct into the GROUP BY list would create
an invalid query (cf. checkTargetlistEntrySQL92). Similarly, putting
a window function into the GROUP BY list would create an invalid
query.
> Note that in standard SQL, the GROUP BY clause can only contain plain
> column references, not expressions, so this question is kind of moot in
> that context, because the query would be invalid no matter whether you
> transform the GROUP BY ALL to group by the subquery or not.
So according to the standard, this:
select a+b, count(*) from ... group by all;
would be invalid because a+b couldn't be written directly in
GROUP BY? I can't see us rejecting that though, since we do
allow a+b in GROUP BY.
Seems like we're getting very little help from the standard as to
what this construct actually means. I suggest that we ignore the
current draft as not having been thought through quite enough yet,
and make ALL skip any tlist entries that contain_aggs_of_level
zero or contain_windowfuncs. If that means we're extending the
standard, so be it --- we've already extended GROUP BY quite a lot,
it seems.
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-27 18:14 Vik Fearing <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 0 replies; 42+ messages in thread
From: Vik Fearing @ 2025-09-27 18:14 UTC (permalink / raw)
To: Tom Lane <[email protected]>; Peter Eisentraut <[email protected]>; +Cc: David Christensen <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 27/09/2025 18:03, Tom Lane wrote:
> So according to the standard, this:
>
> select a+b, count(*) from ... group by all;
>
> would be invalid because a+b couldn't be written directly in
> GROUP BY?
Correct.
> I can't see us rejecting that though, since we do
> allow a+b in GROUP BY.
No, nor do I. Also, there were rumors about adding expressions to group
by in the standard but no formal proposal yet.
FWIW, I opposed adding this GROUP BY ALL feature, but I was
outnumbered. I hope to never see it in production.
--
Vik Fearing
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-27 22:23 Tom Lane <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 2 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-27 22:23 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: David Christensen <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
Here's a v6 that's rebased up to HEAD and contains fixes for the
semantic issues we discussed. It still lacks documentation, but
otherwise I think it's about ready to go.
regards, tom lane
Attachments:
[text/x-diff] v6-0001-Add-GROUP-BY-ALL.patch (14.9K, 2-v6-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
From 5503815351b2e45661262fddaab1271a435dd730 Mon Sep 17 00:00:00 2001
From: Tom Lane <[email protected]>
Date: Sat, 27 Sep 2025 18:19:16 -0400
Subject: [PATCH v6] Add GROUP BY ALL.
GROUP BY ALL is a form of GROUP BY that adds any TargetExpr that does
not contain an aggregate or window function into the groupClause of
the query, making it exactly equivalent to specifying those same
expressions in an explicit GROUP BY list.
This feature is useful for certain kinds of data exploration. It's
already present in some other DBMSes, and the SQL committee recently
accepted it into the standard, so we can be reasonably confident in
the syntax being stable. We do have to invent part of the semantics,
as the standard doesn't allow for expressions in GROUP BY, so they
haven't specified what to do with window functions. We assume that
those should be treated like aggregates, i.e., left out of the
constructed GROUP BY list.
Author: David Christensen <[email protected]>
Reviewed-by: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com
---
src/backend/parser/analyze.c | 2 +
src/backend/parser/gram.y | 14 +++
src/backend/parser/parse_clause.c | 65 ++++++++++++-
src/backend/utils/adt/ruleutils.c | 4 +-
src/include/nodes/parsenodes.h | 4 +-
src/include/parser/parse_clause.h | 1 +
src/test/regress/expected/aggregates.out | 113 +++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 50 ++++++++++
8 files changed, 250 insertions(+), 3 deletions(-)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 5aeb54eb5f6..99ba043961b 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1467,12 +1467,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt,
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
+ stmt->groupAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupAll = stmt->groupAll;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->all = false;
n->list = $4;
$$ = n;
}
+ | GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->all = true;
+ n->list = NIL;
+ $$ = n;
+ }
| /*EMPTY*/
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = false;
+ n->all = false;
n->list = NIL;
$$ = n;
}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..ca26f6f61f2 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate, non-window expressions in the targetlist.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
*
* pstate ParseState
* grouplist clause to transform
+ * groupByAll is this a GROUP BY ALL statement?
* groupingSets reference to list to contain the grouping set tree
* targetlist reference to TargetEntry list
* sortClause ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
* useSQL99 SQL99 rather than SQL92 syntax
*/
List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+ List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99)
{
@@ -2640,6 +2645,63 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
bool hasGroupingSets = false;
Bitmapset *seen_local = NULL;
+ /* Handle GROUP BY ALL */
+ if (groupByAll)
+ {
+ /* There cannot have been any explicit grouplist items */
+ Assert(grouplist == NIL);
+
+ /* Iterate over targets, adding acceptable ones to the result list */
+ foreach_ptr(TargetEntry, tle, *targetlist)
+ {
+ /* Ignore junk TLEs */
+ if (tle->resjunk)
+ continue;
+
+ /*
+ * TLEs containing aggregates are not okay to add to GROUP BY
+ * (compare checkTargetlistEntrySQL92). But the SQL standard
+ * directs us to skip them, so it's fine.
+ */
+ if (pstate->p_hasAggs &&
+ contain_aggs_of_level((Node *) tle->expr, 0))
+ continue;
+
+ /*
+ * Likewise, TLEs containing window functions are not okay to add
+ * to GROUP BY. At this writing, the SQL standard is silent on
+ * what to do with them, but by analogy to aggregates we'll just
+ * skip them.
+ */
+ if (pstate->p_hasWindowFuncs &&
+ contain_windowfuncs((Node *) tle->expr))
+ continue;
+
+ /*
+ * Otherwise, add the TLE to the result using default sort/group
+ * semantics. We specify the parse location as the TLE's
+ * location, despite the comment for addTargetToGroupList
+ * discouraging that. The only other thing we could point to is
+ * the ALL keyword, which seems unhelpful when there are multiple
+ * TLEs.
+ */
+ result = addTargetToGroupList(pstate, tle,
+ result, *targetlist,
+ exprLocation((Node *) tle->expr));
+ }
+
+ /* If we found any acceptable targets, we're done */
+ if (result != NIL)
+ return result;
+
+ /*
+ * Otherwise, the SQL standard says to treat it like "GROUP BY ()".
+ * Build a representation of that, and let the rest of this function
+ * handle it.
+ */
+ grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, NIL, -1));
+ }
+
/*
* Recursively flatten implicit RowExprs. (Technically this is only needed
* for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2880,7 @@ transformWindowDefinitions(ParseState *pstate,
true /* force SQL99 rules */ );
partitionClause = transformGroupClause(pstate,
windef->partitionClause,
+ false /* not GROUP BY ALL */ ,
NULL,
targetlist,
orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupAll)
+ appendStringInfoString(buf, "ALL");
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f1706df58fd..aa73f7d76dd 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
- bool groupDistinct; /* is the group by clause distinct? */
+ bool groupDistinct; /* was GROUP BY DISTINCT used? */
+ bool groupAll; /* was GROUP BY ALL used? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
ParseExprKind exprKind, const char *constructName,
LimitOption limitOption);
extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+ bool groupByAll,
List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..2cdb7562846 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,119 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: (a + b)
+ -> Seq Scan on t1
+(3 rows)
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+ ^
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ Group Key: ()
+ -> Seq Scan on t1
+(3 rows)
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+-----------------------
+ Result
+ Replaces: Aggregate
+(2 rows)
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------------------
+ WindowAgg
+ Window: w1 AS (PARTITION BY a)
+ -> Sort
+ Sort Key: a
+ -> HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(7 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+ pg_get_viewdef
+-----------------------
+ SELECT b, +
+ count(*) AS count+
+ FROM t1 +
+ GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..ec41da493ad 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,56 @@ drop table t2;
drop table t3;
drop table p_t1;
+--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
--
2.43.7
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-28 09:05 Chao Li <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: Chao Li @ 2025-09-28 09:05 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; David Christensen <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
> On Sep 28, 2025, at 06:23, Tom Lane <[email protected]> wrote:
>
> + /* Iterate over targets, adding acceptable ones to the result list */
> + foreach_ptr(TargetEntry, tle, *targetlist)
> + {
> + /* Ignore junk TLEs */
> + if (tle->resjunk)
> + continue;
Do we want to specifically check “ctid”?
If a user does:
```
select ctid, col1, col2, ... from t group by all;
```
It would be equivalent to no group by. Combing “select ctid” with “group by all” seems totally useless, but when users do such things, we can optimize that.
Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-28 14:20 Tom Lane <[email protected]>
parent: Chao Li <[email protected]>
0 siblings, 0 replies; 42+ messages in thread
From: Tom Lane @ 2025-09-28 14:20 UTC (permalink / raw)
To: Chao Li <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; David Christensen <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
Chao Li <[email protected]> writes:
> Do we want to specifically check “ctid”?
No. Complete waste of effort.
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-28 19:18 David Christensen <[email protected]>
parent: Tom Lane <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: David Christensen @ 2025-09-28 19:18 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Sat, Sep 27, 2025 at 5:23 PM Tom Lane <[email protected]> wrote:
>
> Here's a v6 that's rebased up to HEAD and contains fixes for the
> semantic issues we discussed. It still lacks documentation, but
> otherwise I think it's about ready to go.
Here is v7 with a stab at docs; fairly minimal at this point, but
touching the two areas that are likely to need adjusting. When
adjusting the docs for sql-select, I noticed that the grammar also
supports `GROUP BY ALL <grouping_elements>`, so I also added a test to
ensure that this syntax is explicitly supported. (It seems like it
works as-is without further grammar adjustments, but I was a little
worried when I first saw that fact... :D) Not sure that
aggregates.sql is still the right place for all of these bits, but it
does seem like having all things `GROUP BY ALL`-related tested in the
same place is a nice property, so leaving there for now.
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-28 20:34 David Christensen <[email protected]>
parent: David Christensen <[email protected]>
0 siblings, 2 replies; 42+ messages in thread
From: David Christensen @ 2025-09-28 20:34 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Sun, Sep 28, 2025 at 2:18 PM David Christensen <[email protected]> wrote:
>
> On Sat, Sep 27, 2025 at 5:23 PM Tom Lane <[email protected]> wrote:
> >
> > Here's a v6 that's rebased up to HEAD and contains fixes for the
> > semantic issues we discussed. It still lacks documentation, but
> > otherwise I think it's about ready to go.
>
> Here is v7 with a stab at docs; fairly minimal at this point, but
> touching the two areas that are likely to need adjusting. When
> adjusting the docs for sql-select, I noticed that the grammar also
> supports `GROUP BY ALL <grouping_elements>`, so I also added a test to
> ensure that this syntax is explicitly supported. (It seems like it
> works as-is without further grammar adjustments, but I was a little
> worried when I first saw that fact... :D) Not sure that
> aggregates.sql is still the right place for all of these bits, but it
> does seem like having all things `GROUP BY ALL`-related tested in the
> same place is a nice property, so leaving there for now.
This time with attachment!
Attachments:
[application/octet-stream] v7-0001-Add-GROUP-BY-ALL.patch (18.0K, 2-v7-0001-Add-GROUP-BY-ALL.patch)
download | inline diff:
From 67812be34148d1e186ff7afc95c9acd2f58fa1b1 Mon Sep 17 00:00:00 2001
From: Tom Lane <[email protected]>
Date: Sat, 27 Sep 2025 18:19:16 -0400
Subject: [PATCH v7] Add GROUP BY ALL
GROUP BY ALL is a form of GROUP BY that adds any TargetExpr that does
not contain an aggregate or window function into the groupClause of
the query, making it exactly equivalent to specifying those same
expressions in an explicit GROUP BY list.
This feature is useful for certain kinds of data exploration. It's
already present in some other DBMSes, and the SQL committee recently
accepted it into the standard, so we can be reasonably confident in
the syntax being stable. We do have to invent part of the semantics,
as the standard doesn't allow for expressions in GROUP BY, so they
haven't specified what to do with window functions. We assume that
those should be treated like aggregates, i.e., left out of the
constructed GROUP BY list.
Author: David Christensen <[email protected]>
Reviewed-by: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com
---
doc/src/sgml/queries.sgml | 28 ++++++
doc/src/sgml/ref/select.sgml | 10 +-
src/backend/parser/analyze.c | 2 +
src/backend/parser/gram.y | 14 +++
src/backend/parser/parse_clause.c | 65 +++++++++++-
src/backend/utils/adt/ruleutils.c | 4 +-
src/include/nodes/parsenodes.h | 4 +-
src/include/parser/parse_clause.h | 1 +
src/test/regress/expected/aggregates.out | 122 +++++++++++++++++++++++
src/test/regress/sql/aggregates.sql | 54 ++++++++++
10 files changed, 300 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index a326960ff4d..9bcb2962e6e 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1151,6 +1151,34 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales
column names is also allowed.
</para>
+ <para>
+ PostgreSQL also supports the <literal>GROUP BY ALL</literal> syntax, which
+ is equivalent to explicitly including all columns or expressions which do
+ not contain either an aggregate function or a window function in their
+ expression list. This can greatly simplify ad-hoc exploration of data.
+ </para>
+
+ <para>
+ An example of the equivalence is this:
+<screen>
+<prompt>=></prompt> <userinput>SELECT a, b, a + b, sum(c) FROM test1 GROUP BY ALL;</userinput>
+ a | b | ?column? | sum
+---+---+----------+----
+ 1 | 4 | 5 | 9
+ 2 | 5 | 7 | 12
+ 3 | 6 | 9 | 15
+(3 rows)
+
+<prompt>=></prompt> <userinput>SELECT a, b, a + b, sum(c) FROM test1 GROUP BY a, b, a + b;</userinput>
+ a | b | ?column? | sum
+---+---+----------+----
+ 1 | 4 | 5 | 9
+ 2 | 5 | 7 | 12
+ 3 | 6 | 9 | 15
+(3 rows)
+</screen>
+ </para>
+
<indexterm>
<primary>HAVING</primary>
</indexterm>
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index d7089eac0be..afa49040358 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -796,7 +796,7 @@ WHERE <replaceable class="parameter">condition</replaceable>
<para>
The optional <literal>GROUP BY</literal> clause has the general form
<synopsis>
-GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
+GROUP BY { ALL | [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] }
</synopsis>
</para>
@@ -813,6 +813,14 @@ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</rep
input-column name rather than an output column name.
</para>
+ <para>
+ The form <literal>GROUP BY ALL</literal> with no additional
+ <replaceable class="parameter">grouping_element</replaceable> elements
+ provided is equivalent to doing a <literal>GROUP BY</literal> on all
+ expressions in the target list that do not contain either an aggregate or
+ a window function.
+ </para>
+
<para>
If any of <literal>GROUPING SETS</literal>, <literal>ROLLUP</literal> or
<literal>CUBE</literal> are present as grouping elements, then the
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 5aeb54eb5f6..3b392b084ad 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1467,12 +1467,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt,
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
+ stmt->groupByAll,
&qry->groupingSets,
&qry->targetList,
qry->sortClause,
EXPR_KIND_GROUP_BY,
false /* allow SQL92 rules */ );
qry->groupDistinct = stmt->groupDistinct;
+ qry->groupByAll = stmt->groupByAll;
if (stmt->distinctClause == NIL)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..f1def67ac7c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
typedef struct GroupClause
{
bool distinct;
+ bool all;
List *list;
} GroupClause;
@@ -12993,6 +12994,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupByAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = ($7)->list;
n->groupDistinct = ($7)->distinct;
+ n->groupByAll = ($7)->all;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+ n->all = false;
n->list = $4;
$$ = n;
}
+ | GROUP_P BY ALL
+ {
+ GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+ n->distinct = false;
+ n->all = true;
+ n->list = NIL;
+ $$ = n;
+ }
| /*EMPTY*/
{
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
n->distinct = false;
+ n->all = false;
n->list = NIL;
$$ = n;
}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
n->whereClause = $4;
n->groupClause = ($5)->list;
n->groupDistinct = ($5)->distinct;
+ n->groupByAll = ($5)->all;
n->havingClause = $6;
n->windowClause = $7;
n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..ca26f6f61f2 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate, non-window expressions in the targetlist.
+ *
* This is also used for window PARTITION BY clauses (which act almost the
* same, but are always interpreted per SQL99 rules).
*
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
*
* pstate ParseState
* grouplist clause to transform
+ * groupByAll is this a GROUP BY ALL statement?
* groupingSets reference to list to contain the grouping set tree
* targetlist reference to TargetEntry list
* sortClause ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
* useSQL99 SQL99 rather than SQL92 syntax
*/
List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+ List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99)
{
@@ -2640,6 +2645,63 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
bool hasGroupingSets = false;
Bitmapset *seen_local = NULL;
+ /* Handle GROUP BY ALL */
+ if (groupByAll)
+ {
+ /* There cannot have been any explicit grouplist items */
+ Assert(grouplist == NIL);
+
+ /* Iterate over targets, adding acceptable ones to the result list */
+ foreach_ptr(TargetEntry, tle, *targetlist)
+ {
+ /* Ignore junk TLEs */
+ if (tle->resjunk)
+ continue;
+
+ /*
+ * TLEs containing aggregates are not okay to add to GROUP BY
+ * (compare checkTargetlistEntrySQL92). But the SQL standard
+ * directs us to skip them, so it's fine.
+ */
+ if (pstate->p_hasAggs &&
+ contain_aggs_of_level((Node *) tle->expr, 0))
+ continue;
+
+ /*
+ * Likewise, TLEs containing window functions are not okay to add
+ * to GROUP BY. At this writing, the SQL standard is silent on
+ * what to do with them, but by analogy to aggregates we'll just
+ * skip them.
+ */
+ if (pstate->p_hasWindowFuncs &&
+ contain_windowfuncs((Node *) tle->expr))
+ continue;
+
+ /*
+ * Otherwise, add the TLE to the result using default sort/group
+ * semantics. We specify the parse location as the TLE's
+ * location, despite the comment for addTargetToGroupList
+ * discouraging that. The only other thing we could point to is
+ * the ALL keyword, which seems unhelpful when there are multiple
+ * TLEs.
+ */
+ result = addTargetToGroupList(pstate, tle,
+ result, *targetlist,
+ exprLocation((Node *) tle->expr));
+ }
+
+ /* If we found any acceptable targets, we're done */
+ if (result != NIL)
+ return result;
+
+ /*
+ * Otherwise, the SQL standard says to treat it like "GROUP BY ()".
+ * Build a representation of that, and let the rest of this function
+ * handle it.
+ */
+ grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, NIL, -1));
+ }
+
/*
* Recursively flatten implicit RowExprs. (Technically this is only needed
* for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2880,7 @@ transformWindowDefinitions(ParseState *pstate,
true /* force SQL99 rules */ );
partitionClause = transformGroupClause(pstate,
windef->partitionClause,
+ false /* not GROUP BY ALL */ ,
NULL,
targetlist,
orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..c6d83d67b87 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
save_ingroupby = context->inGroupBy;
context->inGroupBy = true;
- if (query->groupingSets == NIL)
+ if (query->groupByAll)
+ appendStringInfoString(buf, "ALL");
+ else if (query->groupingSets == NIL)
{
sep = "";
foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f1706df58fd..ac0e02a1db7 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
- bool groupDistinct; /* is the group by clause distinct? */
+ bool groupDistinct; /* was GROUP BY DISTINCT used? */
+ bool groupByAll; /* was GROUP BY ALL used? */
List *groupingSets; /* a list of GroupingSet's if present */
@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
Node *whereClause; /* WHERE qualification */
List *groupClause; /* GROUP BY clauses */
bool groupDistinct; /* Is this GROUP BY DISTINCT? */
+ bool groupByAll; /* Is this GROUP BY ALL? */
Node *havingClause; /* HAVING conditional-expression */
List *windowClause; /* WINDOW window_name AS (...), ... */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
ParseExprKind exprKind, const char *constructName,
LimitOption limitOption);
extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+ bool groupByAll,
List **groupingSets,
List **targetlist, List *sortClause,
ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..0d63412399e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,128 @@ drop table t2;
drop table t3;
drop table p_t1;
--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b
+ -> Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: (a + b)
+ -> Seq Scan on t1
+(3 rows)
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+ ^
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ Aggregate
+ Group Key: ()
+ -> Seq Scan on t1
+(3 rows)
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+ QUERY PLAN
+-----------------------
+ Result
+ Replaces: Aggregate
+(2 rows)
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------------------
+ WindowAgg
+ Window: w1 AS (PARTITION BY a)
+ -> Sort
+ Sort Key: a
+ -> HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(7 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a, b, c
+ -> Seq Scan on t1
+(3 rows)
+
+-- group by all but with column lists (equivalent to GROUP BY default behavior, explicit antithesis to GROUP BY DISTINCT)
+EXPLAIN (COSTS OFF) SELECT a, count(*) FROM t1 GROUP BY ALL a;
+ QUERY PLAN
+----------------------
+ HashAggregate
+ Group Key: a
+ -> Seq Scan on t1
+(3 rows)
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+ pg_get_viewdef
+-----------------------
+ SELECT b, +
+ count(*) AS count+
+ FROM t1 +
+ GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..fc3257830c5 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,60 @@ drop table t2;
drop table t3;
drop table p_t1;
+--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+
+CREATE TEMP TABLE t1 (
+ a int,
+ b int,
+ c int
+);
+
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- group by all but with column lists (equivalent to GROUP BY default behavior, explicit antithesis to GROUP BY DISTINCT)
+
+EXPLAIN (COSTS OFF) SELECT a, count(*) FROM t1 GROUP BY ALL a;
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
--
-- Test GROUP BY matching of join columns that are type-coerced due to USING
--
--
2.49.0
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-29 20:58 Tom Lane <[email protected]>
parent: David Christensen <[email protected]>
1 sibling, 1 reply; 42+ messages in thread
From: Tom Lane @ 2025-09-29 20:58 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
David Christensen <[email protected]> writes:
>> Here is v7 with a stab at docs; fairly minimal at this point, but
>> touching the two areas that are likely to need adjusting.
I did some more word-smithing on the docs and pushed it.
>> When
>> adjusting the docs for sql-select, I noticed that the grammar also
>> supports `GROUP BY ALL <grouping_elements>`, so I also added a test to
>> ensure that this syntax is explicitly supported.
+1, can't hurt.
>> (It seems like it
>> works as-is without further grammar adjustments, but I was a little
>> worried when I first saw that fact... :D)
Bison would have been vocal about it if you'd introduced any
ambiguity. Still, I didn't feel like looking around to see if we
already covered this syntax, and I agree it's close enough to being
an issue to be worth covering.
Thanks for the patch!
regards, tom lane
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-29 21:13 David Christensen <[email protected]>
parent: Tom Lane <[email protected]>
0 siblings, 0 replies; 42+ messages in thread
From: David Christensen @ 2025-09-29 21:13 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: Peter Eisentraut <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
> On Sep 29, 2025, at 3:58 PM, Tom Lane <[email protected]> wrote:
>
> David Christensen <[email protected]> writes:
>>> Here is v7 with a stab at docs; fairly minimal at this point, but
>>> touching the two areas that are likely to need adjusting.
>
> I did some more word-smithing on the docs and pushed it.
>
>>> When
>>> adjusting the docs for sql-select, I noticed that the grammar also
>>> supports `GROUP BY ALL <grouping_elements>`, so I also added a test to
>>> ensure that this syntax is explicitly supported.
>
> +1, can't hurt.
>
>>> (It seems like it
>>> works as-is without further grammar adjustments, but I was a little
>>> worried when I first saw that fact... :D)
>
> Bison would have been vocal about it if you'd introduced any
> ambiguity. Still, I didn't feel like looking around to see if we
> already covered this syntax, and I agree it's close enough to being
> an issue to be worth covering.
>
> Thanks for the patch!
Great, thank you!
David
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2025-09-29 23:46 Chao Li <[email protected]>
parent: David Christensen <[email protected]>
1 sibling, 0 replies; 42+ messages in thread
From: Chao Li @ 2025-09-29 23:46 UTC (permalink / raw)
To: David Christensen <[email protected]>; +Cc: Tom Lane <[email protected]>; Peter Eisentraut <[email protected]>; Andrey Borodin <[email protected]>; pgsql-hackers; David G. Johnston <[email protected]>; Jelte Fennema-Nio <[email protected]>
> On Sep 29, 2025, at 04:34, David Christensen <[email protected]> wrote:
>
> On Sun, Sep 28, 2025 at 2:18 PM David Christensen <[email protected] <mailto:[email protected]>> wrote:
>>
>> On Sat, Sep 27, 2025 at 5:23 PM Tom Lane <[email protected]> wrote:
>>>
>>> Here's a v6 that's rebased up to HEAD and contains fixes for the
>>> semantic issues we discussed. It still lacks documentation, but
>>> otherwise I think it's about ready to go.
>>
>> Here is v7 with a stab at docs; fairly minimal at this point, but
>> touching the two areas that are likely to need adjusting. When
>> adjusting the docs for sql-select, I noticed that the grammar also
>> supports `GROUP BY ALL <grouping_elements>`, so I also added a test to
>> ensure that this syntax is explicitly supported. (It seems like it
>> works as-is without further grammar adjustments, but I was a little
>> worried when I first saw that fact... :D) Not sure that
>> aggregates.sql is still the right place for all of these bits, but it
>> does seem like having all things `GROUP BY ALL`-related tested in the
>> same place is a nice property, so leaving there for now.
>
> This time with attachment!
> <v7-0001-Add-GROUP-BY-ALL.patch>
A nit comment for the doc:
```
+ not contain either an aggregate function or a window function in their
+ expression list. This can greatly simplify ad-hoc exploration of data.
+ </para>
```
“In their expression list” => “in the <literal>SELECT</literal> list"
Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2026-04-14 11:21 Peter Eisentraut <[email protected]>
parent: Peter Eisentraut <[email protected]>
0 siblings, 1 reply; 42+ messages in thread
From: Peter Eisentraut @ 2026-04-14 11:21 UTC (permalink / raw)
To: Tom Lane <[email protected]>; +Cc: pgsql-hackers; David G. Johnston <[email protected]>; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
On 27.09.25 15:30, Peter Eisentraut wrote:
>> Also, what about window functions in the tlist?
>
>> (I didn't stop to figure out why this isn't giving the same error, but
>> maybe it's an order-of-checks thing.) In any case: should this give
>> "window functions are not allowed in GROUP BY", or should the
>> window-function-containing tlist item be silently skipped by GROUP BY
>> ALL? Trying to make it work is surely not the right answer.
>
> Hmm, I don't know. The syntactic transformation talks about select list
> elements that "do not directly contain an <aggregate function>", but
> that can also appear as part of <window function>, so the syntactic
> transformation might appear to apply only to some types of window
> functions, which doesn't make sense to me.
>
> I don't know what a sensible behavior should be here. Maybe in this
> first patch version just reject use of GROUP BY ALL if you find any
> window functions in the select list.
The handling of window functions by GROUP BY ALL is a semi-open-item.
The code in transformGroupClause() currently says:
/*
* Likewise, TLEs containing window functions are not okay to add
* to GROUP BY. At this writing, the SQL standard is silent on
* what to do with them, but by analogy to aggregates we'll just
* skip them.
*/
if (pstate->p_hasWindowFuncs &&
contain_windowfuncs((Node *) tle->expr))
continue;
The wording of the SQL standard currently does not address that at all
(but we could fix it), which would mean that a window function ends up
in the GROUP BY ALL expansion by default.
Personally, I don't understand what the meaning of this should be.
Aggregates relate to grouping, but window functions are a different
processing phase, so that do they have to do with grouping?
I don't see any mention of using GROUP BY with window functions in our
relevant documentation, for example
https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS
https://www.postgresql.org/docs/devel/functions-window.html
Commit ef38a4d9756 added a regression test
EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1
GROUP BY ALL;
but the test table contains no data, so I don't know if this kind of
query produces interesting information. Wouldn't a more practical
query use different columns, like
SELECT a, COUNT(b) OVER (PARTITION BY a) FROM t1
?
I see that DuckDB and Oracle (the two other implementations that can be
accessed relatively freely, though there are others) each behave
differently here.
Maybe we can produce some more test cases to see what useful behaviors
should be?
^ permalink raw reply [nested|flat] 42+ messages in thread
* Re: [PATCH] GROUP BY ALL
@ 2026-04-14 12:57 David G. Johnston <[email protected]>
parent: Peter Eisentraut <[email protected]>
0 siblings, 0 replies; 42+ messages in thread
From: David G. Johnston @ 2026-04-14 12:57 UTC (permalink / raw)
To: Peter Eisentraut <[email protected]>; +Cc: Tom Lane <[email protected]>; pgsql-hackers; David Christensen <[email protected]>; Jelte Fennema-Nio <[email protected]>
On Tuesday, April 14, 2026, Peter Eisentraut <[email protected]> wrote:
>
>
> I don't see any mention of using GROUP BY with window functions in our
> relevant documentation, for example
>
> https://www.postgresql.org/docs/devel/sql-expressions.html#
> SYNTAX-WINDOW-FUNCTIONS
> https://www.postgresql.org/docs/devel/functions-window.html
The select reference page covers this. But the window clause could get
better treatment, it’s buried in step 5.
https://www.postgresql.org/docs/current/sql-select.html
> Commit ef38a4d9756 added a regression test
>
> EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP
> BY ALL;
>
> but the test table contains no data, so I don't know if this kind of query
> produces interesting information.
>
Each non-null value of “a” would have an output of 1, while a null valued
“a” would have an output of 0. “A” is grouped since all expressions
involving “a” are non-aggregated. The equivalent rewrite is:
Select a, count(a_expr) over … from ( — step 5
select a, a as a_expr from tbl group by all. — step 4
);
For purposes of group by all one would erase/ignore the actual window
wrapper while leaving the expressions it operates over in place. This
extends from “window expressions are processed after group by/having”
documented in SELECT. IOW, group by all resolves during processing step 4
with intermediate results for the expressions within the window functions,
then step 5 removes the intermediate expressions that don’t appear in the
final output while adding in the results of processing the window functions.
David J.
^ permalink raw reply [nested|flat] 42+ messages in thread
end of thread, other threads:[~2026-04-14 12:57 UTC | newest]
Thread overview: 42+ messages (download: mbox mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2022-12-19 04:19 GROUP BY ALL Andrey Borodin <[email protected]>
2022-12-19 04:30 ` Tom Lane <[email protected]>
2022-12-19 04:40 ` Andrey Borodin <[email protected]>
2022-12-19 04:45 ` David G. Johnston <[email protected]>
2022-12-19 13:44 ` Isaac Morland <[email protected]>
2022-12-19 16:53 ` Vik Fearing <[email protected]>
2025-08-17 17:12 Re: [PATCH] GROUP BY ALL Jelte Fennema-Nio <[email protected]>
2025-09-26 07:02 ` Peter Eisentraut <[email protected]>
2025-09-26 08:45 ` Andrey Borodin <[email protected]>
2025-09-26 11:16 ` David Christensen <[email protected]>
2025-09-26 15:45 ` David Christensen <[email protected]>
2025-09-26 15:49 ` David Christensen <[email protected]>
2025-09-26 16:04 ` Tom Lane <[email protected]>
2025-09-26 16:13 ` David Christensen <[email protected]>
2025-09-26 16:17 ` David Christensen <[email protected]>
2025-09-26 16:24 ` Tom Lane <[email protected]>
2025-09-26 16:23 ` Tom Lane <[email protected]>
2025-09-27 13:40 ` Peter Eisentraut <[email protected]>
2025-09-27 16:03 ` Tom Lane <[email protected]>
2025-09-27 18:14 ` Vik Fearing <[email protected]>
2025-09-27 22:23 ` Tom Lane <[email protected]>
2025-09-28 09:05 ` Chao Li <[email protected]>
2025-09-28 14:20 ` Tom Lane <[email protected]>
2025-09-28 19:18 ` David Christensen <[email protected]>
2025-09-28 20:34 ` David Christensen <[email protected]>
2025-09-29 20:58 ` Tom Lane <[email protected]>
2025-09-29 21:13 ` David Christensen <[email protected]>
2025-09-29 23:46 ` Chao Li <[email protected]>
2025-09-26 16:37 ` jian he <[email protected]>
2025-09-26 16:56 ` David Christensen <[email protected]>
2025-09-26 17:00 ` David Christensen <[email protected]>
2025-09-26 17:46 ` Tom Lane <[email protected]>
2025-09-26 18:50 ` David Christensen <[email protected]>
2025-09-26 20:37 ` Tom Lane <[email protected]>
2025-09-26 14:11 ` Tom Lane <[email protected]>
2025-09-26 15:52 ` David Christensen <[email protected]>
2025-09-26 15:54 ` Peter Eisentraut <[email protected]>
2025-09-26 15:59 ` David Christensen <[email protected]>
2025-09-26 20:18 ` Tom Lane <[email protected]>
2025-09-27 13:30 ` Peter Eisentraut <[email protected]>
2026-04-14 11:21 ` Peter Eisentraut <[email protected]>
2026-04-14 12:57 ` David G. Johnston <[email protected]>
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox