Received: from malur.postgresql.org ([217.196.149.56]) by arkaria.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.94.2) (envelope-from ) id 1v2FCg-001fhT-Ci for pgsql-hackers@arkaria.postgresql.org; Fri, 26 Sep 2025 20:38:06 +0000 Received: from localhost ([127.0.0.1] helo=malur.postgresql.org) by malur.postgresql.org with esmtp (Exim 4.94.2) (envelope-from ) id 1v2FCe-005m30-Co for pgsql-hackers@arkaria.postgresql.org; Fri, 26 Sep 2025 20:38:05 +0000 Received: from magus.postgresql.org ([2a02:c0:301:0:ffff::29]) by malur.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.94.2) (envelope-from ) id 1v2FCe-005m2Q-0L for pgsql-hackers@lists.postgresql.org; Fri, 26 Sep 2025 20:38:04 +0000 Received: from sss.pgh.pa.us ([68.162.161.243]) by magus.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96) (envelope-from ) id 1v2FCc-000Hqb-1G for pgsql-hackers@postgresql.org; Fri, 26 Sep 2025 20:38:04 +0000 Received: from sss1.sss.pgh.pa.us (localhost [127.0.0.1]) by sss.pgh.pa.us (8.15.2/8.15.2) with ESMTP id 58QKbwsC031028; Fri, 26 Sep 2025 16:37:58 -0400 From: Tom Lane To: David Christensen cc: Peter Eisentraut , pgsql-hackers Subject: Re: [PATCH] GROUP BY ALL In-reply-to: References: <4D2047B0-E8D8-472B-B7E8-61206B1E6AFA@yandex-team.ru> <4096523.1758908779@sss.pgh.pa.us> Comments: In-reply-to David Christensen message dated "Fri, 26 Sep 2025 13:50:27 -0500" MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" Content-ID: <30931.1758918999.0@sss.pgh.pa.us> Date: Fri, 26 Sep 2025 16:37:58 -0400 Message-ID: <31027.1758919078@sss.pgh.pa.us> List-Id: List-Help: List-Subscribe: List-Post: List-Owner: List-Archive: Archived-At: Precedence: bulk ------- =_aaaaaaaaaa0 Content-Type: text/plain; charset="us-ascii" Content-ID: <30931.1758918999.1@sss.pgh.pa.us> [ trimming the cc: list because gmail decided my last was spam ] David Christensen writes: > Great feedback, thanks; attached is v4 which should address these comments. I did a pass of cleanup over this --- mostly cosmetic, but not entirely. Along the way I discovered a pre-existing bug: transformSelectStmt does qry->groupDistinct = stmt->groupDistinct; but transformPLAssignStmt fails to, with the result that GROUP BY DISTINCT would misbehave if used in a plpgsql "expression" context. I'm not hugely surprised that no one has reported that from the field, but nonetheless it's broken. In the attached v5 I just quickly added the missing line and moved on, but we'll need to back-patch that bit. (Maybe we'd be well advised to refactor to reduce the amount of duplicated code that needs to be kept in sync?) I have not attempted to address the definitional issues I just queried Peter about. Other open items: * Documentation * The test cases deserve reconsideration now that we think their charter only goes as far as EXPLAIN'ing the results; some of them seem pretty redundant in this context. regards, tom lane ------- =_aaaaaaaaaa0 Content-Type: text/x-diff; name="v5-0001-Add-GROUP-BY-ALL.patch"; charset="us-ascii" Content-ID: <30931.1758918999.2@sss.pgh.pa.us> Content-Description: v5-0001-Add-GROUP-BY-ALL.patch Content-Transfer-Encoding: quoted-printable diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index b9763ea1714..fe632d3dabf 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt= *stmt) = qry->groupClause =3D transformGroupClause(pstate, stmt->groupClause, + stmt->groupAll, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); qry->groupDistinct =3D stmt->groupDistinct; + qry->groupAll =3D stmt->groupAll; = if (stmt->distinctClause =3D=3D NIL) { @@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssign= Stmt *stmt) = qry->groupClause =3D transformGroupClause(pstate, sstmt->groupClause, + sstmt->groupAll, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); + qry->groupDistinct =3D sstmt->groupDistinct; + qry->groupAll =3D sstmt->groupAll; = if (sstmt->distinctClause =3D=3D NIL) { diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 9fd48acb1f8..8a0aa1899e2 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -120,6 +120,7 @@ typedef struct SelectLimit typedef struct GroupClause { bool distinct; + bool all; List *list; } GroupClause; = @@ -12993,6 +12994,7 @@ simple_select: n->whereClause =3D $6; n->groupClause =3D ($7)->list; n->groupDistinct =3D ($7)->distinct; + n->groupAll =3D ($7)->all; n->havingClause =3D $8; n->windowClause =3D $9; $$ =3D (Node *) n; @@ -13010,6 +13012,7 @@ simple_select: n->whereClause =3D $6; n->groupClause =3D ($7)->list; n->groupDistinct =3D ($7)->distinct; + n->groupAll =3D ($7)->all; n->havingClause =3D $8; n->windowClause =3D $9; $$ =3D (Node *) n; @@ -13507,14 +13510,24 @@ group_clause: GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause)); = n->distinct =3D $3 =3D=3D SET_QUANTIFIER_DISTINCT; + n->all =3D false; n->list =3D $4; $$ =3D n; } + | GROUP_P BY ALL + { + GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause)); + n->distinct =3D false; + n->all =3D true; + n->list =3D NIL; + $$ =3D n; + } | /*EMPTY*/ { GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause)); = n->distinct =3D false; + n->all =3D false; n->list =3D NIL; $$ =3D n; } @@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list n->whereClause =3D $4; n->groupClause =3D ($5)->list; n->groupDistinct =3D ($5)->distinct; + n->groupAll =3D ($5)->all; n->havingClause =3D $6; n->windowClause =3D $7; n->sortClause =3D $8; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_= clause.c index 9f20a70ce13..562e3e90e34 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult, * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. * + * If GROUP BY ALL is specified, the groupClause will be inferred to be a= ll + * non-aggregate expressions in the targetlist. + * * This is also used for window PARTITION BY clauses (which act almost th= e * same, but are always interpreted per SQL99 rules). * @@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult, * * pstate ParseState * grouplist clause to transform + * groupByAll is this a GROUP BY ALL statement? * groupingSets reference to list to contain the grouping set tree * targetlist reference to TargetEntry list * sortClause ORDER BY clause (SortGroupClause nodes) @@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult, * useSQL99 SQL99 rather than SQL92 syntax */ List * -transformGroupClause(ParseState *pstate, List *grouplist, List **grouping= Sets, +transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll= , + List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99) { @@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grou= plist, List **groupingSets, bool hasGroupingSets =3D false; Bitmapset *seen_local =3D NULL; = + /* Handle GROUP BY ALL */ + if (groupByAll) + { + /* There cannot have been any explicit list items */ + Assert(grouplist =3D=3D NIL); + + /* + * Iterate over targets, any non-aggregate gets added as a Target. + * Note that it's not enough to check for a top-level Aggref; we need + * to ensure that any sub-expression here does not include an Aggref + * (for instance an expression such as `sum(col) + 4` should not be + * added as a grouping target). + * + * We specify the parse location as the TLE's location, despite the + * comment for addTargetToGroupList discouraging that. The only other + * thing we could point to is the ALL keyword, which seems unhelpful + * when there are multiple TLEs. + */ + foreach_ptr(TargetEntry, tle, *targetlist) + { + /* Ignore junk TLEs */ + if (tle->resjunk) + continue; + + /* Use contain_aggs_of_level to detect Aggrefs in SubLinks */ + if (!(pstate->p_hasAggs && + contain_aggs_of_level((Node *) tle->expr, 0))) + result =3D addTargetToGroupList(pstate, tle, + result, *targetlist, + exprLocation((Node *) tle->expr)); + } + + return result; + } + /* * Recursively flatten implicit RowExprs. (Technically this is only need= ed * for GROUP BY, per the syntax rules for grouping sets, but we do it @@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate, true /* force SQL99 rules */ ); partitionClause =3D transformGroupClause(pstate, windef->partitionClause, + false /* not GROUP BY ALL */ , NULL, targetlist, orderClause, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/rul= eutils.c index defcdaa8b34..d0c6d3b55bd 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context= *context) save_ingroupby =3D context->inGroupBy; context->inGroupBy =3D true; = - if (query->groupingSets =3D=3D NIL) + if (query->groupAll) + appendStringInfoString(buf, "ALL"); + else if (query->groupingSets =3D=3D NIL) { sep =3D ""; foreach(l, query->groupClause) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes= .h index 4ed14fc5b78..d379879ae80 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -214,7 +214,8 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ = List *groupClause; /* a list of SortGroupClause's */ - bool groupDistinct; /* is the group by clause distinct? */ + bool groupDistinct; /* was GROUP BY DISTINCT used? */ + bool groupAll; /* was GROUP BY ALL used? */ = List *groupingSets; /* a list of GroupingSet's if present */ = @@ -2192,6 +2193,7 @@ typedef struct SelectStmt Node *whereClause; /* WHERE qualification */ List *groupClause; /* GROUP BY clauses */ bool groupDistinct; /* Is this GROUP BY DISTINCT? */ + bool groupAll; /* Is this GROUP BY ALL? */ Node *havingClause; /* HAVING conditional-expression */ List *windowClause; /* WINDOW window_name AS (...), ... */ = diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_= clause.h index 3e9894926de..ede3903d1dd 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, No= de *clause, ParseExprKind exprKind, const char *constructName, LimitOption limitOption); extern List *transformGroupClause(ParseState *pstate, List *grouplist, + bool groupByAll, List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/e= xpected/aggregates.out index 1f24f6ffd1f..aeeb452ec26 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1557,6 +1557,153 @@ drop table t2; drop table t3; drop table p_t1; -- +-- Test GROUP BY ALL +-- +-- We don't care about the data here, just the proper transformation of t= he +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); +-- basic field check +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: b + -> Seq Scan on t1 +(3 rows) + +-- throw a null in the values too +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t1 +(3 rows) + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: (a + b) + -> Seq Scan on t1 +(3 rows) + +-- non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- including grouped column +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- oops all aggregates +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + Aggregate + -> Seq Scan on t1 +(2 rows) + +-- empty column list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------- + Seq Scan on t1 +(1 row) + +-- filter +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b =3D 2) FROM t1 GROU= P BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- all cols +EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on t1 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + QUERY PLAN = +---------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on t1 +(3 rows) + +-- expression without including aggregate columns +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; +ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an= aggregate function +LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL... + ^ +-- subquery +EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GRO= UP BY ALL; + QUERY PLAN = +----------------------------------- + GroupAggregate + InitPlan 1 + -> Limit + -> Seq Scan on t1 t1_1 + -> Seq Scan on t1 +(5 rows) + +-- cte +EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GR= OUP BY ALL) +SELECT AVG(a), sum_b FROM a_query GROUP BY ALL; + QUERY PLAN = +---------------------------- + HashAggregate + Group Key: sum(t1.b) + -> HashAggregate + Group Key: t1.a + -> Seq Scan on t1 +(5 rows) + +-- verify deparse +CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +NOTICE: view "v1" will be a temporary view +SELECT pg_get_viewdef('v1'::regclass); + pg_get_viewdef = +----------------------- + SELECT b, + + count(*) AS count+ + FROM t1 + + GROUP BY ALL; +(1 row) + +DROP VIEW v1; +DROP TABLE t1; +-- -- Test GROUP BY matching of join columns that are type-coerced due to US= ING -- create temp table t1(f1 int, f2 int); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/ag= gregates.sql index 62540b1ffa4..6b2516cd9b1 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -549,6 +549,68 @@ drop table t2; drop table t3; drop table p_t1; = + +-- +-- Test GROUP BY ALL +-- + +-- We don't care about the data here, just the proper transformation of t= he +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. + +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); + +-- basic field check +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + +-- throw a null in the values too +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL; + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + +-- non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + +-- including grouped column +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + +-- oops all aggregates +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + +-- empty column list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + +-- filter +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b =3D 2) FROM t1 GROU= P BY ALL; + +-- all cols +EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL; +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + +-- expression without including aggregate columns +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; + +-- subquery +EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GRO= UP BY ALL; + +-- cte +EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GR= OUP BY ALL) +SELECT AVG(a), sum_b FROM a_query GROUP BY ALL; + +-- verify deparse +CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +SELECT pg_get_viewdef('v1'::regclass); + +DROP VIEW v1; +DROP TABLE t1; + -- -- Test GROUP BY matching of join columns that are type-coerced due to US= ING -- ------- =_aaaaaaaaaa0--