From: Tom Lane <tgl@sss.pgh.pa.us>
To: David Christensen <david@pgguru.net>
cc: Peter Eisentraut <peter@eisentraut.org>,
        pgsql-hackers <pgsql-hackers@postgresql.org>
Subject: Re: [PATCH] GROUP BY ALL
In-reply-to: 
 <CAHM0NXiH_hm+pz2Me6YUkix913oniWgYJ3K-OMc7bXei=cDewA@mail.gmail.com>
References: <4D2047B0-E8D8-472B-B7E8-61206B1E6AFA@yandex-team.ru>
 <F694DABC-6A42-4614-B254-643180B9D6F6@pgguru.net>
 <CAHM0NXj1Gv_96WxZkiLKhxJVaYOGcmdZ2hApNcvWQ5HxV84Q=A@mail.gmail.com>
 <CACJufxEThXCX1QWyNE1gw--uJ4JQJ8f16MFH-kN+Ym3_02azTA@mail.gmail.com>
 <CAHM0NXg=yGzhn=H0_HiZ9u=Xq-40g1f+Bb=2e2LDxgd6a11bWw@mail.gmail.com>
 <CAHM0NXgB7hm-JH8c1o=o-4P7LYn_EoE=XZAW0ENdtP-nso60YQ@mail.gmail.com>
 <4096523.1758908779@sss.pgh.pa.us>
 <CAHM0NXiH_hm+pz2Me6YUkix913oniWgYJ3K-OMc7bXei=cDewA@mail.gmail.com>
Comments: In-reply-to David Christensen <david@pgguru.net>
	message dated "Fri, 26 Sep 2025 13:50:27 -0500"
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
Content-ID: <30931.1758918999.0@sss.pgh.pa.us>
Date: Fri, 26 Sep 2025 16:37:58 -0400
Message-ID: <31027.1758919078@sss.pgh.pa.us>
Archived-At: 
 <https://www.postgresql.org/message-id/31027.1758919078%40sss.pgh.pa.us>
Precedence: bulk

------- =_aaaaaaaaaa0
Content-Type: text/plain; charset="us-ascii"
Content-ID: <30931.1758918999.1@sss.pgh.pa.us>

[ trimming the cc: list because gmail decided my last was spam ]

David Christensen <david@pgguru.net> writes:
> Great feedback, thanks; attached is v4 which should address these comments.

I did a pass of cleanup over this --- mostly cosmetic, but not
entirely.  Along the way I discovered a pre-existing bug:
transformSelectStmt does
 	qry->groupDistinct = stmt->groupDistinct;
but transformPLAssignStmt fails to, with the result that GROUP BY
DISTINCT would misbehave if used in a plpgsql "expression" context.
I'm not hugely surprised that no one has reported that from the
field, but nonetheless it's broken.  In the attached v5 I just
quickly added the missing line and moved on, but we'll need to
back-patch that bit.  (Maybe we'd be well advised to refactor to
reduce the amount of duplicated code that needs to be kept in sync?)

I have not attempted to address the definitional issues I just
queried Peter about.  Other open items:

* Documentation

* The test cases deserve reconsideration now that we think their
charter only goes as far as EXPLAIN'ing the results; some of them
seem pretty redundant in this context.

			regards, tom lane


------- =_aaaaaaaaaa0
Content-Type: text/x-diff; name="v5-0001-Add-GROUP-BY-ALL.patch";
	charset="us-ascii"
Content-ID: <30931.1758918999.2@sss.pgh.pa.us>
Content-Description: v5-0001-Add-GROUP-BY-ALL.patch
Content-Transfer-Encoding: quoted-printable

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index b9763ea1714..fe632d3dabf 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt=
 *stmt)
 =

 	qry->groupClause =3D transformGroupClause(pstate,
 											stmt->groupClause,
+											stmt->groupAll,
 											&qry->groupingSets,
 											&qry->targetList,
 											qry->sortClause,
 											EXPR_KIND_GROUP_BY,
 											false /* allow SQL92 rules */ );
 	qry->groupDistinct =3D stmt->groupDistinct;
+	qry->groupAll =3D stmt->groupAll;
 =

 	if (stmt->distinctClause =3D=3D NIL)
 	{
@@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssign=
Stmt *stmt)
 =

 	qry->groupClause =3D transformGroupClause(pstate,
 											sstmt->groupClause,
+											sstmt->groupAll,
 											&qry->groupingSets,
 											&qry->targetList,
 											qry->sortClause,
 											EXPR_KIND_GROUP_BY,
 											false /* allow SQL92 rules */ );
+	qry->groupDistinct =3D sstmt->groupDistinct;
+	qry->groupAll =3D sstmt->groupAll;
 =

 	if (sstmt->distinctClause =3D=3D NIL)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
 typedef struct GroupClause
 {
 	bool		distinct;
+	bool		all;
 	List	   *list;
 } GroupClause;
 =

@@ -12993,6 +12994,7 @@ simple_select:
 					n->whereClause =3D $6;
 					n->groupClause =3D ($7)->list;
 					n->groupDistinct =3D ($7)->distinct;
+					n->groupAll =3D ($7)->all;
 					n->havingClause =3D $8;
 					n->windowClause =3D $9;
 					$$ =3D (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
 					n->whereClause =3D $6;
 					n->groupClause =3D ($7)->list;
 					n->groupDistinct =3D ($7)->distinct;
+					n->groupAll =3D ($7)->all;
 					n->havingClause =3D $8;
 					n->windowClause =3D $9;
 					$$ =3D (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
 					GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause));
 =

 					n->distinct =3D $3 =3D=3D SET_QUANTIFIER_DISTINCT;
+					n->all =3D false;
 					n->list =3D $4;
 					$$ =3D n;
 				}
+			| GROUP_P BY ALL
+				{
+					GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause));
+					n->distinct =3D false;
+					n->all =3D true;
+					n->list =3D NIL;
+					$$ =3D n;
+				}
 			| /*EMPTY*/
 				{
 					GroupClause *n =3D (GroupClause *) palloc(sizeof(GroupClause));
 =

 					n->distinct =3D false;
+					n->all =3D false;
 					n->list =3D NIL;
 					$$ =3D n;
 				}
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
 					n->whereClause =3D $4;
 					n->groupClause =3D ($5)->list;
 					n->groupDistinct =3D ($5)->distinct;
+					n->groupAll =3D ($5)->all;
 					n->havingClause =3D $6;
 					n->windowClause =3D $7;
 					n->sortClause =3D $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_=
clause.c
index 9f20a70ce13..562e3e90e34 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be a=
ll
+ * non-aggregate expressions in the targetlist.
+ *
  * This is also used for window PARTITION BY clauses (which act almost th=
e
  * same, but are always interpreted per SQL99 rules).
  *
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
  *
  * pstate		ParseState
  * grouplist	clause to transform
+ * groupByAll	is this a GROUP BY ALL statement?
  * groupingSets reference to list to contain the grouping set tree
  * targetlist	reference to TargetEntry list
  * sortClause	ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
  * useSQL99		SQL99 rather than SQL92 syntax
  */
 List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **grouping=
Sets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll=
,
+					 List **groupingSets,
 					 List **targetlist, List *sortClause,
 					 ParseExprKind exprKind, bool useSQL99)
 {
@@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grou=
plist, List **groupingSets,
 	bool		hasGroupingSets =3D false;
 	Bitmapset  *seen_local =3D NULL;
 =

+	/* Handle GROUP BY ALL */
+	if (groupByAll)
+	{
+		/* There cannot have been any explicit list items */
+		Assert(grouplist =3D=3D NIL);
+
+		/*
+		 * Iterate over targets, any non-aggregate gets added as a Target.
+		 * Note that it's not enough to check for a top-level Aggref; we need
+		 * to ensure that any sub-expression here does not include an Aggref
+		 * (for instance an expression such as `sum(col) + 4` should not be
+		 * added as a grouping target).
+		 *
+		 * We specify the parse location as the TLE's location, despite the
+		 * comment for addTargetToGroupList discouraging that.  The only other
+		 * thing we could point to is the ALL keyword, which seems unhelpful
+		 * when there are multiple TLEs.
+		 */
+		foreach_ptr(TargetEntry, tle, *targetlist)
+		{
+			/* Ignore junk TLEs */
+			if (tle->resjunk)
+				continue;
+
+			/* Use contain_aggs_of_level to detect Aggrefs in SubLinks */
+			if (!(pstate->p_hasAggs &&
+				  contain_aggs_of_level((Node *) tle->expr, 0)))
+				result =3D addTargetToGroupList(pstate, tle,
+											  result, *targetlist,
+											  exprLocation((Node *) tle->expr));
+		}
+
+		return result;
+	}
+
 	/*
 	 * Recursively flatten implicit RowExprs. (Technically this is only need=
ed
 	 * for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate,
 										  true /* force SQL99 rules */ );
 		partitionClause =3D transformGroupClause(pstate,
 											   windef->partitionClause,
+											   false /* not GROUP BY ALL */ ,
 											   NULL,
 											   targetlist,
 											   orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/rul=
eutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context=
 *context)
 		save_ingroupby =3D context->inGroupBy;
 		context->inGroupBy =3D true;
 =

-		if (query->groupingSets =3D=3D NIL)
+		if (query->groupAll)
+			appendStringInfoString(buf, "ALL");
+		else if (query->groupingSets =3D=3D NIL)
 		{
 			sep =3D "";
 			foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes=
.h
index 4ed14fc5b78..d379879ae80 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
 	List	   *returningList;	/* return-values list (of TargetEntry) */
 =

 	List	   *groupClause;	/* a list of SortGroupClause's */
-	bool		groupDistinct;	/* is the group by clause distinct? */
+	bool		groupDistinct;	/* was GROUP BY DISTINCT used? */
+	bool		groupAll;		/* was GROUP BY ALL used? */
 =

 	List	   *groupingSets;	/* a list of GroupingSet's if present */
 =

@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
 	Node	   *whereClause;	/* WHERE qualification */
 	List	   *groupClause;	/* GROUP BY clauses */
 	bool		groupDistinct;	/* Is this GROUP BY DISTINCT? */
+	bool		groupAll;		/* Is this GROUP BY ALL? */
 	Node	   *havingClause;	/* HAVING conditional-expression */
 	List	   *windowClause;	/* WINDOW window_name AS (...), ... */
 =

diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_=
clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, No=
de *clause,
 								  ParseExprKind exprKind, const char *constructName,
 								  LimitOption limitOption);
 extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+								  bool groupByAll,
 								  List **groupingSets,
 								  List **targetlist, List *sortClause,
 								  ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/e=
xpected/aggregates.out
index 1f24f6ffd1f..aeeb452ec26 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,153 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 --
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of t=
he
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: (a + b)
+   ->  Seq Scan on t1
+(3 rows)
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ Aggregate
+   ->  Seq Scan on t1
+(2 rows)
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+   QUERY PLAN   =

+----------------
+ Seq Scan on t1
+(1 row)
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b =3D 2) FROM t1 GROU=
P BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t1
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN      =

+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t1
+(3 rows)
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR:  column "t1.c" must appear in the GROUP BY clause or be used in an=
 aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+                                               ^
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GRO=
UP BY ALL;
+            QUERY PLAN             =

+-----------------------------------
+ GroupAggregate
+   InitPlan 1
+     ->  Limit
+           ->  Seq Scan on t1 t1_1
+   ->  Seq Scan on t1
+(5 rows)
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GR=
OUP BY ALL)
+SELECT AVG(a), sum_b  FROM a_query GROUP BY ALL;
+         QUERY PLAN         =

+----------------------------
+ HashAggregate
+   Group Key: sum(t1.b)
+   ->  HashAggregate
+         Group Key: t1.a
+         ->  Seq Scan on t1
+(5 rows)
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+NOTICE:  view "v1" will be a temporary view
+SELECT pg_get_viewdef('v1'::regclass);
+    pg_get_viewdef     =

+-----------------------
+  SELECT b,           +
+     count(*) AS count+
+    FROM t1           +
+   GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
 -- Test GROUP BY matching of join columns that are type-coerced due to US=
ING
 --
 create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/ag=
gregates.sql
index 62540b1ffa4..6b2516cd9b1 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,68 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 =

+
+--
+-- Test GROUP BY ALL
+--
+
+-- We don't care about the data here, just the proper transformation of t=
he
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+
+-- basic field check
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- oops all aggregates
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- empty column list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- filter
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b =3D 2) FROM t1 GROU=
P BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL;
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- expression without including aggregate columns
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- subquery
+EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GRO=
UP BY ALL;
+
+-- cte
+EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GR=
OUP BY ALL)
+SELECT AVG(a), sum_b  FROM a_query GROUP BY ALL;
+
+-- verify deparse
+CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
 --
 -- Test GROUP BY matching of join columns that are type-coerced due to US=
ING
 --

------- =_aaaaaaaaaa0--