[PATCH] GROUP BY ALL - David Christensen

public inbox for [email protected]  
help / color / mirror / Atom feed

From: David Christensen <[email protected]>
To: pgsql-hackers <[email protected]>
Subject: [PATCH] GROUP BY ALL
Date: Mon, 22 Jul 2024 15:55:20 -0500
Message-ID: <CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com> (raw)

I see that there'd been some chatter but not a lot of discussion about
a GROUP BY ALL feature/functionality.  There certainly is utility in
such a construct IMHO.

The grammar is unambiguous, so can support this construct in lieu of
the traditional GROUP BY clause.  Enclosed is a patch which adds this
via just scanning the TargetEntry list and adding anything that is not
an aggregate function call to the groupList.

Still need some docs; just throwing this out there and getting some feedback.

Thanks,

David


Attachments:

  [application/octet-stream] 0001-WIP-groupAll-to-automatically-add-columns-to-the-gro.patch (11.2K, 2-0001-WIP-groupAll-to-automatically-add-columns-to-the-gro.patch)
  download | inline diff:
From f9d7ee410b1d6ced64383934be012ff5ec7e4d3c Mon Sep 17 00:00:00 2001
From: David Christensen <[email protected]>
Date: Mon, 11 Mar 2024 10:37:05 -0400
Subject: [PATCH] WIP: groupAll to automatically add columns to the groupClause
 list

---
 src/backend/parser/analyze.c             | 30 ++++++++++----
 src/backend/parser/gram.y                | 16 +++++++-
 src/backend/parser/parse_clause.c        |  7 ++--
 src/backend/utils/adt/ruleutils.c        | 51 +++++++++++++-----------
 src/include/nodes/parsenodes.h           |  2 +
 src/include/parser/parse_clause.h        |  2 +
 src/test/regress/expected/aggregates.out | 46 +++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 32 +++++++++++++++
 8 files changed, 152 insertions(+), 34 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 28fed9d87f..ab38a4f3f5 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1371,6 +1371,20 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->targetList = transformTargetList(pstate, stmt->targetList,
 										  EXPR_KIND_SELECT_TARGET);
 
+	/* if groupByAll, expand targetList into groupClause. In this case, we cannot have any other group clauses, so this is safe */
+
+	if (stmt->groupAll)
+	{
+		ListCell *l1;
+		/* iterate over targets, any non-aggregate gets added as a Target */
+		foreach (l1,qry->targetList)
+		{
+			TargetEntry *n = (TargetEntry*)lfirst(l1);
+			if (!IsA(n->expr,Aggref))
+				qry->groupClause = addTargetToGroupList(pstate, n, qry->groupClause, qry->targetList, 0);
+		}
+	}
+
 	/* mark column origins */
 	markTargetListOrigins(pstate, qry->targetList);
 
@@ -1394,14 +1408,16 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 										  EXPR_KIND_ORDER_BY,
 										  false /* allow SQL92 rules */ );
 
-	qry->groupClause = transformGroupClause(pstate,
-											stmt->groupClause,
-											&qry->groupingSets,
-											&qry->targetList,
-											qry->sortClause,
-											EXPR_KIND_GROUP_BY,
-											false /* allow SQL92 rules */ );
+	if (!stmt->groupAll)
+		qry->groupClause = transformGroupClause(pstate,
+												stmt->groupClause,
+												&qry->groupingSets,
+												&qry->targetList,
+												qry->sortClause,
+												EXPR_KIND_GROUP_BY,
+												false /* allow SQL92 rules */ );
 	qry->groupDistinct = stmt->groupDistinct;
+	qry->groupAll = stmt->groupAll;
 
 	if (stmt->distinctClause == NIL)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a043fd4c66..7a968d7568 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -135,6 +135,7 @@ typedef struct SelectLimit
 typedef struct GroupClause
 {
 	bool		distinct;
+	bool		all;
 	List	   *list;
 } GroupClause;
 
@@ -12845,6 +12846,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -12862,6 +12864,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -13334,12 +13337,21 @@ first_or_next: FIRST_P								{ $$ = 0; }
  * GroupingSet node of some type.
  */
 group_clause:
-			GROUP_P BY set_quantifier group_by_list
+			GROUP_P BY ALL
+				{
+					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+					n->distinct = false;
+					n->list = NIL;
+					n->all = true;
+					$$ = n;
+				}
+			| GROUP_P BY set_quantifier group_by_list
 				{
 					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
 
 					n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
 					n->list = $4;
+					n->all = false;
 					$$ = n;
 				}
 			| /*EMPTY*/
@@ -13348,6 +13360,7 @@ group_clause:
 
 					n->distinct = false;
 					n->list = NIL;
+					n->all = false;
 					$$ = n;
 				}
 		;
@@ -17452,6 +17465,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
 					n->whereClause = $4;
 					n->groupClause = ($5)->list;
 					n->groupDistinct = ($5)->distinct;
+					n->groupAll = ($5)->all;
 					n->havingClause = $6;
 					n->windowClause = $7;
 					n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 8118036495..fd9929cecf 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -92,8 +92,6 @@ static int	get_matching_location(int sortgroupref,
 								  List *sortgrouprefs, List *exprs);
 static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
 									   Relation heapRel);
-static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
-								  List *grouplist, List *targetlist, int location);
 static WindowClause *findWindowClause(List *wclist, const char *name);
 static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
 								  Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
@@ -2598,6 +2596,9 @@ transformGroupingSet(List **flatresult,
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetList.
+ *
  * This is also used for window PARTITION BY clauses (which act almost the
  * same, but are always interpreted per SQL99 rules).
  *
@@ -3532,7 +3533,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
  *
  * Returns the updated SortGroupClause list.
  */
-static List *
+List *
 addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
 					 List *grouplist, List *targetlist, int location)
 {
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 653685bffc..9a067479da 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5965,38 +5965,43 @@ get_basic_select_query(Query *query, deparse_context *context,
 
 		appendContextKeyword(context, " GROUP BY ",
 							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		if (query->groupDistinct)
-			appendStringInfoString(buf, "DISTINCT ");
+		if (query->groupAll)
+			appendContextKeyword(context, " ALL ",
+								 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		else
+		{
+			if (query->groupDistinct)
+				appendStringInfoString(buf, "DISTINCT ");
 
-		save_exprkind = context->special_exprkind;
-		context->special_exprkind = EXPR_KIND_GROUP_BY;
+			save_exprkind = context->special_exprkind;
+			context->special_exprkind = EXPR_KIND_GROUP_BY;
 
-		if (query->groupingSets == NIL)
-		{
-			sep = "";
-			foreach(l, query->groupClause)
+			if (query->groupingSets == NIL)
 			{
-				SortGroupClause *grp = (SortGroupClause *) lfirst(l);
+				sep = "";
+				foreach(l, query->groupClause)
+				{
+					SortGroupClause *grp = (SortGroupClause *) lfirst(l);
 
-				appendStringInfoString(buf, sep);
-				get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList,
-										 false, context);
-				sep = ", ";
+					appendStringInfoString(buf, sep);
+					get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList,
+											 false, context);
+					sep = ", ";
+				}
 			}
-		}
-		else
-		{
-			sep = "";
-			foreach(l, query->groupingSets)
+			else
 			{
-				GroupingSet *grp = lfirst(l);
+				sep = "";
+				foreach(l, query->groupingSets)
+				{
+					GroupingSet *grp = lfirst(l);
 
-				appendStringInfoString(buf, sep);
-				get_rule_groupingset(grp, query->targetList, true, context);
-				sep = ", ";
+					appendStringInfoString(buf, sep);
+					get_rule_groupingset(grp, query->targetList, true, context);
+					sep = ", ";
+				}
 			}
 		}
-
 		context->special_exprkind = save_exprkind;
 	}
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 85a62b538e..5cd3508d63 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -199,6 +199,7 @@ typedef struct Query
 
 	List	   *groupClause;	/* a list of SortGroupClause's */
 	bool		groupDistinct;	/* is the group by clause distinct? */
+	bool		groupAll;	/* is the group by clause distinct? */
 
 	List	   *groupingSets;	/* a list of GroupingSet's if present */
 
@@ -2131,6 +2132,7 @@ typedef struct SelectStmt
 	Node	   *whereClause;	/* WHERE qualification */
 	List	   *groupClause;	/* GROUP BY clauses */
 	bool		groupDistinct;	/* Is this GROUP BY DISTINCT? */
+	bool		groupAll;		/* Is this GROUP BY ALL? */
 	Node	   *havingClause;	/* HAVING conditional-expression */
 	List	   *windowClause;	/* WINDOW window_name AS (...), ... */
 
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index e71762b10c..7d386912f4 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -50,6 +50,8 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
 								 List *sortlist, List *targetlist, SortBy *sortby);
 extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
 extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
+extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
+								  List *grouplist, List *targetlist, int location);
 
 /* functions in parse_jsontable.c */
 extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index a5596ab210..3e2ebc76d4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1454,6 +1454,52 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 --
+-- Test GROUP BY ALL
+--
+CREATE TEMP TABLE t1 (
+  a int,
+  b int
+);
+COPY t1 FROM STDIN;
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ b | count 
+---+-------
+ 3 |     2
+ 2 |     2
+ 1 |     1
+(3 rows)
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ a | count 
+---+-------
+   |     0
+ 1 |     4
+(2 rows)
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ a | sum | b 
+---+-----+---
+ 1 |   1 | 1
+ 1 |   6 | 3
+ 1 |   2 | 2
+   |   2 | 2
+(4 rows)
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+ ?column? 
+----------
+         
+        3
+        4
+        2
+(4 rows)
+
+DROP TABLE t1;
+--
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
 create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ca6d1bcfb7..46b9df3389 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -512,6 +512,38 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 
+
+--
+-- Test GROUP BY ALL
+--
+
+CREATE TEMP TABLE t1 (
+  a int,
+  b int
+);
+
+COPY t1 FROM STDIN;
+1	1
+1	2
+1	3
+\N	2
+1	3
+\.
+
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+
+DROP TABLE t1;
+
 --
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
-- 
2.40.1

view thread (2+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected]
  Subject: Re: [PATCH] GROUP BY ALL
  In-Reply-To: <CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox