[PATCH] GROUP BY ALL

public inbox for [email protected]  
help / color / mirror / Atom feed

[PATCH] GROUP BY ALL
2+ messages / 2 participants
[nested] [flat]

* [PATCH] GROUP BY ALL
@ 2024-07-22 20:55  David Christensen <[email protected]>
  0 siblings, 1 reply; 2+ messages in thread

From: David Christensen @ 2024-07-22 20:55 UTC (permalink / raw)
  To: pgsql-hackers

I see that there'd been some chatter but not a lot of discussion about
a GROUP BY ALL feature/functionality.  There certainly is utility in
such a construct IMHO.

The grammar is unambiguous, so can support this construct in lieu of
the traditional GROUP BY clause.  Enclosed is a patch which adds this
via just scanning the TargetEntry list and adding anything that is not
an aggregate function call to the groupList.

Still need some docs; just throwing this out there and getting some feedback.

Thanks,

David


Attachments:

  [application/octet-stream] 0001-WIP-groupAll-to-automatically-add-columns-to-the-gro.patch (11.2K, 2-0001-WIP-groupAll-to-automatically-add-columns-to-the-gro.patch)
  download | inline diff:
From f9d7ee410b1d6ced64383934be012ff5ec7e4d3c Mon Sep 17 00:00:00 2001
From: David Christensen <[email protected]>
Date: Mon, 11 Mar 2024 10:37:05 -0400
Subject: [PATCH] WIP: groupAll to automatically add columns to the groupClause
 list

---
 src/backend/parser/analyze.c             | 30 ++++++++++----
 src/backend/parser/gram.y                | 16 +++++++-
 src/backend/parser/parse_clause.c        |  7 ++--
 src/backend/utils/adt/ruleutils.c        | 51 +++++++++++++-----------
 src/include/nodes/parsenodes.h           |  2 +
 src/include/parser/parse_clause.h        |  2 +
 src/test/regress/expected/aggregates.out | 46 +++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      | 32 +++++++++++++++
 8 files changed, 152 insertions(+), 34 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 28fed9d87f..ab38a4f3f5 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1371,6 +1371,20 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 	qry->targetList = transformTargetList(pstate, stmt->targetList,
 										  EXPR_KIND_SELECT_TARGET);
 
+	/* if groupByAll, expand targetList into groupClause. In this case, we cannot have any other group clauses, so this is safe */
+
+	if (stmt->groupAll)
+	{
+		ListCell *l1;
+		/* iterate over targets, any non-aggregate gets added as a Target */
+		foreach (l1,qry->targetList)
+		{
+			TargetEntry *n = (TargetEntry*)lfirst(l1);
+			if (!IsA(n->expr,Aggref))
+				qry->groupClause = addTargetToGroupList(pstate, n, qry->groupClause, qry->targetList, 0);
+		}
+	}
+
 	/* mark column origins */
 	markTargetListOrigins(pstate, qry->targetList);
 
@@ -1394,14 +1408,16 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 										  EXPR_KIND_ORDER_BY,
 										  false /* allow SQL92 rules */ );
 
-	qry->groupClause = transformGroupClause(pstate,
-											stmt->groupClause,
-											&qry->groupingSets,
-											&qry->targetList,
-											qry->sortClause,
-											EXPR_KIND_GROUP_BY,
-											false /* allow SQL92 rules */ );
+	if (!stmt->groupAll)
+		qry->groupClause = transformGroupClause(pstate,
+												stmt->groupClause,
+												&qry->groupingSets,
+												&qry->targetList,
+												qry->sortClause,
+												EXPR_KIND_GROUP_BY,
+												false /* allow SQL92 rules */ );
 	qry->groupDistinct = stmt->groupDistinct;
+	qry->groupAll = stmt->groupAll;
 
 	if (stmt->distinctClause == NIL)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a043fd4c66..7a968d7568 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -135,6 +135,7 @@ typedef struct SelectLimit
 typedef struct GroupClause
 {
 	bool		distinct;
+	bool		all;
 	List	   *list;
 } GroupClause;
 
@@ -12845,6 +12846,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -12862,6 +12864,7 @@ simple_select:
 					n->whereClause = $6;
 					n->groupClause = ($7)->list;
 					n->groupDistinct = ($7)->distinct;
+					n->groupAll = ($7)->all;
 					n->havingClause = $8;
 					n->windowClause = $9;
 					$$ = (Node *) n;
@@ -13334,12 +13337,21 @@ first_or_next: FIRST_P								{ $$ = 0; }
  * GroupingSet node of some type.
  */
 group_clause:
-			GROUP_P BY set_quantifier group_by_list
+			GROUP_P BY ALL
+				{
+					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+					n->distinct = false;
+					n->list = NIL;
+					n->all = true;
+					$$ = n;
+				}
+			| GROUP_P BY set_quantifier group_by_list
 				{
 					GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
 
 					n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
 					n->list = $4;
+					n->all = false;
 					$$ = n;
 				}
 			| /*EMPTY*/
@@ -13348,6 +13360,7 @@ group_clause:
 
 					n->distinct = false;
 					n->list = NIL;
+					n->all = false;
 					$$ = n;
 				}
 		;
@@ -17452,6 +17465,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
 					n->whereClause = $4;
 					n->groupClause = ($5)->list;
 					n->groupDistinct = ($5)->distinct;
+					n->groupAll = ($5)->all;
 					n->havingClause = $6;
 					n->windowClause = $7;
 					n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 8118036495..fd9929cecf 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -92,8 +92,6 @@ static int	get_matching_location(int sortgroupref,
 								  List *sortgrouprefs, List *exprs);
 static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
 									   Relation heapRel);
-static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
-								  List *grouplist, List *targetlist, int location);
 static WindowClause *findWindowClause(List *wclist, const char *name);
 static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
 								  Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
@@ -2598,6 +2596,9 @@ transformGroupingSet(List **flatresult,
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate expressions in the targetList.
+ *
  * This is also used for window PARTITION BY clauses (which act almost the
  * same, but are always interpreted per SQL99 rules).
  *
@@ -3532,7 +3533,7 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
  *
  * Returns the updated SortGroupClause list.
  */
-static List *
+List *
 addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
 					 List *grouplist, List *targetlist, int location)
 {
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 653685bffc..9a067479da 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5965,38 +5965,43 @@ get_basic_select_query(Query *query, deparse_context *context,
 
 		appendContextKeyword(context, " GROUP BY ",
 							 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
-		if (query->groupDistinct)
-			appendStringInfoString(buf, "DISTINCT ");
+		if (query->groupAll)
+			appendContextKeyword(context, " ALL ",
+								 -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+		else
+		{
+			if (query->groupDistinct)
+				appendStringInfoString(buf, "DISTINCT ");
 
-		save_exprkind = context->special_exprkind;
-		context->special_exprkind = EXPR_KIND_GROUP_BY;
+			save_exprkind = context->special_exprkind;
+			context->special_exprkind = EXPR_KIND_GROUP_BY;
 
-		if (query->groupingSets == NIL)
-		{
-			sep = "";
-			foreach(l, query->groupClause)
+			if (query->groupingSets == NIL)
 			{
-				SortGroupClause *grp = (SortGroupClause *) lfirst(l);
+				sep = "";
+				foreach(l, query->groupClause)
+				{
+					SortGroupClause *grp = (SortGroupClause *) lfirst(l);
 
-				appendStringInfoString(buf, sep);
-				get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList,
-										 false, context);
-				sep = ", ";
+					appendStringInfoString(buf, sep);
+					get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList,
+											 false, context);
+					sep = ", ";
+				}
 			}
-		}
-		else
-		{
-			sep = "";
-			foreach(l, query->groupingSets)
+			else
 			{
-				GroupingSet *grp = lfirst(l);
+				sep = "";
+				foreach(l, query->groupingSets)
+				{
+					GroupingSet *grp = lfirst(l);
 
-				appendStringInfoString(buf, sep);
-				get_rule_groupingset(grp, query->targetList, true, context);
-				sep = ", ";
+					appendStringInfoString(buf, sep);
+					get_rule_groupingset(grp, query->targetList, true, context);
+					sep = ", ";
+				}
 			}
 		}
-
 		context->special_exprkind = save_exprkind;
 	}
 
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 85a62b538e..5cd3508d63 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -199,6 +199,7 @@ typedef struct Query
 
 	List	   *groupClause;	/* a list of SortGroupClause's */
 	bool		groupDistinct;	/* is the group by clause distinct? */
+	bool		groupAll;	/* is the group by clause distinct? */
 
 	List	   *groupingSets;	/* a list of GroupingSet's if present */
 
@@ -2131,6 +2132,7 @@ typedef struct SelectStmt
 	Node	   *whereClause;	/* WHERE qualification */
 	List	   *groupClause;	/* GROUP BY clauses */
 	bool		groupDistinct;	/* Is this GROUP BY DISTINCT? */
+	bool		groupAll;		/* Is this GROUP BY ALL? */
 	Node	   *havingClause;	/* HAVING conditional-expression */
 	List	   *windowClause;	/* WINDOW window_name AS (...), ... */
 
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index e71762b10c..7d386912f4 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -50,6 +50,8 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
 								 List *sortlist, List *targetlist, SortBy *sortby);
 extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
 extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
+extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
+								  List *grouplist, List *targetlist, int location);
 
 /* functions in parse_jsontable.c */
 extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index a5596ab210..3e2ebc76d4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1454,6 +1454,52 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 --
+-- Test GROUP BY ALL
+--
+CREATE TEMP TABLE t1 (
+  a int,
+  b int
+);
+COPY t1 FROM STDIN;
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+ b | count 
+---+-------
+ 3 |     2
+ 2 |     2
+ 1 |     1
+(3 rows)
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+ a | count 
+---+-------
+   |     0
+ 1 |     4
+(2 rows)
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+ a | sum | b 
+---+-----+---
+ 1 |   1 | 1
+ 1 |   6 | 3
+ 1 |   2 | 2
+   |   2 | 2
+(4 rows)
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+ ?column? 
+----------
+         
+        3
+        4
+        2
+(4 rows)
+
+DROP TABLE t1;
+--
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
 create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ca6d1bcfb7..46b9df3389 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -512,6 +512,38 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 
+
+--
+-- Test GROUP BY ALL
+--
+
+CREATE TEMP TABLE t1 (
+  a int,
+  b int
+);
+
+COPY t1 FROM STDIN;
+1	1
+1	2
+1	3
+\N	2
+1	3
+\.
+
+-- basic field check
+SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- throw a null in the values too
+SELECT a, COUNT(a) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+SELECT a + b FROM t1 GROUP BY ALL;
+
+DROP TABLE t1;
+
 --
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
-- 
2.40.1



^ permalink  raw  reply  [nested|flat] 2+ messages in thread

* Re: [PATCH] GROUP BY ALL
@ 2024-07-24 08:56  Jelte Fennema-Nio <[email protected]>
  parent: David Christensen <[email protected]>
  0 siblings, 0 replies; 2+ messages in thread

From: Jelte Fennema-Nio @ 2024-07-24 08:56 UTC (permalink / raw)
  To: David Christensen <[email protected]>; +Cc: pgsql-hackers

On Mon, 22 Jul 2024 at 22:55, David Christensen <[email protected]> wrote:
> I see that there'd been some chatter but not a lot of discussion about
> a GROUP BY ALL feature/functionality.  There certainly is utility in
> such a construct IMHO.

+1 from me. When exploring data, this is extremely useful because you
don't have to update the GROUP BY clause every time

Regarding the arguments against this:
1. I don't think this is any more unreadable than being able to GROUP
BY 1, 2, 3. Or being able to use column aliases from the SELECT in the
GROUP BY clause. Again this is already allowed. Personally I actually
think it's more readable than specifying e.g. 5 columns in the group
by, because then I have to cross-reference with columns in the SELECT
clause to find out if they are the same. With ALL I instantly know
it's grouped by all
2. This is indeed not part of the standard. But we have many things
that are not part of the standard. I think as long as we use the same
syntax as snowflake, databricks and duckdb I personally don't see a
big problem. Then we could try and make this be part of the standard
in the next version of the standard.

^ permalink  raw  reply  [nested|flat] 2+ messages in thread

end of thread, other threads:[~2024-07-24 08:56 UTC | newest]

Thread overview: 2+ messages (download: mbox mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-07-22 20:55 [PATCH] GROUP BY ALL David Christensen <[email protected]>
2024-07-24 08:56 ` Jelte Fennema-Nio <[email protected]>

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox