Re: JumbleQuery ma treat different GROUP BY expr as the same

public inbox for [email protected]  
help / color / mirror / Atom feed

From: jian he <[email protected]>
To: Michael Paquier <[email protected]>
Cc: Tom Lane <[email protected]>
Cc: PostgreSQL-development <[email protected]>
Cc: Richard Guo <[email protected]>
Subject: Re: JumbleQuery ma treat different GROUP BY expr as the same
Date: Wed, 14 Jan 2026 14:15:04 +0800
Message-ID: <CACJufxGFiyc_3h5FceXf3AFFAPpScc-91DrH5yTT8CQJrghRQA@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <CACJufxEy2W+tCqC7XuJ94r3ivWsM=onKJp94kRFx3hoARjBeFQ@mail.gmail.com>
	<[email protected]>
	<[email protected]>
	<[email protected]>
	<CACJufxFb2AjY5CS4iCOompuVjriRxUddkGCTdPRdpEHjG5tD=g@mail.gmail.com>
	<[email protected]>

On Tue, Jan 13, 2026 at 3:10 PM Michael Paquier <[email protected]> wrote:
>
> On Mon, Jan 12, 2026 at 04:20:44PM +0800, jian he wrote:
> > While working on it, I guess I found another bug,  below JumbleQuery will return
> > the same result:
> >
> > SELECT FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
> > SELECT a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
> > SELECT a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
> >
> > so I think TargetEntry.resjunk should not be marked as query_jumble_ignore.
>
> Not sure how to feel about this one, as a primary node.  3db72ebcbe20
> has put a query_jumble_ignore to TargetEntry.resjunk which was simply
> a consistent move with the pre-v15 branches because these columns have
> always been ignored.  I have never heard complaints about that in the
> field with PGSS, TBH.  The original choice comes from this thread,
> back in 2012 when this was still integrated into PGSS:
> https://www.postgresql.org/message-id/CAEYLb_WGeFCT7MfJ8FXf-CR6BSE6Lbn%2BO1VX3%2BOGrc4Bscn4%3DA%40ma...
>

if not remove the query_jumble_ignore from TargetEntry.resjunk
the below query would have the same QueryID.

SELECT COUNT(*), a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
SELECT COUNT(*), a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;

It affects queries that include an ORDER BY or GROUP BY clause, so a patch is
attached.



--
jian
https://www.enterprisedb.com/


Attachments:

  [text/x-patch] v3-0001-Fix-query-jumbling-for-TargetEntry.resjunk.patch (6.8K, 2-v3-0001-Fix-query-jumbling-for-TargetEntry.resjunk.patch)
  download | inline diff:
From 7da8cf6a17a72522bf7120a2db2f6d8ceb14f837 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Wed, 14 Jan 2026 13:30:39 +0800
Subject: [PATCH v3 1/1] Fix query jumbling for TargetEntry.resjunk

TargetEntry.resjunk was marked with the node attribute query_jumble_ignore,
causing such target list entry to be ignored during the query jumbling.

For example, these two queries could be grouped together within the same query ID:
SELECT COUNT(*), a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
SELECT COUNT(*), a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;

However, as such queries have different semantics, they should be split across
multiple entries.

We do need to mark the following two queries as identical.
SELECT COUNT(*), a as b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
SELECT COUNT(*), a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;

Therefore TargetEntry.resname marked as query_jumble_ignore is correct.

Discussion: https://postgr.es/m/CACJufxEy2W+tCqC7XuJ94r3ivWsM=onKJp94kRFx3hoARjBeFQ@mail.gmail.com
---
 .../pg_stat_statements/expected/select.out    | 44 ++++++++++++++++++-
 contrib/pg_stat_statements/sql/select.sql     |  6 +++
 src/include/nodes/primnodes.h                 |  2 +-
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index a069119c790..6c391d43f1c 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -99,6 +99,22 @@ SELECT 2 AS "int" ORDER BY 1;
    2
 (1 row)
 
+SELECT FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
+--
+(1 row)
+
+SELECT a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
+ a 
+---
+ 1
+(1 row)
+
+SELECT a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
+ a | b 
+---+---
+ 1 | 2
+(1 row)
+
 /* this comment should not appear in the output */
 SELECT 'hello'
   -- but this one will appear
@@ -223,6 +239,9 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
      1 |    2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i
      1 |    1 | SELECT $1 || $2
      2 |    2 | SELECT DISTINCT $1 AS "int"
+     1 |    1 | SELECT FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) ORDER BY a, b
+     1 |    1 | SELECT a FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) ORDER BY a, b
+     1 |    1 | SELECT a, b FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) ORDER BY a, b
      0 |    0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"
      1 |    1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
      1 |    2 | WITH t(f) AS (                                                              +
@@ -230,7 +249,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
        |      | )                                                                           +
        |      |   SELECT f FROM t ORDER BY f
      1 |    1 | select $1::jsonb ? $2
-(17 rows)
+(20 rows)
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
@@ -478,6 +497,24 @@ SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
      1
 (1 row)
 
+SELECT COUNT(*), a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
+ count | a 
+-------+---
+     1 | 1
+(1 row)
+
+SELECT COUNT(*), a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
+ count | a | b 
+-------+---+---
+     1 | 1 | 2
+(1 row)
+
+SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b ORDER BY a, b;
+ count 
+-------
+     1
+(1 row)
+
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY b, a;
  count 
 -------
@@ -546,14 +583,17 @@ SELECT calls, query FROM pg_stat_statements WHERE query LIKE '%GROUP BY%' ORDER
      1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a
      2 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a HAVING a = $3
      1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a, b
+     1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a, b ORDER BY a, b
      1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY b
      1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY b HAVING b = $3
      1 | SELECT COUNT(*) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY b, a
+     1 | SELECT COUNT(*), a FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a, b
+     1 | SELECT COUNT(*), a, b FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a, b
      1 | SELECT GROUPING(a) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a
      1 | SELECT GROUPING(b) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY a, b
      1 | SELECT GROUPING(b) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY b
      1 | SELECT GROUPING(b) FROM (VALUES ($1::INT, $2::INT)) AS t(a, b) GROUP BY b, a
-(12 rows)
+(15 rows)
 
 -- GROUP BY [DISTINCT]
 SELECT a, b, c
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index a10d618c034..6c826e61870 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -31,6 +31,9 @@ SELECT DISTINCT 1 AS "int";
 SELECT DISTINCT 2 AS "int";
 SELECT 1 AS "int" ORDER BY 1;
 SELECT 2 AS "int" ORDER BY 1;
+SELECT FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
+SELECT a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
+SELECT a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) ORDER BY a, b;
 
 /* this comment should not appear in the output */
 SELECT 'hello'
@@ -162,6 +165,9 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%FETCH FIRST%';
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a;
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY b;
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
+SELECT COUNT(*), a FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
+SELECT COUNT(*), a, b FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b;
+SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY a, b ORDER BY a, b;
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY b, a;
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY GROUPING SETS(a, ());
 SELECT COUNT(*) FROM (VALUES (1::INT, 2::INT)) AS t(a, b) GROUP BY GROUPING SETS(b, ());
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 5211cadc258..174ebe92939 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -2248,7 +2248,7 @@ typedef struct TargetEntry
 	/* column's number in source table */
 	AttrNumber	resorigcol pg_node_attr(query_jumble_ignore);
 	/* set to true to eliminate the attribute from final target list */
-	bool		resjunk pg_node_attr(query_jumble_ignore);
+	bool		resjunk;
 } TargetEntry;
 
 
-- 
2.34.1

view thread (5+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected]
  Subject: Re: JumbleQuery ma treat different GROUP BY expr as the same
  In-Reply-To: <CACJufxGFiyc_3h5FceXf3AFFAPpScc-91DrH5yTT8CQJrghRQA@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox