public inbox for [email protected]  
help / color / mirror / Atom feed
From: Ilmar Yunusov <[email protected]>
To: [email protected]
Cc: Ilmar Yunusov <[email protected]>
Subject: [RFC PATCH v0 1/7] Add EXPLAIN WAITS statement reporting
Date: Sat,  9 May 2026 04:22:31 +0500
Message-ID: <6fc0404acb69765067ccbf1ca3ee0fd39240f9d6.1778280923.git.tanswis42@gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>

---
 doc/src/sgml/ref/explain.sgml           |  14 +++
 src/backend/commands/explain.c          | 129 +++++++++++++++++++-
 src/backend/commands/explain_state.c    |   8 ++
 src/backend/utils/activity/wait_event.c | 149 ++++++++++++++++++++++++
 src/include/commands/explain_state.h    |   1 +
 src/include/utils/wait_event.h          |  29 +++++
 src/test/regress/expected/explain.out   |  17 +++
 src/test/regress/sql/explain.sql        |   5 +
 8 files changed, 349 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml
index e95e19081e1..5460568af1d 100644
--- a/doc/src/sgml/ref/explain.sgml
+++ b/doc/src/sgml/ref/explain.sgml
@@ -47,6 +47,7 @@ EXPLAIN [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] <rep
     SUMMARY [ <replaceable class="parameter">boolean</replaceable> ]
     MEMORY [ <replaceable class="parameter">boolean</replaceable> ]
     IO [ <replaceable class="parameter">boolean</replaceable> ]
+    WAITS [ <replaceable class="parameter">boolean</replaceable> ]
     FORMAT { TEXT | XML | JSON | YAML }
 </synopsis>
  </refsynopsisdiv>
@@ -310,6 +311,19 @@ ROLLBACK;
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>WAITS</literal></term>
+    <listitem>
+     <para>
+      Include exact backend-local wait event timing observed during query
+      execution.  Wait events are grouped by wait event type and wait event
+      name, and include the number of waits and total time spent waiting.
+      This parameter may only be used when <literal>ANALYZE</literal> is also
+      enabled.  It defaults to <literal>FALSE</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>FORMAT</literal></term>
     <listitem>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 112c17b0d64..0b7cc5c15c6 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -45,6 +45,7 @@
 #include "utils/tuplesort.h"
 #include "utils/tuplestore.h"
 #include "utils/typcache.h"
+#include "utils/wait_event.h"
 #include "utils/xml.h"
 
 
@@ -149,6 +150,9 @@ static const char *explain_get_index_name(Oid indexId);
 static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
 static void show_wal_usage(ExplainState *es, const WalUsage *usage);
+static int	wait_event_usage_cmp(const void *a, const void *b);
+static void show_wait_event_usage(ExplainState *es,
+								  const WaitEventUsage *usage);
 static void show_memory_counters(ExplainState *es,
 								 const MemoryContextCounters *mem_counters);
 static void show_result_replacement_info(Result *result, ExplainState *es);
@@ -510,6 +514,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	int			eflags;
 	int			instrument_option = 0;
 	SerializeMetrics serializeMetrics = {0};
+	WaitEventUsage waitEventUsage;
+	WaitEventUsage *waitEventUsagePtr = NULL;
 
 	Assert(plannedstmt->commandType != CMD_UTILITY);
 
@@ -583,11 +589,27 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		else
 			dir = ForwardScanDirection;
 
+		if (es->waits)
+		{
+			waitEventUsagePtr = &waitEventUsage;
+			pgstat_begin_wait_event_usage(waitEventUsagePtr,
+										  queryDesc->estate->es_query_cxt);
+		}
+
 		/* run the plan */
-		ExecutorRun(queryDesc, dir, 0);
+		PG_TRY();
+		{
+			ExecutorRun(queryDesc, dir, 0);
 
-		/* run cleanup too */
-		ExecutorFinish(queryDesc);
+			/* run cleanup too */
+			ExecutorFinish(queryDesc);
+		}
+		PG_FINALLY();
+		{
+			if (waitEventUsagePtr)
+				pgstat_end_wait_event_usage(waitEventUsagePtr);
+		}
+		PG_END_TRY();
 
 		/* We can't run ExecutorEnd 'till we're done printing the stats... */
 		totaltime += elapsed_time(&starttime);
@@ -605,6 +627,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	/* Create textual dump of plan tree */
 	ExplainPrintPlan(es, queryDesc);
 
+	if (waitEventUsagePtr)
+		show_wait_event_usage(es, waitEventUsagePtr);
+
 	/* Show buffer and/or memory usage in planning */
 	if (peek_buffer_usage(es, bufusage) || mem_counters)
 	{
@@ -4503,6 +4528,104 @@ show_wal_usage(ExplainState *es, const WalUsage *usage)
 	}
 }
 
+static int
+wait_event_usage_cmp(const void *a, const void *b)
+{
+	const WaitEventUsageEntry *ea = (const WaitEventUsageEntry *) a;
+	const WaitEventUsageEntry *eb = (const WaitEventUsageEntry *) b;
+	int64		ta = INSTR_TIME_GET_MICROSEC(ea->time);
+	int64		tb = INSTR_TIME_GET_MICROSEC(eb->time);
+
+	if (ta < tb)
+		return 1;
+	if (ta > tb)
+		return -1;
+	if (ea->wait_event_info < eb->wait_event_info)
+		return -1;
+	if (ea->wait_event_info > eb->wait_event_info)
+		return 1;
+	return 0;
+}
+
+static void
+show_wait_event_usage(ExplainState *es, const WaitEventUsage *usage)
+{
+	WaitEventUsageEntry *entries;
+
+	if (usage == NULL)
+		return;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT && usage->nentries == 0)
+		return;
+
+	if (usage->nentries > 0)
+	{
+		entries = palloc_array(WaitEventUsageEntry, usage->nentries);
+		memcpy(entries, usage->entries,
+			   sizeof(WaitEventUsageEntry) * usage->nentries);
+		qsort(entries, usage->nentries, sizeof(WaitEventUsageEntry),
+			  wait_event_usage_cmp);
+	}
+	else
+		entries = NULL;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainIndentText(es);
+		appendStringInfoString(es->str, "Wait Events:\n");
+		es->indent++;
+
+		for (int i = 0; i < usage->nentries; i++)
+		{
+			const char *event_type;
+			const char *event_name;
+
+			event_type = pgstat_get_wait_event_type(entries[i].wait_event_info);
+			event_name = pgstat_get_wait_event(entries[i].wait_event_info);
+
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "%s:%s calls=%" PRIu64 " time=%0.3f ms\n",
+							 event_type ? event_type : "Unknown",
+							 event_name ? event_name : "unknown",
+							 entries[i].calls,
+							 INSTR_TIME_GET_MILLISEC(entries[i].time));
+		}
+
+		es->indent--;
+	}
+	else
+	{
+		ExplainOpenGroup("Wait-Events", "Wait Events", false, es);
+
+		for (int i = 0; i < usage->nentries; i++)
+		{
+			const char *event_type;
+			const char *event_name;
+
+			event_type = pgstat_get_wait_event_type(entries[i].wait_event_info);
+			event_name = pgstat_get_wait_event(entries[i].wait_event_info);
+
+			ExplainOpenGroup("Wait-Event", NULL, true, es);
+			ExplainPropertyText("Wait Event Type",
+								event_type ? event_type : "Unknown",
+								es);
+			ExplainPropertyText("Wait Event",
+								event_name ? event_name : "unknown",
+								es);
+			ExplainPropertyUInteger("Calls", NULL, entries[i].calls, es);
+			ExplainPropertyFloat("Time", "ms",
+								 INSTR_TIME_GET_MILLISEC(entries[i].time),
+								 3, es);
+			ExplainCloseGroup("Wait-Event", NULL, true, es);
+		}
+
+		ExplainCloseGroup("Wait-Events", "Wait Events", false, es);
+	}
+
+	if (entries)
+		pfree(entries);
+}
+
 /*
  * Show memory usage details.
  */
diff --git a/src/backend/commands/explain_state.c b/src/backend/commands/explain_state.c
index a0ee0a664be..2abcb64cc95 100644
--- a/src/backend/commands/explain_state.c
+++ b/src/backend/commands/explain_state.c
@@ -164,6 +164,8 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 		}
 		else if (strcmp(opt->defname, "io") == 0)
 			es->io = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "waits") == 0)
+			es->waits = defGetBoolean(opt);
 		else if (!ApplyExtensionExplainOption(es, opt, pstate))
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -196,6 +198,12 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("EXPLAIN option %s requires ANALYZE", "IO")));
 
+	/* check that WAITS is used with EXPLAIN ANALYZE */
+	if (es->waits && !es->analyze)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("EXPLAIN option %s requires ANALYZE", "WAITS")));
+
 	/* check that serialize is used with EXPLAIN ANALYZE */
 	if (es->serialize != EXPLAIN_SERIALIZE_NONE && !es->analyze)
 		ereport(ERROR,
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index 95635c7f56c..60d37ccbb73 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -36,11 +36,20 @@ static const char *pgstat_get_wait_client(WaitEventClient w);
 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
+static void WaitEventUsageAdd(WaitEventUsage *usage, uint32 wait_event_info,
+							  const instr_time *elapsed);
 
 
 static uint32 local_my_wait_event_info;
 uint32	   *my_wait_event_info = &local_my_wait_event_info;
 
+#define WAIT_EVENT_USAGE_INITIAL_EVENTS	16
+
+int			pgstat_wait_event_usage_depth = 0;
+static WaitEventUsage *pgstat_wait_event_usage = NULL;
+static uint32 pgstat_wait_event_usage_current = 0;
+static instr_time pgstat_wait_event_usage_start;
+
 #define WAIT_EVENT_CLASS_MASK	0xFF000000
 #define WAIT_EVENT_ID_MASK		0x0000FFFF
 
@@ -349,6 +358,146 @@ pgstat_reset_wait_event_storage(void)
 	my_wait_event_info = &local_my_wait_event_info;
 }
 
+/*
+ * Start collecting exact wait event timings in this backend.
+ *
+ * This is intended for short-lived instrumentation such as EXPLAIN ANALYZE.
+ * It records waits observed through pgstat_report_wait_start/end in backend
+ * local memory.  Nested collection is deliberately treated as part of the
+ * outer collection for now; callers that want independent nested accounting
+ * need a stack of WaitEventUsage contexts.
+ */
+void
+pgstat_begin_wait_event_usage(WaitEventUsage *usage, MemoryContext memcontext)
+{
+	Assert(usage != NULL);
+	Assert(memcontext != NULL);
+
+	if (pgstat_wait_event_usage_depth++ == 0)
+	{
+		memset(usage, 0, sizeof(WaitEventUsage));
+		usage->memcontext = memcontext;
+		pgstat_wait_event_usage = usage;
+		pgstat_wait_event_usage_current = 0;
+		INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
+	}
+}
+
+/*
+ * Stop collecting wait event timings.
+ */
+void
+pgstat_end_wait_event_usage(WaitEventUsage *usage)
+{
+	Assert(usage != NULL);
+	Assert(pgstat_wait_event_usage_depth > 0);
+
+	if (--pgstat_wait_event_usage_depth == 0)
+	{
+		if (pgstat_wait_event_usage_current != 0)
+			pgstat_count_wait_event_end();
+
+		pgstat_wait_event_usage = NULL;
+		pgstat_wait_event_usage_current = 0;
+		INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
+	}
+}
+
+/*
+ * Record the beginning of a wait event for exact EXPLAIN-style accounting.
+ */
+void
+pgstat_count_wait_event_start(uint32 wait_event_info)
+{
+	if (pgstat_wait_event_usage == NULL)
+		return;
+
+	/*
+	 * Waits are not expected to nest.  If they do, finish the previous wait
+	 * at the boundary so accounting remains internally consistent.
+	 */
+	if (pgstat_wait_event_usage_current != 0)
+		pgstat_count_wait_event_end();
+
+	pgstat_wait_event_usage_current = wait_event_info;
+	INSTR_TIME_SET_CURRENT(pgstat_wait_event_usage_start);
+}
+
+/*
+ * Record the end of the current wait event.
+ */
+void
+pgstat_count_wait_event_end(void)
+{
+	instr_time	end;
+	instr_time	elapsed;
+
+	if (pgstat_wait_event_usage == NULL ||
+		pgstat_wait_event_usage_current == 0)
+		return;
+
+	INSTR_TIME_SET_CURRENT(end);
+	elapsed = end;
+	INSTR_TIME_SUBTRACT(elapsed, pgstat_wait_event_usage_start);
+
+	WaitEventUsageAdd(pgstat_wait_event_usage,
+					  pgstat_wait_event_usage_current,
+					  &elapsed);
+
+	pgstat_wait_event_usage_current = 0;
+	INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
+}
+
+static void
+WaitEventUsageAdd(WaitEventUsage *usage, uint32 wait_event_info,
+				  const instr_time *elapsed)
+{
+	WaitEventUsageEntry *entry = NULL;
+
+	for (int i = 0; i < usage->nentries; i++)
+	{
+		if (usage->entries[i].wait_event_info == wait_event_info)
+		{
+			entry = &usage->entries[i];
+			break;
+		}
+	}
+
+	if (entry == NULL)
+	{
+		if (usage->nentries >= usage->maxentries)
+		{
+			MemoryContext oldcontext;
+			int			newmaxentries;
+
+			if (usage->maxentries > 0)
+				newmaxentries = usage->maxentries * 2;
+			else
+				newmaxentries = WAIT_EVENT_USAGE_INITIAL_EVENTS;
+
+			oldcontext = MemoryContextSwitchTo(usage->memcontext);
+			if (usage->entries)
+				usage->entries = repalloc_array(usage->entries,
+												WaitEventUsageEntry,
+												newmaxentries);
+			else
+				usage->entries = palloc_array(WaitEventUsageEntry,
+											  newmaxentries);
+			MemoryContextSwitchTo(oldcontext);
+
+			usage->maxentries = newmaxentries;
+		}
+
+		entry = &usage->entries[usage->nentries++];
+		entry->wait_event_info = wait_event_info;
+		entry->calls = 0;
+		INSTR_TIME_SET_ZERO(entry->time);
+	}
+
+	entry->calls++;
+	INSTR_TIME_ADD(entry->time, *elapsed);
+}
+
 /* ----------
  * pgstat_get_wait_event_type() -
  *
diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h
index 97bc7ed49f6..4f0d0d495d4 100644
--- a/src/include/commands/explain_state.h
+++ b/src/include/commands/explain_state.h
@@ -56,6 +56,7 @@ typedef struct ExplainState
 	bool		memory;			/* print planner's memory usage information */
 	bool		settings;		/* print modified settings */
 	bool		io;				/* print info about IO (prefetch, ...) */
+	bool		waits;			/* print wait event usage */
 	bool		generic;		/* generate a generic plan */
 	ExplainSerializeOption serialize;	/* serialize the query's output? */
 	ExplainFormat format;		/* output format */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 86ee348220d..f7fab5736bb 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -11,16 +11,39 @@
 #define WAIT_EVENT_H
 
 /* enums for wait events */
+#include "portability/instr_time.h"
+#include "utils/palloc.h"
 #include "utils/wait_event_types.h"
 
+typedef struct WaitEventUsageEntry
+{
+	uint32		wait_event_info;
+	uint64		calls;
+	instr_time	time;
+} WaitEventUsageEntry;
+
+typedef struct WaitEventUsage
+{
+	MemoryContext memcontext;
+	int			nentries;
+	int			maxentries;
+	WaitEventUsageEntry *entries;
+} WaitEventUsage;
+
 extern const char *pgstat_get_wait_event(uint32 wait_event_info);
 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 static inline void pgstat_report_wait_start(uint32 wait_event_info);
 static inline void pgstat_report_wait_end(void);
 extern void pgstat_set_wait_event_storage(uint32 *wait_event_info);
 extern void pgstat_reset_wait_event_storage(void);
+extern void pgstat_begin_wait_event_usage(WaitEventUsage *usage,
+										  MemoryContext memcontext);
+extern void pgstat_end_wait_event_usage(WaitEventUsage *usage);
+extern void pgstat_count_wait_event_start(uint32 wait_event_info);
+extern void pgstat_count_wait_event_end(void);
 
 extern PGDLLIMPORT uint32 *my_wait_event_info;
+extern PGDLLIMPORT int pgstat_wait_event_usage_depth;
 
 
 /*
@@ -66,6 +89,9 @@ extern char **GetWaitEventCustomNames(uint32 classId, int *nwaitevents);
 static inline void
 pgstat_report_wait_start(uint32 wait_event_info)
 {
+	if (pgstat_wait_event_usage_depth > 0)
+		pgstat_count_wait_event_start(wait_event_info);
+
 	/*
 	 * Since this is a four-byte field which is always read and written as
 	 * four-bytes, updates are atomic.
@@ -82,6 +108,9 @@ pgstat_report_wait_start(uint32 wait_event_info)
 static inline void
 pgstat_report_wait_end(void)
 {
+	if (pgstat_wait_event_usage_depth > 0)
+		pgstat_count_wait_event_end();
+
 	/* see pgstat_report_wait_start() */
 	*(volatile uint32 *) my_wait_event_info = 0;
 }
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 74a4d87801e..2c7a7e1d4c6 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -99,6 +99,23 @@ select explain_filter('explain (buffers, format text) select * from int8_tbl i8'
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N)
 (1 row)
 
+-- WAITS option
+select explain_filter('explain (analyze, waits, costs off, summary off, timing off, buffers off) select pg_sleep(0.01)');
+            explain_filter             
+---------------------------------------
+ Result (actual rows=N.N loops=N)
+ Wait Events:
+   Timeout:PgSleep calls=N time=N.N ms
+(3 rows)
+
+select explain_filter_to_json('explain (analyze, waits, costs off, summary off, timing off, buffers off, format json) select pg_sleep(0.01)') #> '{0,Wait Events,0}';
+                                     ?column?                                     
+----------------------------------------------------------------------------------
+ {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": "Timeout"}
+(1 row)
+
+explain (waits) select 1;
+ERROR:  EXPLAIN option WAITS requires ANALYZE
 \a
 select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8');
 explain_filter
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index 2f163c64bf6..fe025ddeac5 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -68,6 +68,11 @@ select explain_filter('explain (analyze, buffers off, verbose) select * from int
 select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
 select explain_filter('explain (buffers, format text) select * from int8_tbl i8');
 
+-- WAITS option
+select explain_filter('explain (analyze, waits, costs off, summary off, timing off, buffers off) select pg_sleep(0.01)');
+select explain_filter_to_json('explain (analyze, waits, costs off, summary off, timing off, buffers off, format json) select pg_sleep(0.01)') #> '{0,Wait Events,0}';
+explain (waits) select 1;
+
 \a
 select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8');
 select explain_filter('explain (analyze, serialize, buffers, io, format yaml) select * from int8_tbl i8');
-- 
2.52.0






reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected]
  Subject: Re: [RFC PATCH v0 1/7] Add EXPLAIN WAITS statement reporting
  In-Reply-To: <6fc0404acb69765067ccbf1ca3ee0fd39240f9d6.1778280923.git.tanswis42@gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox