Received: from malur.postgresql.org ([217.196.149.56]) by arkaria.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96) (envelope-from ) id 1wLUb4-001obS-1x for pgsql-hackers@arkaria.postgresql.org; Fri, 08 May 2026 23:27:06 +0000 Received: from localhost ([127.0.0.1] helo=malur.postgresql.org) by malur.postgresql.org with esmtp (Exim 4.96) (envelope-from ) id 1wLUb3-00Bjt6-1t for pgsql-hackers@arkaria.postgresql.org; Fri, 08 May 2026 23:27:05 +0000 Received: from magus.postgresql.org ([2a02:c0:301:0:ffff::29]) by malur.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 (Exim 4.96) (envelope-from ) id 1wLUXS-00BXxP-0S for pgsql-hackers@lists.postgresql.org; Fri, 08 May 2026 23:23:22 +0000 Received: from mail-wm1-x332.google.com ([2a00:1450:4864:20::332]) by magus.postgresql.org with esmtps (TLS1.3) tls TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 (Exim 4.98.2) (envelope-from ) id 1wLUXO-00000001JVN-33cm for pgsql-hackers@postgresql.org; Fri, 08 May 2026 23:23:21 +0000 Received: by mail-wm1-x332.google.com with SMTP id 5b1f17b1804b1-488d2079582so27197055e9.2 for ; Fri, 08 May 2026 16:23:18 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20251104; t=1778282597; x=1778887397; darn=postgresql.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=eNjhZ/j5ZDTFICzF/LRH3MQuAkwgOMKIoNTavj3SDEI=; b=PM4IUn4O2wOtXgKvdCxNLBbrCG8uGwoW8txdTF1JY8S3ffJMoUMM3/2aaAid93is6y 42Eq3u/9HH+virjfwFe+1rRsBbVsZ9VAJ89NYiFgEUEEon6TtAasC+afblQjElStz5Ax A0NqJDT+wlhn432YRY732BAAE1Uai+K9EVD0fmbSf7VxekZ3UhnW1sykD689Z3qOxUWo kIy34mRzcTDmwjnoZljr8At8evlRWPkODWPKOlZpxC3hSuoYa4jDCmZDEMvFO9XP8JUB 0yiU62QHO1o/d7lqBsbP/w1Ib8dfKiac/uHcVRyurO+ij0ivPL5mMC+jsW/B0tesX+bV Tl5g== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20251104; t=1778282597; x=1778887397; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from :to:cc:subject:date:message-id:reply-to; bh=eNjhZ/j5ZDTFICzF/LRH3MQuAkwgOMKIoNTavj3SDEI=; b=eJq+uw0g7hyGt3HXPiaIYWlNxynf+v7ij5HGusKMqCJbXzGgNjjz0KPBts/Y9Fu8Ub h7rpZPoo1W3GGxY5UWFTmmW9lFoHJZ+uk+Tmr+jX/A0yLa56c3VCDLj89/mNmb/QArLc wrGO3/mX30dpiQ5f0YVeRLoKw7pjKc1hAJOkMWKdY0dEGm6iS2qmGGVqKmE98CX3obQq nOn+8uH5CkTe4kOaaeD15BpvSTt8cDYGiKQF67hiTdYOguggSQAjiJrKpzokaoSwsHvn /PL8kJbG9ezHoRCnjoVtwFnIoaSgQ6WzXZM3M6ZtAT4vqPuLK0D2z+ZIxGk4ZWTmAV3l bZkw== X-Gm-Message-State: AOJu0YzEu7T80uiueSnfM+oiVYBPOx1ksUJNkMtqHvRFwauGzCBTGWIv kIp4leCoOQ7COcq34lmtPWKFalHyYoE4TwgTBWjOvWHB8/fJxDOhbH7layBtvvg0VdE= X-Gm-Gg: AeBDieuyXJR2gHN/aHNDnz9cMfsO4gFU+c2bnNY93PAAu6ueEJ60zPlnHIkUJns0ngC +1fiEatR0rxcBDcAG56f1xxVjR44ijrheNbGnWrfdlKQgVSc41GqOZ2oNyLWGol2gucrEFsnYlz I9pvZGTZY0BWYWnvXcuqr/NYwufdvhv06v85rIZVQ2rGtHQalqRcDQz3CTCpwMUFvCzw0OOGKYz vNY8w/KRIIF6RX+/m2v67eM+G66d3L+kjQMoJGxODJ0L3wLvKRjeTCKfH052+2xkuqCyB+yKYSF 23auBzqxkf2H99IffYzb3e1AxbpbbJjxTA4W/VrJJednFUrq5aV0COmgt1qlD6Nn6vTVBE4zeWu 27Ad7HMZAtAPRr0lXd9dlzSoX5vNg8TomHxWS95YqW6HtR/bbiaYPrKgrD/VY04VFnB2UCuw0x3 ACgC9N83xXwKzEPwVcruSOKUtYuVMx2tARQJqVCSKCrlEOLIPJ1ags/1OFAFMIVvaaBN8jj2gAW jKiZvz5qRUxVdwhV2p2H6yZ9GhQ18l/hQ== X-Received: by 2002:a05:600c:c0c1:b0:48a:56de:d62a with SMTP id 5b1f17b1804b1-48e51e1ddb2mr170458815e9.11.1778282597099; Fri, 08 May 2026 16:23:17 -0700 (PDT) Received: from localhost.localdomain (a195b158c78d62.go.net.mt. [195.158.78.62]) by smtp.gmail.com with ESMTPSA id 5b1f17b1804b1-48e6fffb9bdsm6224775e9.1.2026.05.08.16.23.15 (version=TLS1_3 cipher=TLS_CHACHA20_POLY1305_SHA256 bits=256/256); Fri, 08 May 2026 16:23:16 -0700 (PDT) From: Ilmar Yunusov To: pgsql-hackers@postgresql.org Cc: Ilmar Yunusov Subject: [RFC PATCH v0 6/7] Hide EXPLAIN WAITS accumulator internals Date: Sat, 9 May 2026 04:22:36 +0500 Message-ID: X-Mailer: git-send-email 2.52.0 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: List-Help: List-Subscribe: List-Post: List-Owner: List-Archive: Archived-At: Precedence: bulk --- src/backend/commands/explain.c | 45 ++++++++++-------- src/backend/executor/execParallel.c | 44 +++++++++-------- src/backend/utils/activity/wait_event.c | 63 +++++++++++++++++++++++-- src/include/utils/wait_event.h | 32 ++++++++----- 4 files changed, 129 insertions(+), 55 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index ee69d723cd8..0e2ec510fee 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -514,7 +514,6 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, int eflags; int instrument_option = 0; SerializeMetrics serializeMetrics = {0}; - WaitEventUsage waitEventUsage; WaitEventUsage *waitEventUsagePtr = NULL; Assert(plannedstmt->commandType != CMD_UTILITY); @@ -593,9 +592,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, if (es->waits) { - waitEventUsagePtr = &waitEventUsage; - pgstat_begin_wait_event_usage(waitEventUsagePtr, - queryDesc->estate->es_query_cxt); + waitEventUsagePtr = + pgstat_begin_wait_event_usage(queryDesc->estate->es_query_cxt); queryDesc->estate->es_wait_event_usage = waitEventUsagePtr; } @@ -4559,20 +4557,29 @@ static void show_wait_event_usage(ExplainState *es, const char *labelname, const WaitEventUsage *usage) { + const WaitEventUsageEntry *usage_entries; WaitEventUsageEntry *entries; + uint64 overflowed_calls; + instr_time overflowed_time; + int nentries; if (usage == NULL) return; - if (usage->nentries == 0 && usage->overflowed_calls == 0) + if (pgstat_wait_event_usage_is_empty(usage)) return; - if (usage->nentries > 0) + nentries = pgstat_get_wait_event_usage_entries(usage, &usage_entries); + pgstat_get_wait_event_usage_overflow(usage, + &overflowed_calls, + &overflowed_time); + + if (nentries > 0) { - entries = palloc_array(WaitEventUsageEntry, usage->nentries); - memcpy(entries, usage->entries, - sizeof(WaitEventUsageEntry) * usage->nentries); - qsort(entries, usage->nentries, sizeof(WaitEventUsageEntry), + entries = palloc_array(WaitEventUsageEntry, nentries); + memcpy(entries, usage_entries, + sizeof(WaitEventUsageEntry) * nentries); + qsort(entries, nentries, sizeof(WaitEventUsageEntry), wait_event_usage_cmp); } else @@ -4584,7 +4591,7 @@ show_wait_event_usage(ExplainState *es, const char *labelname, appendStringInfo(es->str, "%s:\n", labelname); es->indent++; - for (int i = 0; i < usage->nentries; i++) + for (int i = 0; i < nentries; i++) { const char *event_type; const char *event_name; @@ -4600,24 +4607,24 @@ show_wait_event_usage(ExplainState *es, const char *labelname, INSTR_TIME_GET_MILLISEC(entries[i].time)); } - if (usage->overflowed_calls > 0) + if (overflowed_calls > 0) { ExplainIndentText(es); appendStringInfo(es->str, "Unrecorded Wait Event Calls: calls=%" PRIu64 " time=%0.3f ms\n", - usage->overflowed_calls, - INSTR_TIME_GET_MILLISEC(usage->overflowed_time)); + overflowed_calls, + INSTR_TIME_GET_MILLISEC(overflowed_time)); } es->indent--; } else { - if (usage->nentries > 0) + if (nentries > 0) { ExplainOpenGroup("Wait-Events", labelname, false, es); - for (int i = 0; i < usage->nentries; i++) + for (int i = 0; i < nentries; i++) { const char *event_type; const char *event_name; @@ -4642,16 +4649,16 @@ show_wait_event_usage(ExplainState *es, const char *labelname, ExplainCloseGroup("Wait-Events", labelname, false, es); } - if (usage->overflowed_calls > 0) + if (overflowed_calls > 0) { /* * This is not a wait event identity, so keep it outside the * Wait Events array in structured output. */ ExplainPropertyUInteger("Unrecorded Wait Event Calls", NULL, - usage->overflowed_calls, es); + overflowed_calls, es); ExplainPropertyFloat("Unrecorded Wait Event Time", "ms", - INSTR_TIME_GET_MILLISEC(usage->overflowed_time), + INSTR_TIME_GET_MILLISEC(overflowed_time), 3, es); } } diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 520b4b8484f..dcd06c718c8 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1352,8 +1352,9 @@ ExecParallelAccumulateWaitEventUsageWorker(WaitEventUsage *usage, if (worker->overflowed_calls > 0) { - usage->overflowed_calls += worker->overflowed_calls; - INSTR_TIME_ADD(usage->overflowed_time, worker->overflowed_time); + pgstat_accumulate_wait_event_usage_overflow(usage, + worker->overflowed_calls, + &worker->overflowed_time); worker->overflowed_calls = 0; INSTR_TIME_SET_ZERO(worker->overflowed_time); } @@ -1377,11 +1378,15 @@ ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker *worker, dsa_area *area, const WaitEventUsage *usage) { + const WaitEventUsageEntry *usage_entries; WaitEventUsageEntry *entries; WaitEventUsageEntry *old_entries = NULL; dsa_pointer entries_dsa; + uint64 overflowed_calls; + instr_time overflowed_time; Size entries_size; int old_nentries = 0; + int usage_nentries; int new_nentries = 0; int i = 0; int j = 0; @@ -1390,10 +1395,15 @@ ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker *worker, Assert(area != NULL); Assert(usage != NULL); - worker->overflowed_calls += usage->overflowed_calls; - INSTR_TIME_ADD(worker->overflowed_time, usage->overflowed_time); + usage_nentries = + pgstat_get_wait_event_usage_entries(usage, &usage_entries); + pgstat_get_wait_event_usage_overflow(usage, + &overflowed_calls, + &overflowed_time); + worker->overflowed_calls += overflowed_calls; + INSTR_TIME_ADD(worker->overflowed_time, overflowed_time); - if (usage->nentries <= 0) + if (usage_nentries <= 0) return; if (DsaPointerIsValid(worker->entries)) @@ -1404,25 +1414,25 @@ ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker *worker, } entries_size = mul_size(sizeof(WaitEventUsageEntry), - (Size) old_nentries + (Size) usage->nentries); + (Size) old_nentries + (Size) usage_nentries); entries_dsa = dsa_allocate(area, entries_size); entries = dsa_get_address(area, entries_dsa); - while (i < old_nentries && j < usage->nentries) + while (i < old_nentries && j < usage_nentries) { WaitEventUsageEntry *entry = &entries[new_nentries]; uint32 old_info = old_entries[i].wait_event_info; - uint32 new_info = usage->entries[j].wait_event_info; + uint32 new_info = usage_entries[j].wait_event_info; if (old_info < new_info) *entry = old_entries[i++]; else if (old_info > new_info) - *entry = usage->entries[j++]; + *entry = usage_entries[j++]; else { *entry = old_entries[i++]; - entry->calls += usage->entries[j].calls; - INSTR_TIME_ADD(entry->time, usage->entries[j].time); + entry->calls += usage_entries[j].calls; + INSTR_TIME_ADD(entry->time, usage_entries[j].time); j++; } @@ -1431,8 +1441,8 @@ ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker *worker, while (i < old_nentries) entries[new_nentries++] = old_entries[i++]; - while (j < usage->nentries) - entries[new_nentries++] = usage->entries[j++]; + while (j < usage_nentries) + entries[new_nentries++] = usage_entries[j++]; if (DsaPointerIsValid(worker->entries)) dsa_free(area, worker->entries); @@ -1781,7 +1791,6 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) QueryDesc *queryDesc; SharedExecutorInstrumentation *instrumentation; SharedJitInstrumentation *jit_instrumentation; - WaitEventUsage waitEventUsage; WaitEventUsage *waitEventUsagePtr = NULL; int instrument_options = 0; void *area_space; @@ -1841,11 +1850,8 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) InstrStartParallelQuery(); if (wait_event_usage != NULL) - { - waitEventUsagePtr = &waitEventUsage; - pgstat_begin_wait_event_usage(waitEventUsagePtr, - queryDesc->estate->es_query_cxt); - } + waitEventUsagePtr = + pgstat_begin_wait_event_usage(queryDesc->estate->es_query_cxt); /* * Run the plan. If we specified a tuple bound, be careful not to demand diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 67980cc0a3b..9719e38729e 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -36,6 +36,17 @@ static const char *pgstat_get_wait_client(WaitEventClient w); static const char *pgstat_get_wait_ipc(WaitEventIPC w); static const char *pgstat_get_wait_timeout(WaitEventTimeout w); static const char *pgstat_get_wait_io(WaitEventIO w); +struct WaitEventUsage +{ + struct WaitEventUsage *active_parent; /* active plan-node stack link */ + struct WaitEventUsage *query_parent; /* active query-level stack link */ + struct WaitEventUsage *saved_node_usage; /* node stack at query start */ + int nentries; + int maxentries; + WaitEventUsageEntry *entries; + uint64 overflowed_calls; + instr_time overflowed_time; +}; static void WaitEventUsageAdd(WaitEventUsage *usage, uint32 wait_event_info, uint64 calls, const instr_time *elapsed); static void WaitEventUsageAddOverflow(WaitEventUsage *usage, uint64 calls, @@ -422,12 +433,12 @@ WaitEventUsageInit(WaitEventUsage *usage, MemoryContext memcontext) * local memory. Nested top-level collectors are kept in a query-level stack; * a wait is counted once in each active collector. */ -void -pgstat_begin_wait_event_usage(WaitEventUsage *usage, MemoryContext memcontext) +WaitEventUsage * +pgstat_begin_wait_event_usage(MemoryContext memcontext) { + WaitEventUsage *usage; bool first; - Assert(usage != NULL); Assert(memcontext != NULL); first = pgstat_wait_event_usage_depth == 0; @@ -440,7 +451,7 @@ pgstat_begin_wait_event_usage(WaitEventUsage *usage, MemoryContext memcontext) INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start); } - WaitEventUsageInit(usage, memcontext); + usage = pgstat_create_wait_event_usage(memcontext); usage->query_parent = pgstat_wait_event_usage; /* * A nested EXPLAIN can error out while one of its plan nodes is active, @@ -451,6 +462,7 @@ pgstat_begin_wait_event_usage(WaitEventUsage *usage, MemoryContext memcontext) pgstat_wait_event_usage = usage; pgstat_wait_event_usage_depth++; pgstat_wait_event_usage_active = true; + return usage; } /* @@ -579,6 +591,49 @@ pgstat_accumulate_wait_event_usage(WaitEventUsage *usage, &entries[i].time); } +void +pgstat_accumulate_wait_event_usage_overflow(WaitEventUsage *usage, + uint64 calls, + const instr_time *elapsed) +{ + Assert(usage != NULL); + Assert(elapsed != NULL); + + WaitEventUsageAddOverflow(usage, calls, elapsed); +} + +bool +pgstat_wait_event_usage_is_empty(const WaitEventUsage *usage) +{ + Assert(usage != NULL); + + return usage->nentries == 0 && usage->overflowed_calls == 0; +} + +int +pgstat_get_wait_event_usage_entries(const WaitEventUsage *usage, + const WaitEventUsageEntry **entries) +{ + Assert(usage != NULL); + Assert(entries != NULL); + + *entries = usage->entries; + return usage->nentries; +} + +void +pgstat_get_wait_event_usage_overflow(const WaitEventUsage *usage, + uint64 *calls, + instr_time *elapsed) +{ + Assert(usage != NULL); + Assert(calls != NULL); + Assert(elapsed != NULL); + + *calls = usage->overflowed_calls; + *elapsed = usage->overflowed_time; +} + /* * Find the existing entry, or the insertion position for a new entry. * diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 67497790307..19763cfcae5 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -15,6 +15,12 @@ #include "utils/palloc.h" #include "utils/wait_event_types.h" +/* + * EXPLAIN wait event accounting support. WaitEventUsage is intentionally + * opaque outside wait_event.c; callers should allocate, accumulate, and read + * it through the functions below. WaitEventUsageEntry is the reportable + * tuple copied to EXPLAIN output and parallel-worker storage. + */ typedef struct WaitEventUsageEntry { uint32 wait_event_info; @@ -22,17 +28,7 @@ typedef struct WaitEventUsageEntry instr_time time; } WaitEventUsageEntry; -typedef struct WaitEventUsage -{ - struct WaitEventUsage *active_parent; /* active plan-node stack link */ - struct WaitEventUsage *query_parent; /* active query-level stack link */ - struct WaitEventUsage *saved_node_usage; /* node stack at query start */ - int nentries; - int maxentries; - WaitEventUsageEntry *entries; - uint64 overflowed_calls; - instr_time overflowed_time; -} WaitEventUsage; +typedef struct WaitEventUsage WaitEventUsage; extern const char *pgstat_get_wait_event(uint32 wait_event_info); extern const char *pgstat_get_wait_event_type(uint32 wait_event_info); @@ -40,13 +36,23 @@ static inline void pgstat_report_wait_start(uint32 wait_event_info); static inline void pgstat_report_wait_end(void); extern void pgstat_set_wait_event_storage(uint32 *wait_event_info); extern void pgstat_reset_wait_event_storage(void); + +/* EXPLAIN wait event accounting. */ extern WaitEventUsage *pgstat_create_wait_event_usage(MemoryContext memcontext); -extern void pgstat_begin_wait_event_usage(WaitEventUsage *usage, - MemoryContext memcontext); +extern WaitEventUsage *pgstat_begin_wait_event_usage(MemoryContext memcontext); extern void pgstat_end_wait_event_usage(WaitEventUsage *usage); extern void pgstat_accumulate_wait_event_usage(WaitEventUsage *usage, const WaitEventUsageEntry *entries, int nentries); +extern void pgstat_accumulate_wait_event_usage_overflow(WaitEventUsage *usage, + uint64 calls, + const instr_time *elapsed); +extern bool pgstat_wait_event_usage_is_empty(const WaitEventUsage *usage); +extern int pgstat_get_wait_event_usage_entries(const WaitEventUsage *usage, + const WaitEventUsageEntry **entries); +extern void pgstat_get_wait_event_usage_overflow(const WaitEventUsage *usage, + uint64 *calls, + instr_time *elapsed); extern WaitEventUsage *pgstat_enter_wait_event_usage(WaitEventUsage *usage); extern void pgstat_restore_wait_event_usage(WaitEventUsage *usage); extern void pgstat_count_wait_event_start(uint32 wait_event_info); -- 2.52.0