From 54b1dabf3c8da43d700a1087307177a2f17e62ca Mon Sep 17 00:00:00 2001 From: Naga Appani Date: Fri, 12 Dec 2025 22:44:46 +0000 Subject: [PATCH v12] Add pg_get_multixact_stats() function for monitoring MultiXact usage Expose multixact state via a new SQL-callable function pg_get_multixact_stats(), returning: - num_mxids : number of MultiXact IDs in use - num_members : number of member entries in use - members_size : bytes used by num_members in pg_multixact/members directory - oldest_multixact : oldest MultiXact ID still needed This patch adds pg_get_multixact_stats() function - SQL-callable interface to GetMultiXactInfo() - Always returns a single row; no column is ever NULL - Includes isolation tests for monitoring invariants Documentation updates: - func-info.sgml: add function entry - maintenance.sgml: mention monitoring multixact usage Build and catalog: - Add function to existing multixactfuncs.c - pg_proc.dat entry Author: Naga Appani Reviewed-by: Ashutosh Bapat Reviewed-by: Michael Paquier Discussion: https://www.postgresql.org/message-id/flat/CA%2BQeY%2BAAsYK6WvBW4qYzHz4bahHycDAY_q5ECmHkEV_eB9ckzg%40mail.gmail.com --- doc/src/sgml/func/func-info.sgml | 27 +++++ doc/src/sgml/maintenance.sgml | 58 ++++++++- src/backend/utils/adt/multixactfuncs.c | 45 +++++++ src/include/catalog/pg_proc.dat | 10 ++ .../isolation/expected/multixact_stats.out | 89 ++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/multixact_stats.spec | 113 ++++++++++++++++++ 7 files changed, 337 insertions(+), 6 deletions(-) create mode 100644 src/test/isolation/expected/multixact_stats.out create mode 100644 src/test/isolation/specs/multixact_stats.spec diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml index d4508114a48..051c3b28985 100644 --- a/doc/src/sgml/func/func-info.sgml +++ b/doc/src/sgml/func/func-info.sgml @@ -2975,6 +2975,33 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} modify key columns. 
+ + + + + pg_get_multixact_stats + + pg_get_multixact_stats () + record + ( num_mxids bigint, + num_members bigint, + members_size bigint, + oldest_multixact xid ) + + + Returns statistics about current multixact usage: + num_mxids is the total number of multixact IDs currently present in the system, + num_members is the total number of multixact member entries currently + present in the system, + members_size is the storage, in bytes, occupied by num_members + in the pg_multixact/members directory, + oldest_multixact is the oldest multixact ID still in use. + + + The function reports statistics at the time it is invoked. Values may vary between calls, + even within a single transaction. + + diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 08e6489afb8..8695b92e93e 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -813,14 +813,60 @@ HINT: Execute a database-wide VACUUM in that database. As a safety device, an aggressive vacuum scan will occur for any table whose multixact-age is greater than . Also, if the - storage occupied by multixacts members exceeds about 10GB, aggressive vacuum - scans will occur more often for all tables, starting with those that - have the oldest multixact-age. Both of these kinds of aggressive - scans will occur even if autovacuum is nominally disabled. The members storage - area can grow up to about 20GB before reaching wraparound. + linkend="guc-autovacuum-multixact-freeze-max-age"/>. Also, if the number + of multixact member entries created exceeds approximately 2^31 entries + (occupying roughly 10GB in the pg_multixact/members directory), + aggressive vacuum scans will occur more often for all tables, starting with those that + have the oldest multixact-age. Both of these kinds of aggressive + scans will occur even if autovacuum is nominally disabled. 
At approximately 2^32 entries + (occupying roughly 20GB in the pg_multixact/members directory), even + more aggressive vacuum scans are triggered to reclaim member storage space. + + The pg_get_multixact_stats() function described in + provides a way to monitor + multixact allocation and usage patterns in real time, for example: + +postgres=# SELECT *, pg_size_pretty(members_size) AS members_size_pretty FROM pg_catalog.pg_get_multixact_stats(); + num_mxids | num_members | members_size | oldest_multixact | members_size_pretty +-----------+-------------+--------------+------------------+--------------------- + 311740299 | 2785241176 | 13926205880 | 2 | 13 GB +(1 row) + + This output shows a system with significant multixact activity: about 312 million + multixact IDs and 2.8 billion member entries consuming 13 GB of storage space. + By leveraging this information, the function helps: + + + + Identify unusual multixact activity from concurrent row-level locks + or foreign key operations. For example, a spike in num_mxids might indicate + multiple sessions running UPDATE statements with foreign key checks, + concurrent SELECT FOR SHARE operations, or frequent use of savepoints + causing lock contention. + + + + + Track multixact cleanup efficiency by monitoring oldest_multixact. + If this value remains unchanged while num_members grows, it could indicate + that long-running transactions are preventing cleanup, or autovacuum is + not keeping up with the workload. + + + + + Detect potential performance impacts before they become critical. + For instance, high multixact usage from frequent row-level locking or + foreign key operations can lead to increased I/O and CPU overhead during + vacuum operations. Monitoring these stats helps tune autovacuum frequency + and transaction patterns. 
+ + + + + Similar to the XID case, if autovacuum fails to clear old MXIDs from a table, the system will begin to emit warning messages when the database's oldest MXIDs reach forty
diff --git a/src/backend/utils/adt/multixactfuncs.c b/src/backend/utils/adt/multixactfuncs.c
index a428e140bc4..c0597cf5425 100644
--- a/src/backend/utils/adt/multixactfuncs.c
+++ b/src/backend/utils/adt/multixactfuncs.c
@@ -15,6 +15,8 @@
 #include "postgres.h"
 
+#include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/multixact_internal.h"
 #include "funcapi.h"
 #include "utils/builtins.h"
 
@@ -85,3 +87,58 @@ pg_get_multixact_members(PG_FUNCTION_ARGS)
 
 	SRF_RETURN_DONE(funccxt);
 }
+
+/*
+ * pg_get_multixact_stats
+ *
+ * Returns statistics about current multixact usage, as reported by
+ * GetMultiXactInfo(), in a single row:
+ *	num_mxids			number of multixact IDs in use
+ *	num_members			number of member entries in use
+ *	members_size		bytes taken by those members in pg_multixact/members
+ *	oldest_multixact	oldest multixact ID still needed
+ *
+ * A row is always returned, and none of its columns is ever NULL.
+ */
+Datum
+pg_get_multixact_stats(PG_FUNCTION_ARGS)
+{
+	TupleDesc	tupdesc;
+	Datum		values[4];
+	bool		nulls[4];
+	MultiXactOffset members;
+	MultiXactId oldestMultiXactId;
+	uint32		multixacts;
+	MultiXactOffset oldestOffset;
+	int64		membersBytes;
+
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("return type must be a row type")));
+
+	/* oldestOffset is fetched here but not exposed at the SQL level */
+	GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset);
+
+	/*
+	 * Calculate storage space for members.  Members are stored in groups,
+	 * with each group containing MULTIXACT_MEMBERS_PER_MEMBERGROUP members
+	 * and taking MULTIXACT_MEMBERGROUP_SIZE bytes.  This is an integer
+	 * division, so a partially-filled trailing group is not counted.
+	 */
+	membersBytes = (int64) (members / MULTIXACT_MEMBERS_PER_MEMBERGROUP) *
+		MULTIXACT_MEMBERGROUP_SIZE;
+
+	/*
+	 * Build each Datum with the conversion matching the column's declared
+	 * type in pg_proc.dat: int8 for the three counters, xid for the oldest
+	 * multixact.
+	 */
+	memset(nulls, false, sizeof(nulls));
+	values[0] = Int64GetDatum((int64) multixacts);
+	values[1] = Int64GetDatum((int64) members);
+	values[2] = Int64GetDatum(membersBytes);
+	values[3] = TransactionIdGetDatum(oldestMultiXactId);
+
+	return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fd9448ec7b9..6caea6c8281 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12612,4 +12612,14 @@
   proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}',
   prosrc => 'pg_get_aios' },
 
+# Get multixact usage
+{ oid => '9001', descr => 'get current multixact usage statistics',
+  proname => 'pg_get_multixact_stats',
+  provolatile => 'v', proparallel => 's', prorettype => 'record',
+  proargtypes => '',
+  proallargtypes => '{int8,int8,int8,xid}',
+  proargmodes => '{o,o,o,o}',
+  proargnames => '{num_mxids,num_members,members_size,oldest_multixact}',
+  prosrc => 'pg_get_multixact_stats' },
+
 ]
diff --git a/src/test/isolation/expected/multixact_stats.out b/src/test/isolation/expected/multixact_stats.out
new file mode 100644
index 00000000000..27a6510c4ad
--- /dev/null
+++ b/src/test/isolation/expected/multixact_stats.out
@@ -0,0 +1,66 @@
+Parsed test spec with 2 sessions
+
+starting permutation: snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit
+step snap0: 
+  CREATE TEMP TABLE snap0 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+
+step s1_begin: BEGIN;
+step s1_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE;
+?column?
+--------
+       1
+(1 row)
+
+step snap1: 
+  CREATE TEMP TABLE snap1 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+
+step s2_begin: BEGIN;
+step s2_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE;
+?column?
+--------
+       1
+(1 row)
+
+step snap2: 
+  CREATE TEMP TABLE snap2 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+
+step check_while_pinned: 
+  SELECT r.assertion, r.ok
+  FROM snap0 AS s0
+  CROSS JOIN snap1 AS s1
+  CROSS JOIN snap2 AS s2
+  CROSS JOIN LATERAL (VALUES
+    ('is_init_mxids', s2.num_mxids IS NOT NULL),
+    ('is_init_members', s2.num_members IS NOT NULL),
+    ('is_init_oldest_mxid', s2.oldest_multixact IS NOT NULL),
+    ('is_oldest_mxid_nondec_01', s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)),
+    ('is_oldest_mxid_nondec_12', s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)),
+    ('is_members_increased_ge1', s2.num_members >= COALESCE(s1.num_members, 0) + 1),
+    ('is_mxids_nondec_01', s1.num_mxids >= COALESCE(s0.num_mxids, 0)),
+    ('is_mxids_nondec_12', s2.num_mxids >= COALESCE(s1.num_mxids, 0)),
+    ('is_members_nondec_01', s1.num_members >= COALESCE(s0.num_members, 0)),
+    ('is_members_nondec_12', s2.num_members >= COALESCE(s1.num_members, 0))
+  ) AS r(assertion, ok);
+
+assertion               |ok
+------------------------+--
+is_init_mxids           |t 
+is_init_members         |t 
+is_init_oldest_mxid     |t 
+is_oldest_mxid_nondec_01|t 
+is_oldest_mxid_nondec_12|t 
+is_members_increased_ge1|t 
+is_mxids_nondec_01      |t 
+is_mxids_nondec_12      |t 
+is_members_nondec_01    |t 
+is_members_nondec_12    |t 
+(10 rows)
+
+step s1_commit: COMMIT;
+step s2_commit: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 112f05a3677..67f0078d8ba 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -119,3 +119,4 @@ test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
 test: lock-nowait
+test: multixact_stats
diff --git a/src/test/isolation/specs/multixact_stats.spec b/src/test/isolation/specs/multixact_stats.spec
new file mode 100644
index 00000000000..7b034654504
--- /dev/null
+++ b/src/test/isolation/specs/multixact_stats.spec
@@ -0,0 +1,95 @@
+# Test invariants for pg_get_multixact_stats()
+# We create exactly one fresh MultiXact on a brand-new table. While it is pinned
+# by two open transactions, we assert only invariants that background VACUUM/FREEZE
+# cannot violate:
+#   • members increased by ≥ 1 when the second session locked the row,
+#   • num_mxids / num_members did not decrease vs earlier snapshots,
+#   • oldest_multixact never decreases.
+# We make NO assertions after releasing locks (freezing/truncation may shrink deltas).
+#
+# Terminology (global counters):
+#   num_mxids, num_members : "in-use" deltas derived from global horizons
+#   oldest_multixact       : oldest horizon; it moves forward, never backward
+#
+# All assertions execute while our multixact is pinned by open txns, which protects
+# the truncation horizon (VACUUM can't advance past our pinned multi).
+
+setup
+{
+  CREATE TABLE mxq(id int PRIMARY KEY, v int);
+  INSERT INTO mxq VALUES (1, 42);
+}
+
+teardown
+{
+  DROP TABLE mxq;
+}
+
+# Two sessions that lock on the same tuple -> one MultiXact with >= 2 members.
+session "s1"
+setup { SET client_min_messages = warning; SET lock_timeout = '5s'; }
+step s1_begin  { BEGIN; }
+step s1_lock   { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; }
+step s1_commit { COMMIT; }
+
+session "s2"
+setup { SET client_min_messages = warning; SET lock_timeout = '5s'; }
+step s2_begin  { BEGIN; }
+step s2_lock   { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; }
+step s2_commit { COMMIT; }
+
+# The snapshot and check steps below also run in session s2.
+# Baseline BEFORE any locking.
+step snap0 {
+  CREATE TEMP TABLE snap0 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+}
+
+# After s1 has locked the row.
+step snap1 {
+  CREATE TEMP TABLE snap1 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+}
+
+# After s2 joins on the SAME tuple -> multixact with >= 2 members.
+step snap2 {
+  CREATE TEMP TABLE snap2 AS
+  SELECT num_mxids, num_members, oldest_multixact
+  FROM pg_get_multixact_stats();
+}
+
+# Pretty, deterministic key/value output of boolean checks.  Each label sits
+# next to its check so that the two cannot drift out of step.
+# Keys:
+#   is_init_mxids            : num_mxids is non-NULL
+#   is_init_members          : num_members is non-NULL
+#   is_init_oldest_mxid      : oldest_multixact is non-NULL
+#   is_oldest_mxid_nondec_01 : oldest_multixact did not decrease (snap0→snap1)
+#   is_oldest_mxid_nondec_12 : oldest_multixact did not decrease (snap1→snap2)
+#   is_members_increased_ge1 : members increased by at least 1 when s2 joined
+#   is_mxids_nondec_01       : num_mxids did not decrease (snap0→snap1)
+#   is_mxids_nondec_12       : num_mxids did not decrease (snap1→snap2)
+#   is_members_nondec_01     : num_members did not decrease (snap0→snap1)
+#   is_members_nondec_12     : num_members did not decrease (snap1→snap2)
+step check_while_pinned {
+  SELECT r.assertion, r.ok
+  FROM snap0 AS s0
+  CROSS JOIN snap1 AS s1
+  CROSS JOIN snap2 AS s2
+  CROSS JOIN LATERAL (VALUES
+    ('is_init_mxids', s2.num_mxids IS NOT NULL),
+    ('is_init_members', s2.num_members IS NOT NULL),
+    ('is_init_oldest_mxid', s2.oldest_multixact IS NOT NULL),
+    ('is_oldest_mxid_nondec_01', s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)),
+    ('is_oldest_mxid_nondec_12', s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)),
+    ('is_members_increased_ge1', s2.num_members >= COALESCE(s1.num_members, 0) + 1),
+    ('is_mxids_nondec_01', s1.num_mxids >= COALESCE(s0.num_mxids, 0)),
+    ('is_mxids_nondec_12', s2.num_mxids >= COALESCE(s1.num_mxids, 0)),
+    ('is_members_nondec_01', s1.num_members >= COALESCE(s0.num_members, 0)),
+    ('is_members_nondec_12', s2.num_members >= COALESCE(s1.num_members, 0))
+  ) AS r(assertion, ok);
+}
+
+permutation snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit
-- 
2.47.3