From: Antonin Houska Date: Tue, 12 May 2026 12:27:08 +0200 Subject: [PATCH] Test to demonstrate bug in commit 0d3dba38c7 and to verify a fix. The fix: https://www.postgresql.org/message-id/77611.1778055944%40localhost --- src/backend/access/transam/xact.c | 2 + src/backend/commands/repack_worker.c | 2 + src/test/isolation/isolationtester.c | 9 +- .../expected/repack_running_xacts.out | 81 ++++++++++++ .../specs/repack_running_xacts.spec | 119 ++++++++++++++++++ 5 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 src/test/modules/injection_points/expected/repack_running_xacts.out create mode 100644 src/test/modules/injection_points/specs/repack_running_xacts.spec diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 5586fbe5b07..b63ee166028 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -65,6 +65,7 @@ #include "utils/builtins.h" #include "utils/combocid.h" #include "utils/guc.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/memutils.h" #include "utils/relmapper.h" @@ -2428,6 +2429,7 @@ CommitTransaction(void) * must be done _before_ releasing locks we hold and _after_ * RecordTransactionCommit. */ + INJECTION_POINT("before-end-transaction", NULL); ProcArrayEndTransaction(MyProc, latestXid); /* diff --git a/src/backend/commands/repack_worker.c b/src/backend/commands/repack_worker.c index c40f8c98e06..6835626d677 100644 --- a/src/backend/commands/repack_worker.c +++ b/src/backend/commands/repack_worker.c @@ -26,6 +26,7 @@ #include "storage/ipc.h" #include "storage/proc.h" #include "tcop/tcopprot.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #define REPL_PLUGIN_NAME "pgrepack" @@ -233,6 +234,7 @@ repack_setup_logical_decoding(Oid relid) * Neither prepare_write nor do_write callback nor update_progress is * useful for us. */ + INJECTION_POINT("before-create-decoding-context", NULL); ctx = CreateInitDecodingContext(REPL_PLUGIN_NAME, NIL, true, diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c index 440c875b8ac..8f17ee412c9 100644 --- a/src/test/isolation/isolationtester.c +++ b/src/test/isolation/isolationtester.c @@ -216,15 +216,22 @@ main(int argc, char **argv) * exactly expect concurrent use of test tables. However, autovacuum will * occasionally take AccessExclusiveLock to truncate a table, and we must * ignore that transient wait. + * + * If the session's backend is blocked, and if its background worker is + * waiting on an injection point, we assume that the injection point is + * the reason for the backend to be blocked. That's what we check in the + * second query of the UNION. XXX Should we use a separate query for that? */ initPQExpBuffer(&wait_query); appendPQExpBufferStr(&wait_query, + "WITH blocking(res) AS (" "SELECT pg_catalog.pg_isolation_test_session_is_blocked($1, '{"); /* The spec syntax requires at least one session; assume that here. */ appendPQExpBufferStr(&wait_query, conns[1].backend_pid_str); for (i = 2; i < nconns; i++) appendPQExpBuffer(&wait_query, ",%s", conns[i].backend_pid_str); - appendPQExpBufferStr(&wait_query, "}')"); + appendPQExpBufferStr(&wait_query, "}') UNION " + "SELECT pg_catalog.pg_isolation_test_session_is_blocked(pid, '{}') FROM pg_stat_activity WHERE leader_pid=$1) SELECT bool_or(res) FROM blocking"); res = PQprepare(conns[0].conn, PREP_WAITING, wait_query.data, 0, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) diff --git a/src/test/modules/injection_points/expected/repack_running_xacts.out b/src/test/modules/injection_points/expected/repack_running_xacts.out new file mode 100644 index 00000000000..271fe2b97cb --- /dev/null +++ b/src/test/modules/injection_points/expected/repack_running_xacts.out @@ -0,0 +1,81 @@ +Parsed test spec with 5 sessions + +starting permutation: repack s3_assign_xid wakeup_bcdc s4_changes s4_attach s4_commit s3_commit s5_assign_xid wakeup_bet s5_commit check +injection_points_attach +----------------------- + +(1 row) + +step repack: + REPACK (CONCURRENTLY) repack_test; + +step s3_assign_xid: + BEGIN; + INSERT INTO aux VALUES (1); + +step wakeup_bcdc: + SELECT injection_points_wakeup('before-create-decoding-context'); + +injection_points_wakeup +----------------------- + +(1 row) + +step s4_changes: + BEGIN; + INSERT INTO repack_test(i, j) VALUES (1, 1); + +step s4_attach: + SELECT injection_points_set_local(); + SELECT injection_points_attach('before-end-transaction', 'wait'); + +injection_points_set_local +-------------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +step s4_commit: + COMMIT; + +step s3_commit: + COMMIT; + +step s5_assign_xid: + BEGIN; + INSERT INTO aux VALUES (2); + +step wakeup_bet: + SELECT injection_points_wakeup('before-end-transaction'); + +injection_points_wakeup +----------------------- + +(1 row) + +step repack: <... completed> +step s4_commit: <... completed> +step s5_commit: + COMMIT; + +step check: + TABLE repack_test; + +i|j +-+- +(0 rows) + +injection_points_detach +----------------------- + +(1 row) + +injection_points_detach +----------------------- + +(1 row) + diff --git a/src/test/modules/injection_points/specs/repack_running_xacts.spec b/src/test/modules/injection_points/specs/repack_running_xacts.spec new file mode 100644 index 00000000000..1f878514046 --- /dev/null +++ b/src/test/modules/injection_points/specs/repack_running_xacts.spec @@ -0,0 +1,119 @@ +setup +{ + CREATE EXTENSION injection_points; + CREATE TABLE repack_test(i int PRIMARY KEY, j int); + CREATE TABLE aux(i int); +} + +teardown +{ + DROP TABLE repack_test; + DROP TABLE aux; + DROP EXTENSION injection_points; +} + +session s1 +setup +{ + SELECT injection_points_attach('before-create-decoding-context', 'wait'); +} +step repack +{ + REPACK (CONCURRENTLY) repack_test; +} +step check +{ + TABLE repack_test; +} +teardown +{ + SELECT injection_points_detach('before-create-decoding-context'); +} + +session s2 +step wakeup_bcdc +{ + SELECT injection_points_wakeup('before-create-decoding-context'); +} +step wakeup_bet +{ + SELECT injection_points_wakeup('before-end-transaction'); +} + +session s3 +step s3_assign_xid +{ + BEGIN; + INSERT INTO aux VALUES (1); +} +step s3_commit +{ + COMMIT; +} + +session s4 +step s4_changes +{ + BEGIN; + INSERT INTO repack_test(i, j) VALUES (1, 1); +} +# Do not attach in the setup section, that would be too soon. +step s4_attach +{ + SELECT injection_points_set_local(); + SELECT injection_points_attach('before-end-transaction', 'wait'); +} +step s4_commit +{ + COMMIT; +} +teardown +{ + SELECT injection_points_detach('before-end-transaction'); +} + +session s5 +step s5_assign_xid +{ + BEGIN; + INSERT INTO aux VALUES (2); +} +step s5_commit +{ + COMMIT; +} + +permutation +repack +# Assign XID so that a running transaction prevents the snapshot builder from +# reaching CONSISTENT state immediately. It will wait for this to complete +# after having reached BUILDING_SNAPSHOT. +s3_assign_xid +# Let the decoding setup start. +wakeup_bcdc +# Likewise, the snapshot builder will wait for the s4's xact to complete after +# having reached FULL_SNAPSHOT. This is the problematic transaction, so let it +# do some changes. +s4_changes +# Attach to the 'before-end-transaction' injection point that s4 will need +# during commit. +s4_attach +# Only write commit record for s4, but do not remove the xact from procarray +# yet. Thus the snapshot builder still needs to wait. +s4_commit +# Let the snapshot builder proceed to FULL_SNAPSHOT. +s3_commit +# Start another transaction so that CONSISTENT is not reached "directly", +# i.e. due to no running transaction. It's important here that builder->xmin +# does not advance. +s5_assign_xid +# Remove s4 xact from procarray, and thus reach the CONSISTENT state. Since +# the COMMIT appeared in WAL too early (i.e. when the snapshot builder state +# did not allow decoding of COMMIT records yet), the snapshot builder will +# consider s4 running. This is also due to returning from +# SnapBuildProcessRunningXacts() too early, w/o advancing builder->xmin. +wakeup_bet +# s5 is not needed anymore +s5_commit +# Show that the data changes performed by s4 are lost. +check -- 2.47.3 --=-=-=--