From f31ae975dfba0271d8dd61b44008fc2de14df68b Mon Sep 17 00:00:00 2001
From: Vitaly Davydov <v.davydov@postgrespro.ru>
Date: Fri, 23 Jan 2026 14:20:05 +0300
Subject: [PATCH] Fix deadlock detector activation in a recovery conflict

When the startup process is in a deadlock with a backend, it sends a
signal to the backend to trigger the deadlock detector once the
deadlock timeout (the deadlock_timeout GUC) has elapsed. Due to an
optimization in timeout.c, spontaneous SIGALRM signals that do not
relate to any enabled timeout are possible. When such signals occur,
ResolveRecoveryConflictWithBufferPin can never send the signal to the
conflicting backend, because the deadlock timeout will never be
triggered.

The patch fixes ResolveRecoveryConflictWithBufferPin by ignoring
spontaneous SIGALRM signals, which are possible in the current
implementation of timeout.c.
---
 src/backend/storage/buffer/bufmgr.c |  2 +-
 src/backend/storage/ipc/standby.c   | 80 +++++++++++++++++++----------
 src/include/storage/standby.h       |  2 +-
 3 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6f935648ae9..3b9fb78842a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6627,7 +6627,7 @@ LockBufferForCleanup(Buffer buffer)
 			/* Publish the bufid that Startup process waits on */
 			SetStartupBufferPinWaitBufId(buffer - 1);
 			/* Set alarm and then wait to be signaled by UnpinBuffer() */
-			ResolveRecoveryConflictWithBufferPin();
+			ResolveRecoveryConflictWithBufferPin(buffer);
 			/* Reset the published bufid */
 			SetStartupBufferPinWaitBufId(-1);
 		}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index afffab77106..fde5f45781f 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -30,6 +30,7 @@
 #include "storage/procarray.h"
 #include "storage/sinvaladt.h"
 #include "storage/standby.h"
+#include "storage/buf_internals.h"
 #include "utils/hsearch.h"
 #include "utils/injection_point.h"
 #include "utils/ps_status.h"
@@ -790,11 +791,13 @@ cleanup:
  * at least deadlock_timeout.
  */
 void
-ResolveRecoveryConflictWithBufferPin(void)
+ResolveRecoveryConflictWithBufferPin(Buffer buffer)
 {
 	TimestampTz ltime;
 
 	Assert(InHotStandby);
+	Assert(BufferIsValid(buffer));
+	Assert(!BufferIsLocal(buffer));
 
 	ltime = GetStandbyLimitTime();
 
@@ -831,35 +834,56 @@ ResolveRecoveryConflictWithBufferPin(void)
 		enable_timeouts(timeouts, cnt);
 	}
 
-	/*
-	 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
-	 * by one of the timeouts established above.
-	 *
-	 * We assume that only UnpinBuffer() and the timeout requests established
-	 * above can wake us up here. WakeupRecovery() called by walreceiver or
-	 * SIGHUP signal handler, etc cannot do that because it uses the different
-	 * latch from that ProcWaitForSignal() waits on.
-	 */
-	ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
-
-	if (got_standby_delay_timeout)
-		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
-	else if (got_standby_deadlock_timeout)
+	for (;;)
 	{
 		/*
-		 * Send out a request for hot-standby backends to check themselves for
-		 * deadlocks.
-		 *
-		 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
-		 * to be signaled by UnpinBuffer() again and send a request for
-		 * deadlocks check if deadlock_timeout happens. This causes the
-		 * request to continue to be sent every deadlock_timeout until the
-		 * buffer is unpinned or ltime is reached. This would increase the
-		 * workload in the startup process and backends. In practice it may
-		 * not be so harmful because the period that the buffer is kept pinned
-		 * is basically no so long. But we should fix this?
-		 */
-		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+		 * Wait to be signaled by UnpinBuffer() or for the wait to be
+		 * interrupted by one of the timeouts established above.
+		 *
+		 * We assume that only UnpinBuffer() and the timeout requests
+		 * established above can wake us up here. WakeupRecovery() called by
+		 * walreceiver or SIGHUP signal handler, etc cannot do that because it
+		 * uses the different latch from that ProcWaitForSignal() waits on.
+		 */
+		ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
+
+		if (got_standby_delay_timeout)
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+		else if (got_standby_deadlock_timeout)
+		{
+			/*
+			 * Send out a request for hot-standby backends to check themselves
+			 * for deadlocks.
+			 *
+			 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will
+			 * wait to be signaled by UnpinBuffer() again and send a request
+			 * for deadlocks check if deadlock_timeout happens. This causes the
+			 * request to continue to be sent every deadlock_timeout until the
+			 * buffer is unpinned or ltime is reached. This would increase the
+			 * workload in the startup process and backends. In practice it may
+			 * not be so harmful because the period that the buffer is kept
+			 * pinned is basically not so long. But we should fix this?
+			 */
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+		}
+		else
+		{
+			BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
+			uint64		buf_state = LockBufHdr(bufHdr);
+			uint32		buf_refcount = BUF_STATE_GET_REFCOUNT(buf_state);
+
+			UnlockBufHdr(bufHdr);
+
+			/*
+			 * If the buffer is still pinned by others, this was a spontaneous
+			 * SIGALRM unrelated to any active timeout. Keep waiting until the
+			 * buffer is unpinned or one of the expected timeouts elapses.
+			 */
+			if (buf_refcount > 1)
+				continue;
+		}
+
+		break;
 	}
 
 	/*
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 7b10932635a..e15dcd22e8f 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -38,7 +38,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
 extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
 
 extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict);
-extern void ResolveRecoveryConflictWithBufferPin(void);
+extern void ResolveRecoveryConflictWithBufferPin(Buffer buffer);
 extern void CheckRecoveryConflictDeadlock(void);
 extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
-- 
2.43.0

