From c7e55c28bceca7ac3a659860e1f19d5243c1499a Mon Sep 17 00:00:00 2001
From: Maksim Melnikov <m.melnikov@postgrespro.ru>
Date: Thu, 4 Sep 2025 17:37:47 +0300
Subject: [PATCH v1] Try to handle torn reads of pg_control in sub postmaster
 processes.

The same problem was fixed for frontends in commit
63a582222c6b3db2b1103ddf67a04b31a8f8e9bb. This commit fixes it for the
case where the pg_control file is read by fork/exec'd processes.

There can be a race between the process that replays WAL at startup and
updates the control file, and other sub-processes that were started with
exec and read the control file. As a result, those sub-processes can
read a partially updated file with an incorrect CRC. The reason is that
LocalProcessControlFile doesn't acquire ControlFileLock, and it cannot
do so.

This patch is essentially a copy of the changes applied for frontends,
with minor adaptation.
---
 src/backend/access/transam/xlog.c | 33 ++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7ffb2179151..98f992aa812 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4347,6 +4347,15 @@ ReadControlFile(void)
 	int			fd;
 	char		wal_segsz_str[20];
 	int			r;
+	bool		crc_ok;
+#ifdef EXEC_BACKEND
+	pg_crc32c	last_crc;
+	int			retries = 0;
+
+	INIT_CRC32C(last_crc);
+
+retry:
+#endif
 
 	/*
 	 * Read data...
@@ -4411,7 +4420,29 @@ ReadControlFile(void)
 				offsetof(ControlFileData, crc));
 	FIN_CRC32C(crc);
 
-	if (!EQ_CRC32C(crc, ControlFile->crc))
+	crc_ok = EQ_CRC32C(crc, ControlFile->crc);
+
+#ifdef EXEC_BACKEND
+
+	/*
+	 * If the server was writing at the same time, it is possible that we read
+	 * partially updated contents on some systems.  If the CRC doesn't match,
+	 * retry a limited number of times until we compute the same bad CRC twice
+	 * in a row with a short sleep in between.  Then the failure is unlikely
+	 * to be due to a concurrent write.
+	 */
+	if (!crc_ok &&
+		(retries == 0 || !EQ_CRC32C(crc, last_crc)) &&
+		retries < 10)
+	{
+		retries++;
+		last_crc = crc;
+		pg_usleep(10000);
+		goto retry;
+	}
+#endif
+
+	if (!crc_ok)
 		ereport(FATAL,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("incorrect checksum in control file")));
-- 
2.43.0

