public inbox for [email protected]  
help / color / mirror / Atom feed
From: Hayato Kuroda (Fujitsu) <[email protected]>
To: 'Bertrand Drouvot' <[email protected]>
To: Alexander Lakhin <[email protected]>
To: [email protected] <[email protected]>
Cc: pgsql-hackers <[email protected]>
Subject: RE: t/035_standby_logical_decoding.pl might fail on attempt to read wrong timeline
Date: Mon, 8 Jun 2026 04:25:45 +0000
Message-ID: <OS9PR01MB1214908BA67A7811BD6281208F51C2@OS9PR01MB12149.jpnprd01.prod.outlook.com> (raw)
In-Reply-To: <aiP/X1FThlZRCFiN@bdtpg>
References: <[email protected]>
	<aiP/X1FThlZRCFiN@bdtpg>

Hi Alexander, Bertrand, Xuneng,

Thanks for looking into the failure. Our team had also noticed it but could not find the cause.

> Yeah, it looks like there is a race condition here. I think we should check if
> the insertion timeline has already been set (like the walsummarizer is doing).

Sorry if this is a stupid question; I tried to reproduce the failure but could not (see the attached patch).

IIUC, the issue can happen if the walsender must read a WAL record generated
after the promotion but the timeline has not been updated yet.

However, I think logical_read_xlog_page() is called after the new WAL records
are generated, i.e., am_cascading_walsender is already false by that time.
So I'm not sure where the race is.

Best regards,
Hayato Kuroda
FUJITSU LIMITED



Attachments:

  [application/octet-stream] 0001-WIP-try-reproducing-the-race-condition-for-promotion.patch (6.3K, 2-0001-WIP-try-reproducing-the-race-condition-for-promotion.patch)
  download | inline diff:
From 2cf5edb2639104ccc42333e7546c89076309c40f Mon Sep 17 00:00:00 2001
From: Hayato Kuroda <[email protected]>
Date: Mon, 8 Jun 2026 12:42:12 +0900
Subject: [PATCH] WIP: try reproducing the race condition for promotion

---
 src/backend/replication/walsender.c |   6 ++
 src/test/recovery/meson.build       |   1 +
 src/test/recovery/t/099_repro.pl    | 130 ++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+)
 create mode 100644 src/test/recovery/t/099_repro.pl

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 04aa770d981..f764007052d 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -94,6 +94,7 @@
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
+#include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_lsn.h"
@@ -1103,11 +1104,16 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req
 	 */
 	am_cascading_walsender = RecoveryInProgress();
 
+	INJECTION_POINT("logical-read-xlog-page-before-tli", NULL);
+
 	if (am_cascading_walsender)
 		GetXLogReplayRecPtr(&currTLI);
 	else
 		currTLI = GetWALInsertionTimeLine();
 
+	elog(LOG, "XXX am_cascading_walsender: %d, currTLI: %u",
+		 am_cascading_walsender, currTLI);
+
 	XLogReadDetermineTimeline(state, targetPagePtr, reqLen, currTLI);
 	sendTimeLineIsHistoric = (state->currTLI != currTLI);
 	sendTimeLine = state->currTLI;
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 9eb8ed11425..bfd06a06124 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -62,6 +62,7 @@ tests += {
       't/051_effective_wal_level.pl',
       't/052_checkpoint_segment_missing.pl',
       't/053_standby_login_event_trigger.pl',
+      't/099_repro.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/099_repro.pl b/src/test/recovery/t/099_repro.pl
new file mode 100644
index 00000000000..909141c0773
--- /dev/null
+++ b/src/test/recovery/t/099_repro.pl
@@ -0,0 +1,130 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Reproducer for wrong timeline bug
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my ($stdout, $stderr, $cascading_stdout, $cascading_stderr, $handle);
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+my $default_timeout = $PostgreSQL::Test::Utils::timeout_default;
+my $res;
+
+# Name for the physical slot on primary
+my $primary_slotname = 'primary_physical';
+my $standby_physical_slotname = 'standby_physical';
+
+########################
+# Initialize primary node
+########################
+
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+$node_primary->append_conf(
+	'postgresql.conf', q{
+wal_level = 'logical'
+max_replication_slots = 4
+max_wal_senders = 4
+autovacuum = off
+});
+$node_primary->dump_info;
+$node_primary->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node_primary->check_extension('injection_points'))
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+# Create the injection_points extension
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+$node_primary->safe_psql('postgres',
+	qq[SELECT * FROM pg_create_physical_replication_slot('$primary_slotname');]
+);
+
+$node_primary->safe_psql('postgres', "CREATE TABLE foo (id int)");
+
+my $backup_name = 'b1';
+$node_primary->backup($backup_name);
+
+#######################
+# Initialize standby node
+#######################
+
+$node_standby->init_from_backup(
+	$node_primary, $backup_name,
+	has_streaming => 1,
+	has_restoring => 1);
+$node_standby->append_conf(
+	'postgresql.conf',
+	qq[primary_slot_name = '$primary_slotname'
+       max_replication_slots = 5]);
+$node_standby->start;
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# create logical slot
+$node_standby->create_logical_slot_on_standby($node_primary, 'testslot',
+                                              'postgres');
+
+# Start continuous logical decoding on the standby
+$handle = IPC::Run::start(
+    [
+        'pg_recvlogical',
+        '--dbname' => $node_standby->connstr('postgres'),
+        '--slot' => 'testslot',
+        '--option' => 'include-xids=0',
+        '--option' => 'skip-empty-xacts=1',
+        '--file' => '-',
+        '--no-loop',
+        '--start',
+    ],
+    '>' => \$stdout,
+    '2>' => \$stderr,
+    IPC::Run::timeout($default_timeout));
+
+# Ensure the pg_recvlogical works well
+$node_primary->safe_psql('postgres', "INSERT INTO foo SELECT generate_series(1, 10)");
+
+# Wait for the first transaction to be streamed; this confirms that
+# pg_recvlogical, started before the promotion, is decoding on the standby
+my $pump_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+
+ok(pump_until($handle, $pump_timeout, \$stdout, qr/^.*COMMIT$/s),
+	'got 1 COMMIT from pg_recvlogical output');
+
+# Set an injection_point to make the walsender wait before reading the timeline
+# of the standby.
+$node_standby->safe_psql('postgres', "SELECT injection_points_attach('logical-read-xlog-page-before-tli', 'wait');");
+
+# Insert some data to make the walsender read the timeline
+$node_primary->safe_psql('postgres', "INSERT INTO foo SELECT generate_series(11, 20)");
+$node_standby->wait_for_event('walsender', 'logical-read-xlog-page-before-tli');
+
+# Now the standby can accept INSERTs
+$node_standby->promote;
+
+# Insert some data on the promoted standby
+$node_standby->safe_psql('postgres', "INSERT INTO foo SELECT generate_series(21, 30)");
+
+# Walsender now resumes decoding
+$node_standby->safe_psql('postgres', qq{SELECT injection_points_detach('logical-read-xlog-page-before-tli');
+SELECT injection_points_wakeup('logical-read-xlog-page-before-tli');});
+
+# $stdout accumulates, so require 3 COMMITs (rows 1-10, 11-20, 21-30) to cover the post-promotion INSERT.
+ok(pump_until($handle, $pump_timeout, \$stdout, qr/^.*COMMIT.*COMMIT.*COMMIT$/s),
+	'got 3 COMMIT from pg_recvlogical output');
+
+done_testing();
-- 
2.52.0



view thread (24+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected]
  Subject: RE: t/035_standby_logical_decoding.pl might fail on attempt to read wrong timeline
  In-Reply-To: <OS9PR01MB1214908BA67A7811BD6281208F51C2@OS9PR01MB12149.jpnprd01.prod.outlook.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox