public inbox for [email protected]
help / color / mirror / Atom feed
From: Hayato Kuroda (Fujitsu) <[email protected]>
To: 'Bertrand Drouvot' <[email protected]>
Cc: Alexander Lakhin <[email protected]>
Cc: [email protected] <[email protected]>
Cc: pgsql-hackers <[email protected]>
Subject: RE: t/035_standby_logical_decoding.pl might fail on attempt to read wrong timeline
Date: Tue, 9 Jun 2026 04:04:20 +0000
Message-ID: <TYRPR01MB12156677791D04E4E858F3FCCF51D2@TYRPR01MB12156.jpnprd01.prod.outlook.com> (raw)
In-Reply-To: <aiaBtENl7tTf2MM8@bdtpg>
References: <[email protected]>
<aiP/X1FThlZRCFiN@bdtpg>
<OS9PR01MB1214908BA67A7811BD6281208F51C2@OS9PR01MB12149.jpnprd01.prod.outlook.com>
<aiaBtENl7tTf2MM8@bdtpg>
Dear Bertrand, Xuneng,
Thanks for sharing the reproducer and your opinions. I now understand the issue.
While looking into it, I felt that the same issue can happen even in the streaming replication case.
Please see the attached reproducer and a possible fix.
Could you also cover that point? Or does this thread discuss only the logical decoding case?
If the two are handled separately, I can take the initiative on the streaming part after the
logical decoding case is done.
Best regards,
Hayato Kuroda
FUJITSU LIMITED
Attachments:
[application/octet-stream] 0001-Reproducer-for-the-streaming-replication.patch (4.6K, 2-0001-Reproducer-for-the-streaming-replication.patch)
download | inline diff:
From 67e407e18c64dff51904232ef98f3f992dd82d03 Mon Sep 17 00:00:00 2001
From: Hayato Kuroda <[email protected]>
Date: Tue, 9 Jun 2026 12:48:19 +0900
Subject: [PATCH 1/2] Reproducer for the streaming replication
---
src/test/recovery/meson.build | 1 +
src/test/recovery/t/099_repro.pl | 123 +++++++++++++++++++++++++++++++
2 files changed, 124 insertions(+)
create mode 100644 src/test/recovery/t/099_repro.pl
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 9eb8ed11425..bfd06a06124 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -62,6 +62,7 @@ tests += {
't/051_effective_wal_level.pl',
't/052_checkpoint_segment_missing.pl',
't/053_standby_login_event_trigger.pl',
+ 't/099_repro.pl',
],
},
}
diff --git a/src/test/recovery/t/099_repro.pl b/src/test/recovery/t/099_repro.pl
new file mode 100644
index 00000000000..34a97bb19f3
--- /dev/null
+++ b/src/test/recovery/t/099_repro.pl
@@ -0,0 +1,123 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Reproducer: a wrong timeline can be chosen during promotion in
+# streaming replication.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+ plan skip_all => 'Injection points not supported by this build';
+}
+
+my ($stdout, $stderr, $handle);
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+my $default_timeout = $PostgreSQL::Test::Utils::timeout_default;
+
+# Name for the physical slot on primary
+my $primary_slotname = 'primary_physical';
+my $standby_physical_slotname = 'standby_physical';
+
+########################
+# Initialize primary node
+########################
+
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', q{
+wal_level = 'logical'
+max_replication_slots = 4
+max_wal_senders = 4
+autovacuum = off
+});
+$node_primary->dump_info;
+$node_primary->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node_primary->check_extension('injection_points'))
+{
+ plan skip_all => 'Extension injection_points not installed';
+}
+
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+$node_primary->safe_psql('postgres',
+ qq[SELECT * FROM pg_create_physical_replication_slot('$primary_slotname');]
+);
+
+my $backup_name = 'b1';
+$node_primary->backup($backup_name);
+
+# Some tests need to wait for VACUUM to be replayed. But vacuum does not flush
+# WAL. An insert into flush_wal outside transaction does guarantee a flush.
+$node_primary->psql('postgres', q[CREATE TABLE flush_wal();]);
+
+#######################
+# Initialize standby node
+#######################
+
+$node_standby->init_from_backup(
+ $node_primary, $backup_name,
+ has_streaming => 1,
+ has_restoring => 1);
+$node_standby->append_conf(
+ 'postgresql.conf',
+ qq[primary_slot_name = '$primary_slotname'
+ max_replication_slots = 5]);
+$node_standby->start;
+$node_primary->wait_for_replay_catchup($node_standby);
+
+$node_standby->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('physical_standby');"
+);
+
+# Attach injection point to pause startup after WAL segment cleanup
+# but before RecoveryInProgress() flips to false.
+$node_standby->safe_psql('postgres',
+ "SELECT injection_points_attach('promotion-after-wal-segment-cleanup', 'wait');"
+);
+
+# Promote with no-wait so we can synchronize with the injection point.
+$node_standby->safe_psql('postgres', "SELECT pg_promote(false)");
+
+# Wait for startup to pause after removing old timeline WAL segments.
+$node_standby->wait_for_event('startup',
+ 'promotion-after-wal-segment-cleanup');
+
+my $stream_dir = $node_primary->basedir . '/archive_wal';
+mkdir($stream_dir);
+
+my $log_offset = -s $node_standby->logfile;
+
+# Start pg_receivewal
+$handle = IPC::Run::start(
+ [
+ 'pg_receivewal',
+ '--directory' => $stream_dir,
+ '--dbname' => $node_standby->connstr('postgres'),
+ '--slot' => 'physical_standby',
+ '--no-loop',
+ ],
+ '>' => \$stdout,
+ '2>' => \$stderr,
+ IPC::Run::timeout($default_timeout));
+
+# XXX wait a bit to make sure pg_receivewal has started
+sleep(1);
+
+# Check the log content
+ok( !$node_standby->log_contains(
+ "requested WAL segment [0-9A-F]+ has already been removed",
+ $log_offset),
+ 'check that required WAL segments are still available');
+
+done_testing();
--
2.52.0
[application/octet-stream] 0002-Fix-race-condition-in-streaming-replication-timeline.patch (1.4K, 3-0002-Fix-race-condition-in-streaming-replication-timeline.patch)
download | inline diff:
From 6730d101000ada2a1aa1fe8d5bd17f47b96f4b8b Mon Sep 17 00:00:00 2001
From: Hayato Kuroda <[email protected]>
Date: Tue, 9 Jun 2026 13:01:19 +0900
Subject: [PATCH 2/2] Fix race condition in streaming replication timeline
selection during promotion
---
src/backend/replication/walsender.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e80ed052077..f2e9bbe9d95 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3389,7 +3389,7 @@ XLogSendPhysical(void)
*/
bool becameHistoric = false;
- SendRqstPtr = GetStandbyFlushRecPtr(&SendRqstTLI);
+ SendRqstPtr = GetStandbyFlushRecPtr(NULL);
if (!RecoveryInProgress())
{
@@ -3400,9 +3400,22 @@ XLogSendPhysical(void)
}
else
{
+ TimeLineID insertTLI;
+
+ /*
+ * If the insertion timeline has already been set, use it. See
+ * logical_read_xlog_page() for details.
+ */
+ insertTLI = GetWALInsertionTimeLine();
+
+ if (insertTLI != 0)
+ SendRqstTLI = insertTLI;
+ else
+ GetXLogReplayRecPtr(&SendRqstTLI);
+
/*
- * Still a cascading standby. But is the timeline we're sending
- * still the one recovery is recovering from?
+ * Is the timeline we're sending still the one recovery is
+ * recovering from?
*/
if (sendTimeLine != SendRqstTLI)
becameHistoric = true;
--
2.52.0
view thread (24+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected]
Subject: RE: t/035_standby_logical_decoding.pl might fail on attempt to read wrong timeline
In-Reply-To: <TYRPR01MB12156677791D04E4E858F3FCCF51D2@TYRPR01MB12156.jpnprd01.prod.outlook.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox