public inbox for [email protected]
help / color / mirror / Atom feed
From: Anthonin Bonnefoy <[email protected]>
To: Andres Freund <[email protected]>
Cc: Fujii Masao <[email protected]>
Cc: Alexander Lakhin <[email protected]>
Cc: PostgreSQL Hackers <[email protected]>
Subject: Re: Shutdown indefinitely stuck due to unflushed FPI_FOR_HINT record
Date: Thu, 12 Mar 2026 15:07:59 +0100
Message-ID: <CAO6_Xqq=-KnxrFQvmyF++XH4ngtXVhcDB979rEn6SPtUhSmNYg@mail.gmail.com> (raw)
In-Reply-To: <vzguaguldbcyfbyuq76qj7hx5qdr5kmh67gqkncyb2yhsygrdt@dfhcpteqifux>
References: <CAHGQGwHFKF+x4E+SqedMCnmLCitxjTUUtSyL_+mMeuq-GbEt6w@mail.gmail.com>
<CAO6_Xqp+ADb6KZVWLMALu3xmwVUEO8S1EiCnp38mG6BrHrEnuA@mail.gmail.com>
<CAO6_XqqKDV+AuP=Gf4kRKPqzyYTsOyGd3LE8Jqkwi7EMPJpbhA@mail.gmail.com>
<CAHGQGwHc5yH4Nxp59KXJP0kAr61j3W7QeSKT2HxVjZa3OrLzmg@mail.gmail.com>
<CAO6_Xqq1h6kggb1o206rgouPS0H5jnjahzZ0We-9ggnBjB2JsA@mail.gmail.com>
<CAHGQGwFJnNUOMiW9wR-2WjSKzzj0wV8p55J8bnJ6mik=z0oFPQ@mail.gmail.com>
<[email protected]>
<CAO6_Xqq73TPa3M6nQ7RqRhKkcphy1JX7aNGTYy-x_Sn+6a8Z_Q@mail.gmail.com>
<CAHGQGwGvnpN=2bo+F7H90YLFcx9=SazwLkcx+0gEcrbQy5NVZg@mail.gmail.com>
<CAHGQGwECpyJtMqkCEvyqgZDiwAeMj3RKobui7jONrDd35W0x3Q@mail.gmail.com>
<vzguaguldbcyfbyuq76qj7hx5qdr5kmh67gqkncyb2yhsygrdt@dfhcpteqifux>
On Tue, Mar 10, 2026 at 6:11 PM Andres Freund <[email protected]> wrote:
> I'm pretty sure this is not correct as-is, it suffers from the same issue as
> https://postgr.es/m/vf4hbwrotvhbgcnknrqmfbqlu75oyjkmausvy66ic7x7vuhafx%40e4rvwavtjswo
> I.e. it is not safe to use GetXLogInsertRecPtr() to determine up to where to
> flush to, due to page boundaries.
I've managed to reproduce this issue by ensuring the FPI_FOR_HINT
record finishes at the end of a page with the following script (might
need some adjustment if the record sizes are different):
DROP TABLE IF EXISTS test_insert_rec_ptr;
CREATE TABLE test_insert_rec_ptr(aid int, data text) WITH
(autovacuum_enabled = false);
INSERT INTO test_insert_rec_ptr SELECT *, repeat('a', 100) FROM
generate_series(0, 57);
-- This should tag the page as full
BEGIN; UPDATE test_insert_rec_ptr SET aid=2 where aid=1; ROLLBACK;
CHECKPOINT;
-- Start with a fresh file
SELECT pg_switch_wal();
-- Our FPI_FOR_HINT writes 8193 bytes
-- With the long header, the first page has 8152 bytes available
-- With the short header, the second page has 8168 bytes available
-- We want our FPI_FOR_HINT to finish at the end of the second page
-- (+/- 8 bytes of alignment)
-- We need to write the first 25 bytes (or 32 with alignment) in the first page
-- For that, we need to write 8120 bytes of WAL records
BEGIN;
-- 264 bytes of FPW
INSERT INTO test_insert_rec_ptr VALUES(1);
-- 74 * 104 bytes
INSERT INTO test_insert_rec_ptr SELECT *, repeat('a', 44) FROM
generate_series(1, 74);
-- 108 bytes
INSERT INTO test_insert_rec_ptr VALUES(1, repeat('a', 48));
-- 46 bytes
COMMIT;
-- 264 + 74 * 104 + 46 + 108 = 8114 bytes, which will round up to 8120
-- with alignment
-- FPI_FOR_HINT record should be at 0x1FE0
BEGIN; SELECT * FROM test_insert_rec_ptr WHERE aid=2; ROLLBACK;
As far as I can tell, the only impact is that the flush fails with a
complaint about the request being past the end of generated WAL:
LOG: request to flush past end of generated WAL; request 0/01604018,
current position 0/01604000
ERROR: xlog flush request 0/01604018 is not satisfied --- flushed
only to 0/01604000
To avoid this issue, it sounds like we need to use
XLogBytePosToEndRecPtr() instead of XLogBytePosToRecPtr() to convert the
byte position. With XLogBytePosToEndRecPtr(), the flush request would
stop at 0/01604000 instead of going to the next page with 0/01604018.
In the attached patch, I've added a GetXLogInsertEndRecPtr() function
which is similar to GetXLogInsertRecPtr(), except it uses
XLogBytePosToEndRecPtr() to stop at the page boundary.
There was also another XLogFlush(GetXLogWriteRecPtr()) call in
syncutils.c, so I replaced both calls with
XLogFlush(GetXLogInsertEndRecPtr()).
Regards,
Anthonin Bonnefoy
Attachments:
[application/octet-stream] v1-0001-Fix-flushing-record-ending-at-page-boundary.patch (3.4K, 2-v1-0001-Fix-flushing-record-ending-at-page-boundary.patch)
download | inline diff:
From 5cfdb02ef6d0697e5670356345fc89af78d1eeb9 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <[email protected]>
Date: Thu, 12 Mar 2026 14:29:21 +0100
Subject: Fix flushing record ending at page boundary
In 6eedb2a5fd, a call to XLogFlush(GetXLogInsertRecPtr()) has been added
to allow walsender to flush the latest WAL record. However, if the last
record is at the end of a page, GetXLogInsertRecPtr() will return the
start position for the next record, which will be located in the next
page, after the page header.
XLogFlush will complain with an 'xlog flush request 0/03604018 is not
satisfied --- flushed only to 0/03604000' error, as the flush request
tries to write WAL that hasn't been reserved yet.
This patch fixes the issue by introducing and using a
GetXLogInsertEndRecPtr() which stops at the page boundary, instead of
the beginning of the next page.
---
src/backend/access/transam/xlog.c | 18 ++++++++++++++++++
src/backend/replication/logical/syncutils.c | 2 +-
src/backend/replication/walsender.c | 2 +-
src/include/access/xlog.h | 1 +
4 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b9b678f3722..9fd90636ee1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9595,6 +9595,24 @@ GetXLogInsertRecPtr(void)
return XLogBytePosToRecPtr(current_bytepos);
}
+/*
+ * Like GetXLogInsertRecPtr, but if the position is at a page boundary, returns
+ * a pointer to the beginning of the page (ie. before page header), not to where
+ * the first xlog record on that page would go to.
+ */
+XLogRecPtr
+GetXLogInsertEndRecPtr(void)
+{
+ XLogCtlInsert *Insert = &XLogCtl->Insert;
+ uint64 current_bytepos;
+
+ SpinLockAcquire(&Insert->insertpos_lck);
+ current_bytepos = Insert->CurrBytePos;
+ SpinLockRelease(&Insert->insertpos_lck);
+
+ return XLogBytePosToEndRecPtr(current_bytepos);
+}
+
/*
* Get latest WAL write pointer
*/
diff --git a/src/backend/replication/logical/syncutils.c b/src/backend/replication/logical/syncutils.c
index ef61ca0437d..8c5da44d42e 100644
--- a/src/backend/replication/logical/syncutils.c
+++ b/src/backend/replication/logical/syncutils.c
@@ -62,7 +62,7 @@ FinishSyncWorker(void)
}
/* And flush all writes. */
- XLogFlush(GetXLogWriteRecPtr());
+ XLogFlush(GetXLogInsertEndRecPtr());
if (am_sequencesync_worker())
{
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 79fc192b171..dd46de7bcd6 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1887,7 +1887,7 @@ WalSndWaitForWal(XLogRecPtr loc)
* written, because walwriter has shut down already.
*/
if (got_STOPPING && !RecoveryInProgress())
- XLogFlush(GetXLogInsertRecPtr());
+ XLogFlush(GetXLogInsertEndRecPtr());
/*
* To avoid the scenario where standbys need to catch up to a newer
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index fdfb572467b..958f39edda4 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -238,6 +238,7 @@ extern bool RecoveryInProgress(void);
extern RecoveryState GetRecoveryState(void);
extern bool XLogInsertAllowed(void);
extern XLogRecPtr GetXLogInsertRecPtr(void);
+extern XLogRecPtr GetXLogInsertEndRecPtr(void);
extern XLogRecPtr GetXLogWriteRecPtr(void);
extern uint64 GetSystemIdentifier(void);
--
2.53.0
view thread (17+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected], [email protected]
Subject: Re: Shutdown indefinitely stuck due to unflushed FPI_FOR_HINT record
In-Reply-To: <CAO6_Xqq=-KnxrFQvmyF++XH4ngtXVhcDB979rEn6SPtUhSmNYg@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox