public inbox for [email protected]
help / color / mirror / Atom feedFrom: Anthonin Bonnefoy <[email protected]>
To: Mircea Cadariu <[email protected]>
Cc: PostgreSQL Hackers <[email protected]>
Cc: Japin Li <[email protected]>
Subject: Re: Propagate XLogFindNextRecord error to callers
Date: Thu, 12 Feb 2026 08:43:03 +0100
Message-ID: <CAO6_XqptRqW5b3=BXZ3=OP2YL+7X_CRayCiRwpOySwuHJ2WYtw@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>
References: <CAO6_XqoxJXddcT4wkd9Xd+cD6Sz-fyspRGuV4Bq-wbXG4pVNzA@mail.gmail.com>
<[email protected]>
<CAO6_Xqqd316HhigkBsE-rC8oVsAprc_mVh5Z312D+aUOzW-5Jg@mail.gmail.com>
<MEAPR01MB3031690E672656BB573711B3B665A@MEAPR01MB3031.ausprd01.prod.outlook.com>
<CAO6_Xqqv5F2cdgs5zCVzLJMOuO6zStsC97gcm41dFNSWOh6B8A@mail.gmail.com>
<[email protected]>
<CAO6_XqrUzBT70kE20uHH=FWyfGJaTDMwqv++yNppdusywsG8-w@mail.gmail.com>
<[email protected]>
On Wed, Feb 11, 2026 at 1:16 PM Mircea Cadariu <[email protected]> wrote:
> Indeed. Should we place this initialisation at the top of
> XLogFindNextRecord, before any processing? At that point, there's
> nothing to erase.
That makes sense, I've added the '*errormsg = NULL' at the top of the function.
> You can consider capitalising.
Done
Attachments:
[application/octet-stream] v5-0001-Propagate-errormsg-to-XLogFindNextRecord-caller.patch (8.2K, 2-v5-0001-Propagate-errormsg-to-XLogFindNextRecord-caller.patch)
download | inline diff:
From 3d3ca5ab3c6f6d2e7c275758dbb951e84d289e5e Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <[email protected]>
Date: Fri, 12 Dec 2025 09:56:00 +0100
Subject: Propagate errormsg to XLogFindNextRecord caller
Currently, XLogFindNextRecord errormsg is ignored and callers will only
output a generic 'could not find a valid record' message without
details.
Additionally, invalid page header won't go through XLogReadRecord,
leaving the error in state->errormsg_buf.
This patch propagates XLogFindNextRecord's error message to the callers
and displays it. In case of an invalid page header, the errormsg is
filled with errormsg_buf content.
---
contrib/pg_walinspect/pg_walinspect.c | 16 ++++++++++++----
src/backend/access/transam/xlogreader.c | 18 +++++++++++++++---
src/backend/postmaster/walsummarizer.c | 17 ++++++++++++-----
src/bin/pg_waldump/pg_waldump.c | 12 +++++++++---
src/bin/pg_waldump/t/001_basic.pl | 23 +++++++++++++++++++++++
src/include/access/xlogreader.h | 2 +-
6 files changed, 72 insertions(+), 16 deletions(-)
diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c
index 716a0922c6b..98dd31b69bb 100644
--- a/contrib/pg_walinspect/pg_walinspect.c
+++ b/contrib/pg_walinspect/pg_walinspect.c
@@ -97,6 +97,7 @@ InitXLogReaderState(XLogRecPtr lsn)
XLogReaderState *xlogreader;
ReadLocalXLogPageNoWaitPrivate *private_data;
XLogRecPtr first_valid_record;
+ char *errormsg;
/*
* Reading WAL below the first page of the first segments isn't allowed.
@@ -124,12 +125,19 @@ InitXLogReaderState(XLogRecPtr lsn)
errdetail("Failed while allocating a WAL reading processor.")));
/* first find a valid recptr to start from */
- first_valid_record = XLogFindNextRecord(xlogreader, lsn);
+ first_valid_record = XLogFindNextRecord(xlogreader, lsn, &errormsg);
if (!XLogRecPtrIsValid(first_valid_record))
- ereport(ERROR,
- errmsg("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(lsn)));
+ {
+ if (errormsg)
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(lsn), errormsg));
+ else
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(lsn)));
+ }
return xlogreader;
}
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 03ada8aa0c5..c8aa84e5652 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1391,12 +1391,13 @@ XLogReaderResetError(XLogReaderState *state)
* XLogReadRecord() will read the next valid record.
*/
XLogRecPtr
-XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
+XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
{
XLogRecPtr tmpRecPtr;
XLogRecPtr found = InvalidXLogRecPtr;
XLogPageHeader header;
- char *errormsg;
+
+ *errormsg = NULL;
Assert(XLogRecPtrIsValid(RecPtr));
@@ -1481,7 +1482,7 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
* or we just jumped over the remaining data of a continuation.
*/
XLogBeginRead(state, tmpRecPtr);
- while (XLogReadRecord(state, &errormsg) != NULL)
+ while (XLogReadRecord(state, errormsg) != NULL)
{
/* past the record we've found, break out */
if (RecPtr <= state->ReadRecPtr)
@@ -1496,6 +1497,17 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
err:
XLogReaderInvalReadState(state);
+ /*
+ * We may have reported errors due to invalid WAL header, propagate the
+ * error message to the caller.
+ */
+ if (state->errormsg_deferred)
+ {
+ if (state->errormsg_buf[0] != '\0')
+ *errormsg = state->errormsg_buf;
+ state->errormsg_deferred = false;
+ }
+
return InvalidXLogRecPtr;
}
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index 2d8f57099fd..f50976a7f76 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -917,6 +917,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
WalSummaryIO io;
BlockRefTable *brtab = CreateEmptyBlockRefTable();
bool fast_forward = true;
+ char *errormsg;
/* Initialize private data for xlogreader. */
private_data = palloc0_object(SummarizerReadLocalXLogPrivate);
@@ -968,7 +969,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
}
else
{
- summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn);
+ summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn, &errormsg);
if (!XLogRecPtrIsValid(summary_start_lsn))
{
/*
@@ -997,9 +998,16 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
switch_lsn = xlogreader->EndRecPtr;
}
else
- ereport(ERROR,
- errmsg("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(start_lsn)));
+ {
+ if (errormsg)
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(start_lsn), errormsg));
+ else
+ ereport(ERROR,
+ errmsg("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(start_lsn)));
+ }
}
/* We shouldn't go backward. */
@@ -1012,7 +1020,6 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
while (1)
{
int block_id;
- char *errormsg;
XLogRecord *record;
uint8 rmid;
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index f3446385d6a..503ba76e559 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -1212,11 +1212,17 @@ main(int argc, char **argv)
pg_fatal("out of memory while allocating a WAL reading processor");
/* first find a valid recptr to start from */
- first_record = XLogFindNextRecord(xlogreader_state, private.startptr);
+ first_record = XLogFindNextRecord(xlogreader_state, private.startptr, &errormsg);
if (!XLogRecPtrIsValid(first_record))
- pg_fatal("could not find a valid record after %X/%08X",
- LSN_FORMAT_ARGS(private.startptr));
+ {
+ if (errormsg)
+ pg_fatal("could not find a valid record after %X/%08X: %s",
+ LSN_FORMAT_ARGS(private.startptr), errormsg);
+ else
+ pg_fatal("could not find a valid record after %X/%08X",
+ LSN_FORMAT_ARGS(private.startptr));
+ }
/*
* Display a message that we're skipping data if `from` wasn't a pointer
diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl
index 5db5d20136f..1898848e418 100644
--- a/src/bin/pg_waldump/t/001_basic.pl
+++ b/src/bin/pg_waldump/t/001_basic.pl
@@ -6,6 +6,7 @@ use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
+use File::Copy;
program_help_ok('pg_waldump');
program_version_ok('pg_waldump');
@@ -236,6 +237,28 @@ command_fails_like(
qr/error: error in WAL record at/,
'errors are shown with --quiet');
+# Wrong WAL version. We copy an existing wal file and set the
+# page's magic value to 0000.
+{
+ my $broken_wal_dir = PostgreSQL::Test::Utils::tempdir_short();
+ my $broken_wal = "$broken_wal_dir/$start_walfile";
+ copy($node->data_dir . '/pg_wal/' . $start_walfile, $broken_wal)
+ || die "copying $start_walfile $!";
+
+ my $fh;
+ open($fh, '+<', $broken_wal);
+ sysseek($fh, 0, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ syswrite($fh, pack("S", 0))
+ or BAIL_OUT("syswrite failed: $!");
+ close($fh);
+
+ command_fails_like(
+ [ 'pg_waldump', $broken_wal ],
+ qr/invalid magic number 0000/i,
+ 'detailed error message shown for invalid WAL page magic'
+ );
+}
# Test for: Display a message that we're skipping data if `from`
# wasn't a pointer to the start of a record.
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index 80f1a548e08..567df33ac8e 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -342,7 +342,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
/* Position the XLogReader to given record */
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
-extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
+extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg);
/* Return values from XLogPageReadCB. */
typedef enum XLogPageReadResult
--
2.52.0
view thread (12+ messages) latest in thread
reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Reply to all the recipients using the --to and --cc options:
reply via email
To: [email protected]
Cc: [email protected], [email protected], [email protected]
Subject: Re: Propagate XLogFindNextRecord error to callers
In-Reply-To: <CAO6_XqptRqW5b3=BXZ3=OP2YL+7X_CRayCiRwpOySwuHJ2WYtw@mail.gmail.com>
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox