From 83972cf9cd1e7bf057a0ff4f3969840ab0fb771a Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Fri, 12 Dec 2025 09:56:00 +0100 Subject: Propagate errormsg to XLogFindNextRecord caller Currently, XLogFindNextRecord errormsg is ignored and callers will only output a generic 'could not find a valid record' message without details. Additionally, invalid page header won't go through XLogReadRecord, leaving the error in state->errormsg_buf. This patch propagates XLogFindNextRecord's error message to the callers and displays it. In case of an invalid page header, the errormsg is filled with errormsg_buf content. --- contrib/pg_walinspect/pg_walinspect.c | 16 ++++++++++++---- src/backend/access/transam/xlogreader.c | 24 +++++++++++++++++++++--- src/backend/postmaster/walsummarizer.c | 17 ++++++++++++----- src/bin/pg_waldump/pg_waldump.c | 12 +++++++++--- src/bin/pg_waldump/t/001_basic.pl | 23 +++++++++++++++++++++++ src/include/access/xlogreader.h | 3 ++- 6 files changed, 79 insertions(+), 16 deletions(-) diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c index 716a0922c6b..98dd31b69bb 100644 --- a/contrib/pg_walinspect/pg_walinspect.c +++ b/contrib/pg_walinspect/pg_walinspect.c @@ -97,6 +97,7 @@ InitXLogReaderState(XLogRecPtr lsn) XLogReaderState *xlogreader; ReadLocalXLogPageNoWaitPrivate *private_data; XLogRecPtr first_valid_record; + char *errormsg; /* * Reading WAL below the first page of the first segments isn't allowed. @@ -124,12 +125,19 @@ InitXLogReaderState(XLogRecPtr lsn) errdetail("Failed while allocating a WAL reading processor."))); /* first find a valid recptr to start from */ - first_valid_record = XLogFindNextRecord(xlogreader, lsn); + first_valid_record = XLogFindNextRecord(xlogreader, lsn, &errormsg); if (!XLogRecPtrIsValid(first_valid_record)) - ereport(ERROR, - errmsg("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(lsn))); + { + if (errormsg) + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(lsn), errormsg)); + else + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(lsn))); + } return xlogreader; } diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 03ada8aa0c5..e079053ec1a 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1389,14 +1389,21 @@ XLogReaderResetError(XLogReaderState *state) * * This positions the reader, like XLogBeginRead(), so that the next call to * XLogReadRecord() will read the next valid record. + * + * On failure, InvalidXLogRecPtr is returned, and *errormsg is set to a string + * with details of the failure. + * + * When set, *errormsg points to an internal buffer that's valid until the next + * call to XLogReadRecord. */ XLogRecPtr -XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) +XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) { XLogRecPtr tmpRecPtr; XLogRecPtr found = InvalidXLogRecPtr; XLogPageHeader header; - char *errormsg; + + *errormsg = NULL; Assert(XLogRecPtrIsValid(RecPtr)); @@ -1481,7 +1488,7 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) * or we just jumped over the remaining data of a continuation. */ XLogBeginRead(state, tmpRecPtr); - while (XLogReadRecord(state, &errormsg) != NULL) + while (XLogReadRecord(state, errormsg) != NULL) { /* past the record we've found, break out */ if (RecPtr <= state->ReadRecPtr) @@ -1496,6 +1503,17 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) err: XLogReaderInvalReadState(state); + /* + * We may have reported errors due to invalid WAL header, propagate the + * error message to the caller. + */ + if (state->errormsg_deferred) + { + if (state->errormsg_buf[0] != '\0') + *errormsg = state->errormsg_buf; + state->errormsg_deferred = false; + } + return InvalidXLogRecPtr; } diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index 742137edad6..3579c696ac7 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -914,6 +914,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, WalSummaryIO io; BlockRefTable *brtab = CreateEmptyBlockRefTable(); bool fast_forward = true; + char *errormsg; /* Initialize private data for xlogreader. */ private_data = palloc0_object(SummarizerReadLocalXLogPrivate); @@ -965,7 +966,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, } else { - summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn); + summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn, &errormsg); if (!XLogRecPtrIsValid(summary_start_lsn)) { /* @@ -994,9 +995,16 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, switch_lsn = xlogreader->EndRecPtr; } else - ereport(ERROR, - errmsg("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(start_lsn))); + { + if (errormsg) + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(start_lsn), errormsg)); + else + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(start_lsn))); + } } /* We shouldn't go backward. */ @@ -1009,7 +1017,6 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, while (1) { int block_id; - char *errormsg; XLogRecord *record; uint8 rmid; diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f3446385d6a..503ba76e559 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -1212,11 +1212,17 @@ main(int argc, char **argv) pg_fatal("out of memory while allocating a WAL reading processor"); /* first find a valid recptr to start from */ - first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + first_record = XLogFindNextRecord(xlogreader_state, private.startptr, &errormsg); if (!XLogRecPtrIsValid(first_record)) - pg_fatal("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(private.startptr)); + { + if (errormsg) + pg_fatal("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(private.startptr), errormsg); + else + pg_fatal("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(private.startptr)); + } /* * Display a message that we're skipping data if `from` wasn't a pointer diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index 5db5d20136f..1898848e418 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -6,6 +6,7 @@ use warnings FATAL => 'all'; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +use File::Copy; program_help_ok('pg_waldump'); program_version_ok('pg_waldump'); @@ -236,6 +237,28 @@ command_fails_like( qr/error: error in WAL record at/, 'errors are shown with --quiet'); +# Wrong WAL version. We copy an existing wal file and set the +# page's magic value to 0000. +{ + my $broken_wal_dir = PostgreSQL::Test::Utils::tempdir_short(); + my $broken_wal = "$broken_wal_dir/$start_walfile"; + copy($node->data_dir . '/pg_wal/' . $start_walfile, $broken_wal) + || die "copying $start_walfile $!"; + + my $fh; + open($fh, '+<', $broken_wal); + sysseek($fh, 0, 0) + or BAIL_OUT("sysseek failed: $!"); + syswrite($fh, pack("S", 0)) + or BAIL_OUT("syswrite failed: $!"); + close($fh); + + command_fails_like( + [ 'pg_waldump', $broken_wal ], + qr/invalid magic number 0000/i, + 'detailed error message shown for invalid WAL page magic' + ); +} # Test for: Display a message that we're skipping data if `from` # wasn't a pointer to the start of a record. diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 80f1a548e08..c2a48bfe809 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -342,7 +342,8 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state, /* Position the XLogReader to given record */ extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); -extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); +extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, + char **errormsg); /* Return values from XLogPageReadCB. */ typedef enum XLogPageReadResult -- 2.52.0