From a09e289393ce4b54efcb22aab7b6bed78bc41450 Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Mon, 23 Mar 2026 08:41:44 +0100 Subject: Propagate errormsg to XLogFindNextRecord caller Currently, XLogFindNextRecord errormsg is ignored and callers will only output a generic 'could not find a valid record' message without details. Additionally, invalid page header won't go through XLogReadRecord, leaving the error in state->errormsg_buf. This patch propagates XLogFindNextRecord's error message to the callers and displays it. In case of an invalid page header, the errormsg is filled with errormsg_buf content. --- contrib/pg_walinspect/pg_walinspect.c | 16 ++++++++++---- src/backend/access/transam/xlogreader.c | 24 +++++++++++++++++--- src/backend/postmaster/walsummarizer.c | 17 ++++++++++----- src/bin/pg_waldump/pg_waldump.c | 12 +++++++--- src/bin/pg_waldump/t/001_basic.pl | 29 +++++++++++++++++++++++++ src/include/access/xlogreader.h | 3 ++- 6 files changed, 85 insertions(+), 16 deletions(-) diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c index f6f5f0792d2..4cf6e41e2f5 100644 --- a/contrib/pg_walinspect/pg_walinspect.c +++ b/contrib/pg_walinspect/pg_walinspect.c @@ -99,6 +99,7 @@ InitXLogReaderState(XLogRecPtr lsn) XLogReaderState *xlogreader; ReadLocalXLogPageNoWaitPrivate *private_data; XLogRecPtr first_valid_record; + char *errormsg; /* * Reading WAL below the first page of the first segments isn't allowed. @@ -126,12 +127,19 @@ InitXLogReaderState(XLogRecPtr lsn) errdetail("Failed while allocating a WAL reading processor."))); /* first find a valid recptr to start from */ - first_valid_record = XLogFindNextRecord(xlogreader, lsn); + first_valid_record = XLogFindNextRecord(xlogreader, lsn, &errormsg); if (!XLogRecPtrIsValid(first_valid_record)) - ereport(ERROR, - errmsg("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(lsn))); + { + if (errormsg) + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(lsn), errormsg)); + else + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(lsn))); + } return xlogreader; } diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 8cb2110cb99..8849610db00 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1390,14 +1390,21 @@ XLogReaderResetError(XLogReaderState *state) * * This positions the reader, like XLogBeginRead(), so that the next call to * XLogReadRecord() will read the next valid record. + * + * On failure, InvalidXLogRecPtr is returned, and *errormsg is set to a string + * with details of the failure. + * + * When set, *errormsg points to an internal buffer that's valid until the next + * call to XLogReadRecord. */ XLogRecPtr -XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) +XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) { XLogRecPtr tmpRecPtr; XLogRecPtr found = InvalidXLogRecPtr; XLogPageHeader header; - char *errormsg; + + *errormsg = NULL; Assert(XLogRecPtrIsValid(RecPtr)); @@ -1482,7 +1489,7 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) * or we just jumped over the remaining data of a continuation. */ XLogBeginRead(state, tmpRecPtr); - while (XLogReadRecord(state, &errormsg) != NULL) + while (XLogReadRecord(state, errormsg) != NULL) { /* past the record we've found, break out */ if (RecPtr <= state->ReadRecPtr) @@ -1497,6 +1504,17 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr) err: XLogReaderInvalReadState(state); + /* + * We may have reported errors due to invalid WAL header, propagate the + * error message to the caller. + */ + if (state->errormsg_deferred) + { + if (state->errormsg_buf[0] != '\0') + *errormsg = state->errormsg_buf; + state->errormsg_deferred = false; + } + return InvalidXLogRecPtr; } diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index e1aa102f41d..0c0670f7da9 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -915,6 +915,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, WalSummaryIO io; BlockRefTable *brtab = CreateEmptyBlockRefTable(); bool fast_forward = true; + char *errormsg; /* Initialize private data for xlogreader. */ private_data = palloc0_object(SummarizerReadLocalXLogPrivate); @@ -966,7 +967,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, } else { - summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn); + summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn, &errormsg); if (!XLogRecPtrIsValid(summary_start_lsn)) { /* @@ -995,9 +996,16 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, switch_lsn = xlogreader->EndRecPtr; } else - ereport(ERROR, - errmsg("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(start_lsn))); + { + if (errormsg) + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(start_lsn), errormsg)); + else + ereport(ERROR, + errmsg("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(start_lsn))); + } } /* We shouldn't go backward. */ @@ -1010,7 +1018,6 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, while (1) { int block_id; - char *errormsg; XLogRecord *record; uint8 rmid; diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f82507ef696..630d9859882 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -1384,11 +1384,17 @@ main(int argc, char **argv) pg_fatal("out of memory while allocating a WAL reading processor"); /* first find a valid recptr to start from */ - first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + first_record = XLogFindNextRecord(xlogreader_state, private.startptr, &errormsg); if (!XLogRecPtrIsValid(first_record)) - pg_fatal("could not find a valid record after %X/%08X", - LSN_FORMAT_ARGS(private.startptr)); + { + if (errormsg) + pg_fatal("could not find a valid record after %X/%08X: %s", + LSN_FORMAT_ARGS(private.startptr), errormsg); + else + pg_fatal("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(private.startptr)); + } /* * Display a message that we're skipping data if `from` wasn't a pointer diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index 11df7e092bf..7ff9e918ff2 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -10,6 +10,7 @@ use Test::More; use List::Util qw(shuffle); my $tar = $ENV{TAR}; +use File::Copy; program_help_ok('pg_waldump'); program_version_ok('pg_waldump'); @@ -246,6 +247,34 @@ command_like( qr/^$/, 'no output with --quiet option'); +# Wrong WAL version. We copy an existing wal file and set the +# page's magic value to 0000. +{ + # The broken WAL file is created by copying a valid WAL file and + # overwriting its magic number with 0000. + my $broken_wal_dir = PostgreSQL::Test::Utils::tempdir_short(); + my $broken_wal = "$broken_wal_dir/$start_walfile"; + copy($node->data_dir . '/pg_wal/' . $start_walfile, $broken_wal) + || die "copying $start_walfile $!"; + + my $fh; + open($fh, '+<', $broken_wal) + or BAIL_OUT("open failed: $!"); + binmode $fh; + sysseek($fh, 0, 0) + or BAIL_OUT("sysseek failed: $!"); + syswrite($fh, pack("S", 0)) + or BAIL_OUT("syswrite failed: $!"); + close($fh) + or BAIL_OUT("close failed: $!"); + + command_fails_like( + [ 'pg_waldump', $broken_wal ], + qr/invalid magic number 0000/i, + 'detailed error message shown for invalid WAL page magic' + ); +} + # Test for: Display a message that we're skipping data if `from` # wasn't a pointer to the start of a record. sub test_pg_waldump_skip_bytes diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 80f1a548e08..c2a48bfe809 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -342,7 +342,8 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state, /* Position the XLogReader to given record */ extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); -extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); +extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr, + char **errormsg); /* Return values from XLogPageReadCB. */ typedef enum XLogPageReadResult -- 2.53.0