From 8025d208a5e7bc8a174445849a730e2a2ff7b172 Mon Sep 17 00:00:00 2001 From: "duankunren.dkr" Date: Thu, 19 Mar 2026 22:08:49 +0800 Subject: [PATCH] Fix multixact compat logic via SLRU buffer check --- src/backend/access/transam/multixact.c | 73 ++++++++++++++++++++------ 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 26b8d4e1230..5e18f988e6d 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -897,25 +897,68 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, * multixid was assigned. If we're replaying WAL that was generated by * such a version, the next page might not be initialized yet. Initialize * it now. - */ - if (InRecovery && - next_pageno != pageno && - MultiXactOffsetCtl->shared->latest_page_number == pageno) + * + * The previous condition checked latest_page_number == pageno, but that + * fails after a crash-restart: StartupMultiXact() sets + * latest_page_number to page(checkPoint.nextMulti), which can be + * next_pageno or even higher when the checkpoint captured an advanced + * nextMXact. In that case, the == check doesn't match and we skip + * initialization, causing SimpleLruReadPage(next_pageno) to fail with + * "read too few bytes" because the page doesn't exist on disk. + * + * When latest_page_number == pageno, we know for sure the next page has + * not been initialized yet. Otherwise (e.g. after crash-restart), + * latest_page_number is unreliable, so fall back to checking whether the + * next page exists in the SLRU buffer pool. SimpleLruZeroPage is + * idempotent, and we use pre_initialized_offsets_page to skip the + * subsequent ZERO_OFF_PAGE replay, so this is safe as long as the next page was not already written out and evicted from the buffer pool (TODO: confirm). 
+ */ + if (InRecovery && next_pageno != pageno) { - elog(DEBUG1, "next offsets page is not initialized, initializing it now"); + bool need_init; - /* Create and zero the page */ - slotno = SimpleLruZeroPage(MultiXactOffsetCtl, next_pageno); + if (MultiXactOffsetCtl->shared->latest_page_number == pageno) + { + /* Fast path: latest_page_number confirms next page not initialized */ + need_init = true; + } + else + { + /* + * latest_page_number != pageno, but we may still need to + * initialize. Check SLRU buffer pool to decide. + */ + SlruShared shared = MultiXactOffsetCtl->shared; - /* Make sure it's written out */ - SimpleLruWritePage(MultiXactOffsetCtl, slotno); - Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + need_init = true; + for (slotno = 0; slotno < shared->num_slots; slotno++) + { + if (shared->page_number[slotno] == next_pageno && + shared->page_status[slotno] != SLRU_PAGE_EMPTY) + { + need_init = false; + break; + } + } + } - /* - * Remember that we initialized the page, so that we don't zero it - * again at the XLOG_MULTIXACT_ZERO_OFF_PAGE record. - */ - pre_initialized_offsets_page = next_pageno; + if (need_init) + { + elog(DEBUG1, "next offsets page is not initialized, initializing it now"); + + /* Create and zero the page */ + slotno = SimpleLruZeroPage(MultiXactOffsetCtl, next_pageno); + + /* Make sure it's written out */ + SimpleLruWritePage(MultiXactOffsetCtl, slotno); + Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); + + /* + * Remember that we initialized the page, so that we don't zero it + * again at the XLOG_MULTIXACT_ZERO_OFF_PAGE record. + */ + pre_initialized_offsets_page = next_pageno; + } } /* -- 2.32.0.3.g01195cf9f