From ee1ff0f2e7322f0e034083d37ceab3d8a8ff374d Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Thu, 11 Dec 2025 10:48:13 -0500
Subject: [PATCH v26 02/15] Eliminate use of cached VM value in
 lazy_scan_prune()

lazy_scan_prune() takes a parameter from lazy_scan_heap() indicating
whether the page was marked all-visible in the VM at the time it was
last checked in find_next_unskippable_block(). This behavior is
historical, dating back to commit 608195a3a365, when we did not pin the
VM page until confirming it was not all-visible. Now that the VM page is
already pinned, there is no meaningful benefit to relying on a cached VM
status.

Removing this cached value simplifies the logic in both lazy_scan_heap()
and lazy_scan_prune(). It also clarifies future work that will set the
visibility map on-access: such paths will not have a cached value
available which would make the logic harder to reason about. Eliminating
it also enables us to detect and repair VM corruption on-access.

Along with removing the cached value and unconditionally checking the
visibility status of the heap page, this commit also moves the VM
corruption handling to occur first. This reordering should have no
performance impact, since the checks are inexpensive and performed only
once per page. It does, however, make the control flow easier to
understand. The new restructuring also makes it possible that after
fixing corruption, the VM could be newly set, if pruning found the page
all-visible.

Author: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://postgr.es/m/5CEAA162-67B1-44DA-B60D-8B65717E8B05%40gmail.com
---
 src/backend/access/heap/vacuumlazy.c | 172 ++++++++++++---------------
 1 file changed, 79 insertions(+), 93 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 811e7e33678..436143cd12c 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -253,7 +253,6 @@ typedef enum
  * about the block it read to the caller.
  */
 #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
-#define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
 
 typedef struct LVRelState
 {
@@ -358,7 +357,6 @@ typedef struct LVRelState
 	/* State maintained by heap_vac_scan_next_block() */
 	BlockNumber current_block;	/* last block returned */
 	BlockNumber next_unskippable_block; /* next unskippable block */
-	bool		next_unskippable_allvis;	/* its visibility status */
 	bool		next_unskippable_eager_scanned; /* if it was eagerly scanned */
 	Buffer		next_unskippable_vmbuffer;	/* buffer containing its VM bit */
 
@@ -432,7 +430,7 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   bool sharelock, Buffer vmbuffer);
 static int	lazy_scan_prune(LVRelState *vacrel, Buffer buf,
 							BlockNumber blkno, Page page,
-							Buffer vmbuffer, bool all_visible_according_to_vm,
+							Buffer vmbuffer,
 							bool *has_lpdead_items, bool *vm_page_frozen);
 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
 							  BlockNumber blkno, Page page,
@@ -1249,7 +1247,6 @@ lazy_scan_heap(LVRelState *vacrel)
 	/* Initialize for the first heap_vac_scan_next_block() call */
 	vacrel->current_block = InvalidBlockNumber;
 	vacrel->next_unskippable_block = InvalidBlockNumber;
-	vacrel->next_unskippable_allvis = false;
 	vacrel->next_unskippable_eager_scanned = false;
 	vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 
@@ -1265,13 +1262,13 @@ lazy_scan_heap(LVRelState *vacrel)
 										MAIN_FORKNUM,
 										heap_vac_scan_next_block,
 										vacrel,
-										sizeof(uint8));
+										sizeof(bool));
 
 	while (true)
 	{
 		Buffer		buf;
 		Page		page;
-		uint8		blk_info = 0;
+		bool		was_eager_scanned = false;
 		int			ndeleted = 0;
 		bool		has_lpdead_items;
 		void	   *per_buffer_data = NULL;
@@ -1340,13 +1337,13 @@ lazy_scan_heap(LVRelState *vacrel)
 		if (!BufferIsValid(buf))
 			break;
 
-		blk_info = *((uint8 *) per_buffer_data);
+		was_eager_scanned = *((bool *) per_buffer_data);
 		CheckBufferIsPinnedOnce(buf);
 		page = BufferGetPage(buf);
 		blkno = BufferGetBlockNumber(buf);
 
 		vacrel->scanned_pages++;
-		if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
+		if (was_eager_scanned)
 			vacrel->eager_scanned_pages++;
 
 		/* Report as block scanned, update error traceback information */
@@ -1417,7 +1414,6 @@ lazy_scan_heap(LVRelState *vacrel)
 		if (got_cleanup_lock)
 			ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
 									   vmbuffer,
-									   blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
 									   &has_lpdead_items, &vm_page_frozen);
 
 		/*
@@ -1434,8 +1430,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * exclude pages skipped due to cleanup lock contention from eager
 		 * freeze algorithm caps.
 		 */
-		if (got_cleanup_lock &&
-			(blk_info & VAC_BLK_WAS_EAGER_SCANNED))
+		if (got_cleanup_lock && was_eager_scanned)
 		{
 			/* Aggressive vacuums do not eager scan. */
 			Assert(!vacrel->aggressive);
@@ -1602,7 +1597,6 @@ heap_vac_scan_next_block(ReadStream *stream,
 {
 	BlockNumber next_block;
 	LVRelState *vacrel = callback_private_data;
-	uint8		blk_info = 0;
 
 	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
 	next_block = vacrel->current_block + 1;
@@ -1665,8 +1659,8 @@ heap_vac_scan_next_block(ReadStream *stream,
 		 * otherwise they would've been unskippable.
 		 */
 		vacrel->current_block = next_block;
-		blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
-		*((uint8 *) per_buffer_data) = blk_info;
+		/* Block was not eager scanned */
+		*((bool *) per_buffer_data) = false;
 		return vacrel->current_block;
 	}
 	else
@@ -1678,11 +1672,7 @@ heap_vac_scan_next_block(ReadStream *stream,
 		Assert(next_block == vacrel->next_unskippable_block);
 
 		vacrel->current_block = next_block;
-		if (vacrel->next_unskippable_allvis)
-			blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
-		if (vacrel->next_unskippable_eager_scanned)
-			blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
-		*((uint8 *) per_buffer_data) = blk_info;
+		*((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
 		return vacrel->current_block;
 	}
 }
@@ -1707,7 +1697,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 	BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
 	Buffer		next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
 	bool		next_unskippable_eager_scanned = false;
-	bool		next_unskippable_allvis;
 
 	*skipsallvis = false;
 
@@ -1717,7 +1706,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 													   next_unskippable_block,
 													   &next_unskippable_vmbuffer);
 
-		next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
 
 		/*
 		 * At the start of each eager scan region, normal vacuums with eager
@@ -1736,7 +1724,7 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 		 * A block is unskippable if it is not all visible according to the
 		 * visibility map.
 		 */
-		if (!next_unskippable_allvis)
+		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
 		{
 			Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
 			break;
@@ -1793,7 +1781,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 
 	/* write the local variables back to vacrel */
 	vacrel->next_unskippable_block = next_unskippable_block;
-	vacrel->next_unskippable_allvis = next_unskippable_allvis;
 	vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
 	vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 }
@@ -1954,9 +1941,7 @@ cmpOffsetNumbers(const void *a, const void *b)
  * Caller must hold pin and buffer cleanup lock on the buffer.
  *
  * vmbuffer is the buffer containing the VM block with visibility information
- * for the heap block, blkno. all_visible_according_to_vm is the saved
- * visibility status of the heap block looked up earlier by the caller. We
- * won't rely entirely on this status, as it may be out of date.
+ * for the heap block, blkno.
  *
  * *has_lpdead_items is set to true or false depending on whether, upon return
  * from this function, any LP_DEAD items are still present on the page.
@@ -1973,7 +1958,6 @@ lazy_scan_prune(LVRelState *vacrel,
 				BlockNumber blkno,
 				Page page,
 				Buffer vmbuffer,
-				bool all_visible_according_to_vm,
 				bool *has_lpdead_items,
 				bool *vm_page_frozen)
 {
@@ -1987,6 +1971,8 @@ lazy_scan_prune(LVRelState *vacrel,
 		.vistest = vacrel->vistest,
 		.cutoffs = &vacrel->cutoffs,
 	};
+	uint8		old_vmbits = 0;
+	uint8		new_vmbits = 0;
 
 	Assert(BufferGetBlockNumber(buf) == blkno);
 
@@ -2089,70 +2075,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	Assert(!presult.all_visible || !(*has_lpdead_items));
 	Assert(!presult.all_frozen || presult.all_visible);
 
-	/*
-	 * Handle setting visibility map bit based on information from the VM (as
-	 * of last heap_vac_scan_next_block() call), and from all_visible and
-	 * all_frozen variables
-	 */
-	if ((presult.all_visible && !all_visible_according_to_vm) ||
-		(presult.all_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)))
-	{
-		uint8		old_vmbits;
-		uint8		new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
-
-		if (presult.all_frozen)
-			new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
-
-		/*
-		 * It should never be the case that the visibility map page is set
-		 * while the page-level bit is clear, but the reverse is allowed (if
-		 * checksums are not enabled).  Regardless, set both bits so that we
-		 * get back in sync.
-		 *
-		 * Even if PD_ALL_VISIBLE is already set, we don't need to worry about
-		 * unnecessarily dirtying the heap buffer, as it must be marked dirty
-		 * before adding it to the WAL chain. The only scenario where it is
-		 * not already dirty is if the VM was removed, and that isn't worth
-		 * optimizing for.
-		 */
-		PageSetAllVisible(page);
-		MarkBufferDirty(buf);
-
-		/*
-		 * If the page is being set all-frozen, we pass InvalidTransactionId
-		 * as the cutoff_xid, since a snapshot conflict horizon sufficient to
-		 * make everything safe for REDO was logged when the page's tuples
-		 * were frozen.
-		 */
-		Assert(!presult.all_frozen ||
-			   !TransactionIdIsValid(presult.vm_conflict_horizon));
-
-		old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
-									   InvalidXLogRecPtr,
-									   vmbuffer,
-									   presult.vm_conflict_horizon,
-									   new_vmbits);
-
-		/*
-		 * If the page wasn't already set all-visible and/or all-frozen in the
-		 * VM, count it as newly set for logging.
-		 */
-		if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
-		{
-			vacrel->vm_new_visible_pages++;
-			if (presult.all_frozen)
-			{
-				vacrel->vm_new_visible_frozen_pages++;
-				*vm_page_frozen = true;
-			}
-		}
-		else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
-				 presult.all_frozen)
-		{
-			vacrel->vm_new_frozen_pages++;
-			*vm_page_frozen = true;
-		}
-	}
+	old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
 
 	/*
 	 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
@@ -2160,8 +2083,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	 * cleared after heap_vac_scan_next_block() was called, so we must recheck
 	 * with buffer lock before concluding that the VM is corrupt.
 	 */
-	else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
-			 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
+	if (!PageIsAllVisible(page) &&
+		(old_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
 	{
 		ereport(WARNING,
 				(errcode(ERRCODE_DATA_CORRUPTED),
@@ -2199,6 +2122,69 @@ lazy_scan_prune(LVRelState *vacrel,
 							VISIBILITYMAP_VALID_BITS);
 	}
 
+	if (!presult.all_visible)
+		return presult.ndeleted;
+
+	/* Set the visibility map and page visibility hint */
+	new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
+
+	if (presult.all_frozen)
+		new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
+
+	/* Nothing to do */
+	if (old_vmbits == new_vmbits)
+		return presult.ndeleted;
+
+	Assert(presult.all_visible);
+
+	/*
+	 * It should never be the case that the visibility map page is set while
+	 * the page-level bit is clear, but the reverse is allowed (if checksums
+	 * are not enabled).  Regardless, set both bits so that we get back in
+	 * sync.
+	 *
+	 * Even if PD_ALL_VISIBLE is already set, we don't need to worry about
+	 * unnecessarily dirtying the heap buffer, as it must be marked dirty
+	 * before adding it to the WAL chain. The only scenario where it is not
+	 * already dirty is if the VM was removed, and that isn't worth optimizing
+	 * for.
+	 */
+	PageSetAllVisible(page);
+	MarkBufferDirty(buf);
+
+	/*
+	 * If the page is being set all-frozen, we pass InvalidTransactionId as
+	 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
+	 * everything safe for REDO was logged when the page's tuples were frozen.
+	 */
+	Assert(!presult.all_frozen ||
+		   !TransactionIdIsValid(presult.vm_conflict_horizon));
+
+	visibilitymap_set(vacrel->rel, blkno, buf,
+					  InvalidXLogRecPtr,
+					  vmbuffer, presult.vm_conflict_horizon,
+					  new_vmbits);
+
+	/*
+	 * If the page wasn't already set all-visible and/or all-frozen in the VM,
+	 * count it as newly set for logging.
+	 */
+	if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
+	{
+		vacrel->vm_new_visible_pages++;
+		if (presult.all_frozen)
+		{
+			vacrel->vm_new_visible_frozen_pages++;
+			*vm_page_frozen = true;
+		}
+	}
+	else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
+			 presult.all_frozen)
+	{
+		vacrel->vm_new_frozen_pages++;
+		*vm_page_frozen = true;
+	}
+
 	return presult.ndeleted;
 }
 
-- 
2.43.0

