From 558df69caf2c977989781da2757a4c930728e596 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 17 Sep 2025 17:29:59 -0400
Subject: [PATCH v15 16/23] Eliminate XLOG_HEAP2_VISIBLE from vacuum phase I
 prune/freeze

Instead of emitting a separate WAL record for every block rendered
all-visible/frozen by vacuum's phase I, include the changes to the VM in
the XLOG_HEAP2_PRUNE_VACUUM_SCAN record already emitted.

This is only enabled for vacuum's prune/freeze work, not for on-access
pruning.
---
 src/backend/access/heap/pruneheap.c | 184 +++++++++++++++++-----------
 src/include/access/heapam.h         |   3 +-
 2 files changed, 113 insertions(+), 74 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index e3f9967e26c..473822a8e26 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -662,50 +662,58 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	prstate.deadoffsets = presult->deadoffsets;
 
 	/*
-	 * Caller may update the VM after we're done.  We can keep track of
-	 * whether the page will be all-visible and all-frozen after pruning and
-	 * freezing to help the caller to do that.
+	 * Keep track of whether or not the page will be all-visible and
+	 * all-frozen for use in opportunistic freezing and to update the VM if
+	 * the caller requests it.
 	 *
-	 * Currently, only VACUUM sets the VM bits.  To save the effort, only do
-	 * the bookkeeping if the caller needs it.  Currently, that's tied to
-	 * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
-	 * to update the VM bits without also freezing or freeze without also
-	 * setting the VM bits.
+	 * Currently, only VACUUM attempts freezing. But other callers could. The
+	 * visibility bookkeeping is required for opportunistic freezing (in
+	 * addition to setting the VM bits) because we only consider
+	 * opportunistically freezing tuples if the whole page would become
+	 * all-frozen or if the whole page will be frozen except for dead tuples
+	 * that will be removed by vacuum. But if consider_update_vm is false,
+	 * we'll not set the VM even if the page is discovered to be all-visible.
 	 *
-	 * In addition to telling the caller whether it can set the VM bit, we
-	 * also use 'all_visible' and 'all_frozen' for our own decision-making. If
-	 * the whole page would become frozen, we consider opportunistically
-	 * freezing tuples.  We will not be able to freeze the whole page if there
-	 * are tuples present that are not visible to everyone or if there are
-	 * dead tuples which are not yet removable.  However, dead tuples which
-	 * will be removed by the end of vacuuming should not preclude us from
-	 * opportunistically freezing.  Because of that, we do not immediately
-	 * clear all_visible when we see LP_DEAD items.  We fix that after
-	 * scanning the line pointers, before we return the value to the caller,
-	 * so that the caller doesn't set the VM bit incorrectly.
+	 * If only HEAP_PAGE_PRUNE_UPDATE_ViS is passed and not
+	 * HEAP_PAGE_PRUNE_FREEZE, prstate.all_frozen must be initialized to false
+	 * because we will not call heap_prepare_freeze_tuple() on each tuple.
+	 *
+	 * Dead tuples which will be removed by the end of vacuuming should not
+	 * preclude us from opportunistically freezing, so we do not clear
+	 * all_visible when we see LP_DEAD items. We fix that after determining
+	 * whether or not to freeze but before deciding whether or not to update
+	 * the VM so that we don't set the VM bit incorrectly.
+	 *
+	 * If not freezing and not updating the VM, we avoid the extra
+	 * bookkeeping. Initializing all_visible to false allows skipping the work
+	 * to update them in heap_prune_record_unchanged_lp_normal().
 	 */
 	if (prstate.attempt_freeze)
 	{
 		prstate.all_visible = true;
 		prstate.all_frozen = true;
 	}
+	else if ((options & HEAP_PAGE_PRUNE_UPDATE_VIS) != 0)
+	{
+		prstate.all_visible = true;
+		prstate.all_frozen = false;
+	}
 	else
 	{
-		/*
-		 * Initializing to false allows skipping the work to update them in
-		 * heap_prune_record_unchanged_lp_normal().
-		 */
 		prstate.all_visible = false;
 		prstate.all_frozen = false;
 	}
 
 	/*
-	 * The visibility cutoff xid is the newest xmin of live tuples on the
-	 * page.  In the common case, this will be set as the conflict horizon the
-	 * caller can use for updating the VM.  If, at the end of freezing and
-	 * pruning, the page is all-frozen, there is no possibility that any
-	 * running transaction on the standby does not see tuples on the page as
-	 * all-visible, so the conflict horizon remains InvalidTransactionId.
+	 * The visibility cutoff xid is the newest xmin of live, committed tuples
+	 * older than OldestXmin on the page. This field is only kept up-to-date
+	 * if the page is all-visible. As soon as a tuple is encountered that is
+	 * not visible to all, this field is unmaintained. As long as it is
+	 * maintained, it can be used to calculate the snapshot conflict horizon.
+	 * This is most likely to happen when updating the VM and/or freezing all
+	 * live tuples on the page. It is updated before returning to the caller
+	 * because vacuum does assert-build only validation on the page using this
+	 * field.
 	 */
 	prstate.visibility_cutoff_xid = InvalidTransactionId;
 
@@ -943,16 +951,15 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	 * visibility map bits based on information from the VM and from
 	 * all_visible and all_frozen variables.
 	 *
-	 * Though callers should set the VM if PD_ALL_VISIBLE is set, it is
-	 * allowed for the page-level bit to be set and the VM to be clear. We log
-	 * setting PD_ALL_VISIBLE on the heap page in a
-	 * XLOG_HEAP2_PRUNE_VACUUM_SCAN record and setting the VM bits in a later
-	 * emitted XLOG_HEAP2_VISIBLE record.
+	 * It is allowed for the page-level bit to be set and the VM to be clear,
+	 * however, we have a strong preference for keeping them in sync.
 	 *
-	 * Setting PD_ALL_VISIBLE when we are making the changes to the page that
-	 * render it all-visible allows us to omit the heap page from the WAL
-	 * chain when later updating the VM -- even when checksums/wal_log_hints
-	 * are enabled.
+	 * Prior to Postgres 19, it was possible for the page-level bit to be set
+	 * and the VM bit to be clear. This could happen if we crashed after
+	 * setting PD_ALL_VISIBLE but before setting bits in the VM.
+	 *
+	 * As such, it is possible to only update the VM when PD_ALL_VISIBLE is
+	 * already set.
 	 */
 	do_set_pd_vis = false;
 	do_set_vm = false;
@@ -961,6 +968,10 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 										   blockno, buffer, vmbuffer, blk_known_av,
 										   &prstate, &new_vmbits, &do_set_pd_vis);
 
+	/* Lock vmbuffer before entering a critical section */
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+
 	/* Any error while applying the changes is critical */
 	START_CRIT_SECTION();
 
@@ -991,7 +1002,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 			MarkBufferDirtyHint(buffer, true);
 	}
 
-	if (do_prune || do_freeze || do_set_pd_vis)
+	if (do_prune || do_freeze || do_set_pd_vis || do_set_vm)
 	{
 		/* Apply the planned item changes and repair page fragmentation. */
 		if (do_prune)
@@ -1008,12 +1019,32 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		if (do_set_pd_vis)
 			PageSetAllVisible(page);
 
-		MarkBufferDirty(buffer);
+		if (do_prune || do_freeze || do_set_pd_vis)
+			MarkBufferDirty(buffer);
+
+		if (do_set_vm)
+		{
+			Assert(PageIsAllVisible(page));
+
+			old_vmbits = visibilitymap_set_vmbits(blockno,
+												  vmbuffer, new_vmbits,
+												  RelationGetRelationName(relation));
+			if (old_vmbits == new_vmbits)
+			{
+				LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+				/* Unset so we don't emit WAL since no change occurred */
+				do_set_vm = false;
+			}
+		}
 
 		/*
-		 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
+		 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did If we were
+		 * only updating the VM and it turns out it was already set, we will
+		 * have unset do_set_vm earlier. As such, check it again before
+		 * emitting the record.
 		 */
-		if (RelationNeedsWAL(relation))
+		if (RelationNeedsWAL(relation) &&
+			(do_prune || do_freeze || do_set_pd_vis || do_set_vm))
 		{
 			/*
 			 * The snapshotConflictHorizon for the whole record should be the
@@ -1025,15 +1056,45 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 			 * on the standby with xids older than the youngest tuple this
 			 * record will freeze will conflict.
 			 */
-			TransactionId conflict_xid;
+			TransactionId conflict_xid = InvalidTransactionId;
 
-			if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
+			/*
+			 * If we are updating the VM, the conflict horizon is almost
+			 * always the visibility cutoff XID.
+			 *
+			 * Separately, if we are freezing any tuples, as an optimization,
+			 * we can use the visibility_cutoff_xid as the conflict horizon if
+			 * the page will be all-frozen. This is true even if there are
+			 * LP_DEAD line pointers because we ignored those when maintaining
+			 * the visibility_cutoff_xid. This will have been calculated
+			 * earlier as the frz_conflict_horizon when we determined we would
+			 * freeze.
+			 */
+			if (do_set_vm)
+				conflict_xid = prstate.visibility_cutoff_xid;
+			else if (do_freeze)
 				conflict_xid = frz_conflict_horizon;
-			else
+
+			/*
+			 * If we are removing tuples with a younger xmax than our so far
+			 * calculated conflict_xid, we must use this as our horizon.
+			 */
+			if (TransactionIdFollows(prstate.latest_xid_removed, conflict_xid))
 				conflict_xid = prstate.latest_xid_removed;
 
+			/*
+			 * We can omit the snapshot conflict horizon if we are not pruning
+			 * or freezing any tuples and are setting an already all-visible
+			 * page all-frozen in the VM. In this case, all of the tuples on
+			 * the page must already be visible to all MVCC snapshots on the
+			 * standby.
+			 */
+			if (!do_prune && !do_freeze && do_set_vm &&
+				blk_known_av && (new_vmbits & VISIBILITYMAP_ALL_FROZEN))
+				conflict_xid = InvalidTransactionId;
+
 			log_heap_prune_and_freeze(relation, buffer,
-									  InvalidBuffer, 0,
+									  vmbuffer, new_vmbits,
 									  conflict_xid,
 									  true,
 									  do_set_pd_vis,
@@ -1047,6 +1108,9 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 
 	END_CRIT_SECTION();
 
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 	Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
 
 	/*
@@ -1078,32 +1142,6 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	}
 #endif
 
-	/* Now set the VM */
-	if (do_set_vm)
-	{
-		TransactionId vm_conflict_horizon;
-
-		Assert((new_vmbits & VISIBILITYMAP_VALID_BITS) != 0);
-
-		/*
-		 * The conflict horizon for that record must be the newest xmin on the
-		 * page.  However, if the page is completely frozen, there can be no
-		 * conflict and the vm_conflict_horizon should remain
-		 * InvalidTransactionId.  This includes the case that we just froze
-		 * all the tuples; the prune-freeze record included the conflict XID
-		 * already so a snapshotConflictHorizon sufficient to make everything
-		 * safe for REDO was logged when the page's tuples were frozen.
-		 */
-		if (prstate.all_frozen)
-			vm_conflict_horizon = InvalidTransactionId;
-		else
-			vm_conflict_horizon = prstate.visibility_cutoff_xid;
-		old_vmbits = visibilitymap_set(relation, blockno,
-									   InvalidXLogRecPtr,
-									   vmbuffer, vm_conflict_horizon,
-									   new_vmbits);
-	}
-
 	/* Copy information back for caller */
 	presult->ndeleted = prstate.ndeleted;
 	presult->nnewlpdead = prstate.ndead;
@@ -2261,7 +2299,7 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
  * - Reaping: During vacuum phase III, items that are already LP_DEAD are
  *   marked as unused.
  *
- * - VM updates: After vacuum phase III, the heap page may be marked
+ * - VM updates: After vacuum phases I and III, the heap page may be marked
  *   all-visible and all-frozen.
  *
  * These changes all happen together, so we use a single WAL record for them
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 493ddeacbc0..394f62a21e5 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -239,7 +239,8 @@ typedef struct PruneFreezeResult
 	 * visibility map before updating it during phase I of vacuuming.
 	 * new_vmbits are the state of those bits after phase I of vacuuming.
 	 *
-	 * These are only set if the HEAP_PAGE_PRUNE_UPDATE_VIS option is set.
+	 * These are only set if the HEAP_PAGE_PRUNE_UPDATE_VIS option is set and
+	 * we have actually updated the VM.
 	 */
 	uint8		new_vmbits;
 	uint8		old_vmbits;
-- 
2.43.0

