From b191695afcba438ae8c5d1c3b4d5939c76d22a4f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 2 Dec 2025 16:16:22 -0500
Subject: [PATCH v24 07/16] Eliminate XLOG_HEAP2_VISIBLE from vacuum phase I
 prune/freeze

Vacuum no longer emits a separate WAL record for each page it sets
all-visible or all-frozen during phase I. Instead, visibility map
updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record that
is already emitted for pruning and freezing.

Previously, heap_page_prune_and_freeze() determined whether a page was
all-visible, but the corresponding VM bits were only set later in
lazy_scan_prune(). Now the VM is updated immediately in
heap_page_prune_and_freeze(), at the same time as the heap
modifications.

This change applies only to vacuum phase I, not to pruning performed
during normal page access.

NOTE: This is the main commit; all review-only commits preceding it
will be squashed into it.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/pruneheap.c | 255 ++++++++++++++++------------
 1 file changed, 144 insertions(+), 111 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 96dc902ec12..489b8487599 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -194,6 +194,12 @@ static void page_verify_redirects(Page page);
 static bool heap_page_will_freeze(Relation relation, Buffer buffer,
 								  bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
 								  PruneState *prstate);
+static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+									  uint8 new_vmbits,
+									  TransactionId latest_xid_removed,
+									  TransactionId frz_conflict_horizon,
+									  TransactionId visibility_cutoff_xid,
+									  bool blk_already_av);
 static bool heap_page_will_set_vm(Relation relation,
 								  BlockNumber heap_blk,
 								  Buffer heap_buf,
@@ -782,6 +788,64 @@ heap_page_will_freeze(Relation relation, Buffer buffer,
 	return do_freeze;
 }
 
+/*
+ * Calculate the snapshot conflict horizon for the whole
+ * XLOG_HEAP2_PRUNE_VACUUM_SCAN or XLOG_HEAP2_PRUNE_ON_ACCESS record, given
+ * which modifications (do_prune, do_freeze, do_set_vm) the record includes.
+ * Returns InvalidTransactionId if no horizon applies.
+ */
+static TransactionId
+get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm, uint8 new_vmbits,
+				 TransactionId latest_xid_removed, TransactionId frz_conflict_horizon,
+				 TransactionId visibility_cutoff_xid, bool blk_already_av)
+{
+	TransactionId conflict_xid;
+
+	/*
+	 * We can omit the snapshot conflict horizon if we are not pruning or
+	 * freezing any tuples and are setting an already all-visible page
+	 * all-frozen in the VM.  In this case, all of the tuples on the page
+	 * must already be visible to all MVCC snapshots on the standby.
+	 */
+	if (!do_prune && !do_freeze &&
+		do_set_vm && blk_already_av && (new_vmbits & VISIBILITYMAP_ALL_FROZEN))
+		return InvalidTransactionId;
+
+	/*
+	 * The snapshotConflictHorizon for the whole record should be the most
+	 * conservative of all the horizons calculated for any of the possible
+	 * modifications.  If this record will prune tuples, any transactions on
+	 * the standby older than the youngest xmax among the tuples it removes
+	 * will conflict.  If this record will freeze tuples, any transactions on
+	 * the standby with xids older than the youngest tuple this record will
+	 * freeze will conflict.
+	 */
+	conflict_xid = InvalidTransactionId;
+
+	/*
+	 * If we are updating the VM, the conflict horizon is almost always the
+	 * visibility cutoff XID (removed tuples may still raise it below).
+	 * Otherwise, if we are freezing tuples, use frz_conflict_horizon.  When
+	 * the page will be all-frozen, that was already set to the
+	 * visibility_cutoff_xid as an optimization when we decided to freeze;
+	 * this is valid even with LP_DEAD line pointers because those were
+	 * ignored when maintaining the visibility_cutoff_xid.
+	 */
+	if (do_set_vm)
+		conflict_xid = visibility_cutoff_xid;
+	else if (do_freeze)
+		conflict_xid = frz_conflict_horizon;
+
+	/*
+	 * If we are removing tuples whose xmax is younger than the conflict_xid
+	 * calculated so far, we must use that xmax as our horizon instead.
+	 */
+	if (TransactionIdFollows(latest_xid_removed, conflict_xid))
+		conflict_xid = latest_xid_removed;
+
+	return conflict_xid;
+}
+
 /*
  * Decide whether to set the visibility map bits (all-visible and all-frozen)
  * for heap_blk using information from PruneState and blk_known_av. Some
@@ -969,7 +1033,6 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 	Buffer		vmbuffer = params->vmbuffer;
 	Page		page = BufferGetPage(buffer);
 	BlockNumber blockno = BufferGetBlockNumber(buffer);
-	TransactionId vm_conflict_horizon = InvalidTransactionId;
 	PruneState	prstate;
 	bool		do_freeze;
 	bool		do_prune;
@@ -977,6 +1040,9 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 	bool		do_set_vm;
 	bool		did_tuple_hint_fpi;
 	int64		fpi_before = pgWalUsage.wal_fpi;
+	TransactionId conflict_xid = InvalidTransactionId;
+	uint8		new_vmbits = 0;
+	uint8		old_vmbits = 0;
 
 	/* Initialize prstate */
 	prune_freeze_setup(params,
@@ -1038,6 +1104,36 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		prstate.all_visible = prstate.all_frozen = false;
 
 	Assert(!prstate.all_frozen || prstate.all_visible);
+	Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
+
+	/*
+	 * Decide whether to set the page-level PD_ALL_VISIBLE bit and the VM bits
+	 * based on information from the VM and the all_visible/all_frozen flags.
+	 *
+	 * While it is valid for PD_ALL_VISIBLE to be set when the corresponding
+	 * VM bit is clear, we strongly prefer to keep them in sync.
+	 *
+	 * Accordingly, we also allow updating only the VM when PD_ALL_VISIBLE has
+	 * already been set. Setting only the VM is most common when setting an
+	 * already all-visible page all-frozen.
+	 */
+	do_set_vm = heap_page_will_set_vm(params->relation,
+									  blockno, buffer, vmbuffer, params->blk_known_av,
+									  &prstate, &new_vmbits);
+
+	/*
+	 * new_vmbits should be 0 regardless of whether or not the page is
+	 * all-visible if we do not intend to set the VM.
+	 */
+	Assert(do_set_vm || new_vmbits == 0);
+
+	conflict_xid = get_conflict_xid(do_prune, do_freeze, do_set_vm, new_vmbits,
+									prstate.latest_xid_removed, prstate.frz_conflict_horizon,
+									prstate.visibility_cutoff_xid, params->blk_known_av);
+
+	/* Lock vmbuffer before entering a critical section */
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
 
 	/* Any error while applying the changes is critical */
 	START_CRIT_SECTION();
@@ -1059,14 +1155,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 
 		/*
 		 * If that's all we had to do to the page, this is a non-WAL-logged
-		 * hint.  If we are going to freeze or prune the page, we will mark
-		 * the buffer dirty below.
+		 * hint.  If we are going to freeze or prune the page or set
+		 * PD_ALL_VISIBLE, we will mark the buffer dirty below.
+		 *
+		 * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
+		 * for the VM to be set and PD_ALL_VISIBLE to be clear.
 		 */
-		if (!do_freeze && !do_prune)
+		if (!do_freeze && !do_prune && !do_set_vm)
 			MarkBufferDirtyHint(buffer, true);
 	}
 
-	if (do_prune || do_freeze)
+	if (do_prune || do_freeze || do_set_vm)
 	{
 		/* Apply the planned item changes and repair page fragmentation. */
 		if (do_prune)
@@ -1080,6 +1179,15 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		if (do_freeze)
 			heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
 
+		if (do_set_vm)
+		{
+			PageSetAllVisible(page);
+			old_vmbits = visibilitymap_set_vmbits(blockno,
+												  vmbuffer, new_vmbits,
+												  params->relation->rd_locator);
+			Assert(old_vmbits != new_vmbits);
+		}
+
 		MarkBufferDirty(buffer);
 
 		/*
@@ -1087,29 +1195,12 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		 */
 		if (RelationNeedsWAL(params->relation))
 		{
-			/*
-			 * The snapshotConflictHorizon for the whole record should be the
-			 * most conservative of all the horizons calculated for any of the
-			 * possible modifications.  If this record will prune tuples, any
-			 * transactions on the standby older than the youngest xmax of the
-			 * most recently removed tuple this record will prune will
-			 * conflict.  If this record will freeze tuples, any transactions
-			 * on the standby with xids older than the youngest tuple this
-			 * record will freeze will conflict.
-			 */
-			TransactionId conflict_xid;
-
-			if (TransactionIdFollows(prstate.frz_conflict_horizon,
-									 prstate.latest_xid_removed))
-				conflict_xid = prstate.frz_conflict_horizon;
-			else
-				conflict_xid = prstate.latest_xid_removed;
-
 			log_heap_prune_and_freeze(params->relation, buffer,
-									  InvalidBuffer,	/* vmbuffer */
-									  0,	/* vmflags */
+									  do_set_vm ? vmbuffer : InvalidBuffer,
+									  do_set_vm ? new_vmbits : 0,
 									  conflict_xid,
-									  true, params->reason,
+									  true, /* cleanup lock */
+									  params->reason,
 									  prstate.frozen, prstate.nfrozen,
 									  prstate.redirected, prstate.nredirected,
 									  prstate.nowdead, prstate.ndead,
@@ -1119,46 +1210,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 
 	END_CRIT_SECTION();
 
-	/* Copy information back for caller */
-	presult->ndeleted = prstate.ndeleted;
-	presult->nnewlpdead = prstate.ndead;
-	presult->nfrozen = prstate.nfrozen;
-	presult->live_tuples = prstate.live_tuples;
-	presult->recently_dead_tuples = prstate.recently_dead_tuples;
-	presult->hastup = prstate.hastup;
-
-	presult->lpdead_items = prstate.lpdead_items;
-	/* the presult->deadoffsets array was already filled in */
-
-	if (prstate.attempt_freeze)
-	{
-		if (presult->nfrozen > 0)
-		{
-			*new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
-			*new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
-		}
-		else
-		{
-			*new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
-			*new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
-		}
-	}
-
-	presult->new_vmbits = 0;
-	presult->old_vmbits = 0;
-
-	/*
-	 * If updating the visibility map, the conflict horizon for that record
-	 * must be the newest xmin on the page.  However, if the page is
-	 * completely frozen, there can be no conflict and the vm_conflict_horizon
-	 * should remain InvalidTransactionId.  This includes the case that we
-	 * just froze all the tuples; the prune-freeze record included the
-	 * conflict XID already so we don't need to again.
-	 */
-	if (prstate.all_frozen)
-		vm_conflict_horizon = InvalidTransactionId;
-	else
-		vm_conflict_horizon = prstate.visibility_cutoff_xid;
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
 	/*
 	 * During its second pass over the heap, VACUUM calls
@@ -1173,7 +1226,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		TransactionId debug_cutoff;
 		bool		debug_all_frozen;
 
-		Assert(presult->lpdead_items == 0);
+		Assert(prstate.lpdead_items == 0);
+		Assert(prstate.cutoffs);
 
 		Assert(heap_page_is_all_visible(params->relation, buffer,
 										prstate.cutoffs->OldestXmin,
@@ -1183,56 +1237,35 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		Assert(prstate.all_frozen == debug_all_frozen);
 
 		Assert(!TransactionIdIsValid(debug_cutoff) ||
-			   debug_cutoff == vm_conflict_horizon);
+			   debug_cutoff == prstate.visibility_cutoff_xid);
 	}
 #endif
 
-	Assert(!prstate.all_frozen || prstate.all_visible);
-	Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
-
-	/*
-	 * Decide whether to set the page-level PD_ALL_VISIBLE bit and the VM bits
-	 * based on information from the VM and the all_visible/all_frozen flags.
-	 *
-	 * While it is valid for PD_ALL_VISIBLE to be set when the corresponding
-	 * VM bit is clear, we strongly prefer to keep them in sync.
-	 *
-	 * Accordingly, we also allow updating only the VM when PD_ALL_VISIBLE has
-	 * already been set. Setting only the VM is most common when setting an
-	 * already all-visible page all-frozen.
-	 */
-	do_set_vm = heap_page_will_set_vm(params->relation,
-									  blockno,
-									  buffer,
-									  vmbuffer,
-									  params->blk_known_av,
-									  &prstate,
-									  &presult->new_vmbits);
+	/* Copy information back for caller */
+	presult->ndeleted = prstate.ndeleted;
+	presult->nnewlpdead = prstate.ndead;
+	presult->nfrozen = prstate.nfrozen;
+	presult->live_tuples = prstate.live_tuples;
+	presult->recently_dead_tuples = prstate.recently_dead_tuples;
+	presult->hastup = prstate.hastup;
+	presult->new_vmbits = new_vmbits;
+	presult->old_vmbits = old_vmbits;
 
-	/*
-	 * new_vmbits should be 0 regardless of whether or not the page is
-	 * all-visible if we do not intend to set the VM.
-	 */
-	Assert(do_set_vm || presult->new_vmbits == 0);
+	presult->lpdead_items = prstate.lpdead_items;
+	/* the presult->deadoffsets array was already filled in */
 
-	if (do_set_vm)
+	if (prstate.attempt_freeze)
 	{
-		/*
-		 * It should never be the case that the visibility map page is set
-		 * while the page-level bit is clear, but the reverse is allowed (if
-		 * checksums are not enabled).
-		 *
-		 * The heap page is added to the WAL chain even if it wasn't modified,
-		 * so we still need to mark it dirty. The only scenario where it isn't
-		 * modified in phase I is when the VM was truncated or removed, which
-		 * isn't worth optimizing for.
-		 */
-		PageSetAllVisible(page);
-		MarkBufferDirty(buffer);
-		presult->old_vmbits = visibilitymap_set(params->relation, blockno, buffer,
-												InvalidXLogRecPtr,
-												vmbuffer, vm_conflict_horizon,
-												presult->new_vmbits);
+		if (presult->nfrozen > 0)
+		{
+			*new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
+			*new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
+		}
+		else
+		{
+			*new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
+			*new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
+		}
 	}
 }
 
-- 
2.43.0

