From a94267babeedec6705fd7f3b43242c6ba0e458c0 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 2 Dec 2025 16:16:22 -0500
Subject: [PATCH v34 04/14] Eliminate XLOG_HEAP2_VISIBLE from vacuum phase I
 prune/freeze

Vacuum no longer emits a separate WAL record for each page that it sets
all-visible or all-frozen during phase I. Instead, visibility map
updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record that
is already emitted for pruning and freezing.

Previously, heap_page_prune_and_freeze() determined whether a page was
all-visible, but the corresponding VM bits were only set later in
lazy_scan_prune(). Now the VM is updated immediately in
heap_page_prune_and_freeze(), at the same time as the heap
modifications.

This change applies only to vacuum phase I, not to pruning performed
during normal page access.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/pruneheap.c | 266 ++++++++++++++++------------
 1 file changed, 152 insertions(+), 114 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 192df9a2218..b8ba5b7a681 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -205,6 +205,11 @@ static bool heap_page_will_set_vm(PruneState *prstate,
 								  int nlpdead_items,
 								  uint8 *old_vmbits,
 								  uint8 *new_vmbits);
+static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+									  uint8 old_vmbits, uint8 new_vmbits,
+									  TransactionId latest_xid_removed,
+									  TransactionId frz_conflict_horizon,
+									  TransactionId visibility_cutoff_xid);
 
 
 /*
@@ -804,6 +809,62 @@ heap_page_will_freeze(Relation relation, Buffer buffer,
 	return do_freeze;
 }
 
+/*
+ * Calculate the snapshot conflict horizon for the whole
+ * XLOG_HEAP2_PRUNE_VACUUM_SCAN or XLOG_HEAP2_PRUNE_ON_ACCESS record.
+ */
+static TransactionId
+get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+				 uint8 old_vmbits, uint8 new_vmbits,
+				 TransactionId latest_xid_removed, TransactionId frz_conflict_horizon,
+				 TransactionId visibility_cutoff_xid)
+{
+	TransactionId conflict_xid;
+
+	/*
+	 * No conflict horizon is needed when we neither prune nor freeze and only
+	 * mark an already all-visible page all-frozen in the VM: every tuple on
+	 * the page must already be seen as frozen by all MVCC snapshots on the
+	 * standby.  (new_vmbits is 0 unless do_set_vm, so no separate check.)
+	 */
+	if (!do_prune &&
+		!do_freeze &&
+		(old_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0 &&
+		(new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0)
+		return InvalidTransactionId;
+
+	/*
+	 * The snapshot conflict horizon for the whole record should be the most
+	 * conservative of the horizons calculated for each modification it makes.
+	 * If this record will prune tuples, any transactions on the standby older
+	 * than the newest xmax among the removed tuples will conflict.  If this
+	 * record will freeze tuples, any transactions on the standby with xids
+	 * older than the youngest tuple this record will freeze will conflict.
+	 *
+	 * If we are setting the VM, the conflict horizon is almost always the
+	 * visibility cutoff XID, except in the situation described above.
+	 *
+	 * Start from the VM horizon if setting the VM, else the freeze horizon if
+	 * freezing.  NOTE(review): frz_conflict_horizon is not consulted when
+	 * do_set_vm is set; presumably visibility_cutoff_xid covers it - confirm.
+	 */
+	if (do_set_vm)
+		conflict_xid = visibility_cutoff_xid;
+	else if (do_freeze)
+		conflict_xid = frz_conflict_horizon;
+	else
+		conflict_xid = InvalidTransactionId;
+
+	/*
+	 * If we are removing tuples whose xmax is newer than the conflict_xid
+	 * calculated so far, that xmax must become the record's horizon instead.
+	 */
+	if (TransactionIdFollows(latest_xid_removed, conflict_xid))
+		conflict_xid = latest_xid_removed;
+
+	return conflict_xid;
+}
+
 /*
  * Helper to correct any corruption detected on a heap page and its
  * corresponding visibility map page after pruning but before setting the
@@ -991,7 +1052,6 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 	Buffer		vmbuffer = params->vmbuffer;
 	Page		page = BufferGetPage(buffer);
 	BlockNumber blockno = BufferGetBlockNumber(buffer);
-	TransactionId vm_conflict_horizon = InvalidTransactionId;
 	PruneState	prstate;
 	bool		do_freeze;
 	bool		do_prune;
@@ -999,6 +1059,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 	bool		do_set_vm;
 	bool		did_tuple_hint_fpi;
 	int64		fpi_before = pgWalUsage.wal_fpi;
+	TransactionId conflict_xid;
 	uint8		new_vmbits;
 	uint8		old_vmbits;
 
@@ -1063,6 +1124,37 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		prstate.all_visible = prstate.all_frozen = false;
 
 	Assert(!prstate.all_frozen || prstate.all_visible);
+	Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
+
+	/*
+	 * Decide whether to set the VM bits based on information from the VM and
+	 * the all_visible/all_frozen flags.
+	 */
+	do_set_vm = heap_page_will_set_vm(&prstate,
+									  params->relation,
+									  blockno,
+									  buffer,
+									  page,
+									  vmbuffer,
+									  prstate.lpdead_items,
+									  &old_vmbits,
+									  &new_vmbits);
+
+	/*
+	 * new_vmbits should be 0 regardless of whether or not the page is
+	 * all-visible if we do not intend to set the VM.
+	 */
+	Assert(do_set_vm || new_vmbits == 0);
+
+	conflict_xid = get_conflict_xid(do_prune, do_freeze, do_set_vm,
+									old_vmbits, new_vmbits,
+									prstate.latest_xid_removed,
+									prstate.frz_conflict_horizon,
+									prstate.visibility_cutoff_xid);
+
+	/* Lock vmbuffer before entering a critical section */
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
 
 	/* Any error while applying the changes is critical */
 	START_CRIT_SECTION();
@@ -1084,14 +1176,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 
 		/*
 		 * If that's all we had to do to the page, this is a non-WAL-logged
-		 * hint.  If we are going to freeze or prune the page, we will mark
-		 * the buffer dirty below.
+		 * hint.  If we are going to freeze or prune the page or set
+		 * PD_ALL_VISIBLE, we will mark the buffer dirty below.
+		 *
+		 * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
+		 * for the VM to be set and PD_ALL_VISIBLE to be clear.
 		 */
-		if (!do_freeze && !do_prune)
+		if (!do_freeze && !do_prune && !do_set_vm)
 			MarkBufferDirtyHint(buffer, true);
 	}
 
-	if (do_prune || do_freeze)
+	if (do_prune || do_freeze || do_set_vm)
 	{
 		/* Apply the planned item changes and repair page fragmentation. */
 		if (do_prune)
@@ -1105,6 +1200,26 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		if (do_freeze)
 			heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
 
+		/* Set the visibility map and page visibility hint */
+		if (do_set_vm)
+		{
+			/*
+			 * While it is valid for PD_ALL_VISIBLE to be set when the
+			 * corresponding VM bit is clear, we strongly prefer to keep them
+			 * in sync.
+			 *
+			 * The heap buffer must be marked dirty before adding it to the
+			 * WAL chain when setting the VM. We don't worry about
+			 * unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
+			 * already set, though. It is extremely rare to have a clean heap
+			 * buffer with PD_ALL_VISIBLE already set and the VM bits clear,
+			 * so there is no point in optimizing it.
+			 */
+			PageSetAllVisible(page);
+			visibilitymap_set_vmbits(blockno, vmbuffer, new_vmbits,
+									 params->relation->rd_locator);
+		}
+
 		MarkBufferDirty(buffer);
 
 		/*
@@ -1112,29 +1227,12 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		 */
 		if (RelationNeedsWAL(params->relation))
 		{
-			/*
-			 * The snapshotConflictHorizon for the whole record should be the
-			 * most conservative of all the horizons calculated for any of the
-			 * possible modifications.  If this record will prune tuples, any
-			 * transactions on the standby older than the youngest xmax of the
-			 * most recently removed tuple this record will prune will
-			 * conflict.  If this record will freeze tuples, any transactions
-			 * on the standby with xids older than the youngest tuple this
-			 * record will freeze will conflict.
-			 */
-			TransactionId conflict_xid;
-
-			if (TransactionIdFollows(prstate.frz_conflict_horizon,
-									 prstate.latest_xid_removed))
-				conflict_xid = prstate.frz_conflict_horizon;
-			else
-				conflict_xid = prstate.latest_xid_removed;
-
 			log_heap_prune_and_freeze(params->relation, buffer,
-									  InvalidBuffer,	/* vmbuffer */
-									  0,	/* vmflags */
+									  do_set_vm ? vmbuffer : InvalidBuffer,
+									  do_set_vm ? new_vmbits : 0,
 									  conflict_xid,
-									  true, params->reason,
+									  true, /* cleanup lock */
+									  params->reason,
 									  prstate.frozen, prstate.nfrozen,
 									  prstate.redirected, prstate.nredirected,
 									  prstate.nowdead, prstate.ndead,
@@ -1144,43 +1242,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 
 	END_CRIT_SECTION();
 
-	/* Copy information back for caller */
-	presult->ndeleted = prstate.ndeleted;
-	presult->nnewlpdead = prstate.ndead;
-	presult->nfrozen = prstate.nfrozen;
-	presult->live_tuples = prstate.live_tuples;
-	presult->recently_dead_tuples = prstate.recently_dead_tuples;
-	presult->hastup = prstate.hastup;
-
-	presult->lpdead_items = prstate.lpdead_items;
-	/* the presult->deadoffsets array was already filled in */
-
-	if (prstate.attempt_freeze)
-	{
-		if (presult->nfrozen > 0)
-		{
-			*new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
-			*new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
-		}
-		else
-		{
-			*new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
-			*new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
-		}
-	}
-
-	/*
-	 * If updating the visibility map, the conflict horizon for that record
-	 * must be the newest xmin on the page.  However, if the page is
-	 * completely frozen, there can be no conflict and the vm_conflict_horizon
-	 * should remain InvalidTransactionId.  This includes the case that we
-	 * just froze all the tuples; the prune-freeze record included the
-	 * conflict XID already so we don't need to again.
-	 */
-	if (prstate.all_frozen)
-		vm_conflict_horizon = InvalidTransactionId;
-	else
-		vm_conflict_horizon = prstate.visibility_cutoff_xid;
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
 	/*
 	 * During its second pass over the heap, VACUUM calls
@@ -1195,7 +1258,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		TransactionId debug_cutoff;
 		bool		debug_all_frozen;
 
-		Assert(presult->lpdead_items == 0);
+		Assert(prstate.lpdead_items == 0);
+		Assert(prstate.cutoffs);
 
 		Assert(heap_page_is_all_visible(params->relation, buffer,
 										prstate.cutoffs->OldestXmin,
@@ -1205,63 +1269,23 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		Assert(prstate.all_frozen == debug_all_frozen);
 
 		Assert(!TransactionIdIsValid(debug_cutoff) ||
-			   debug_cutoff == vm_conflict_horizon);
+			   debug_cutoff == prstate.visibility_cutoff_xid);
 	}
 #endif
 
-	/* Now update the visibility map and PD_ALL_VISIBLE hint */
-	Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
-
-	do_set_vm = heap_page_will_set_vm(&prstate,
-									  params->relation,
-									  blockno,
-									  buffer,
-									  page,
-									  vmbuffer,
-									  prstate.lpdead_items,
-									  &old_vmbits,
-									  &new_vmbits);
+	/* Copy information back for caller */
+	presult->ndeleted = prstate.ndeleted;
+	presult->nnewlpdead = prstate.ndead;
+	presult->nfrozen = prstate.nfrozen;
+	presult->live_tuples = prstate.live_tuples;
+	presult->recently_dead_tuples = prstate.recently_dead_tuples;
+	presult->hastup = prstate.hastup;
 
-	/*
-	 * new_vmbits should be 0 regardless of whether or not the page is
-	 * all-visible if we do not intend to set the VM.
-	 */
-	Assert(do_set_vm || new_vmbits == 0);
+	presult->lpdead_items = prstate.lpdead_items;
+	/* the presult->deadoffsets array was already filled in */
 
-	/* Set the visibility map and page visibility hint, if relevant */
 	if (do_set_vm)
 	{
-		Assert(prstate.all_visible);
-
-		/*
-		 * It should never be the case that the visibility map page is set
-		 * while the page-level bit is clear (and if so, we cleared it above),
-		 * but the reverse is allowed (if checksums are not enabled).
-		 * Regardless, set both bits so that we get back in sync.
-		 *
-		 * The heap buffer must be marked dirty before adding it to the WAL
-		 * chain when setting the VM. We don't worry about unnecessarily
-		 * dirtying the heap buffer if PD_ALL_VISIBLE is already set, though.
-		 * It is extremely rare to have a clean heap buffer with
-		 * PD_ALL_VISIBLE already set and the VM bits clear, so there is no
-		 * point in optimizing it.
-		 */
-		PageSetAllVisible(page);
-		MarkBufferDirty(buffer);
-
-		/*
-		 * If the page is being set all-frozen, we pass InvalidTransactionId
-		 * as the cutoff_xid, since a snapshot conflict horizon sufficient to
-		 * make everything safe for REDO was logged when the page's tuples
-		 * were frozen.
-		 */
-		Assert(!prstate.all_frozen || !TransactionIdIsValid(vm_conflict_horizon));
-
-		visibilitymap_set(params->relation, blockno, buffer,
-						  InvalidXLogRecPtr,
-						  vmbuffer, vm_conflict_horizon,
-						  new_vmbits);
-
 		if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
 		{
 			presult->new_all_visible_pages = 1;
@@ -1272,6 +1296,20 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 				 prstate.all_frozen)
 			presult->new_all_frozen_pages = 1;
 	}
+
+	if (prstate.attempt_freeze)
+	{
+		if (presult->nfrozen > 0)
+		{
+			*new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
+			*new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
+		}
+		else
+		{
+			*new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
+			*new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
+		}
+	}
 }
 
 
-- 
2.43.0

