From 81b134346c1a981382d1eb915472aa3f26bb3586 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 18 Jun 2025 12:42:13 -0400
Subject: [PATCH v9 05/22] Eliminate xl_heap_visible from vacuum phase III

Instead of emitting a separate xl_heap_visible record for each page that
is rendered all-visible by vacuum's third phase, include the updates to
the VM in the already emitted xl_heap_prune record.

The visibilitymap bits are stored in the flags member of the
xl_heap_prune record.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/heapam_xlog.c  | 145 ++++++++++++++++++----
 src/backend/access/heap/pruneheap.c    |  66 ++++++++--
 src/backend/access/heap/vacuumlazy.c   | 164 +++++++++++++++++--------
 src/backend/access/rmgrdesc/heapdesc.c |   7 +-
 src/include/access/heapam.h            |   9 ++
 src/include/access/heapam_xlog.h       |  36 ++++--
 6 files changed, 330 insertions(+), 97 deletions(-)

diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 0820f7d052d..11c11929ed9 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -35,7 +35,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Buffer		buffer;
 	RelFileLocator rlocator;
 	BlockNumber blkno;
-	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
+	uint8		vmflags = 0;
+	Size		freespace = 0;
 
 	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
 	memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
@@ -50,11 +52,17 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
 		   (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
 
+	vmflags = xlrec.flags & VISIBILITYMAP_VALID_BITS;
+
 	/*
-	 * We are about to remove and/or freeze tuples.  In Hot Standby mode,
-	 * ensure that there are no queries running for which the removed tuples
-	 * are still visible or which still consider the frozen xids as running.
-	 * The conflict horizon XID comes after xl_heap_prune.
+	 * After xl_heap_prune is the optional snapshot conflict horizon.
+	 *
+	 * In Hot Standby mode, we must ensure that there are no running queries
+	 * which would conflict with the changes in this record. That means we
+	 * can't replay this record if it removes tuples that are still visible to
+	 * transactions on the standby, freeze tuples with xids that are still
+	 * considered running on the standby, or set a page as all-visible in the
+	 * VM if it isn't all-visible to all transactions on the standby.
 	 */
 	if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
 	{
@@ -71,12 +79,12 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	}
 
 	/*
-	 * If we have a full-page image, restore it and we're done.
+	 * If we have a full-page image of the heap block, restore it and we're
+	 * done with the heap block.
 	 */
-	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
-										   (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
-										   &buffer);
-	if (action == BLK_NEEDS_REDO)
+	if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
+									  (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
+									  &buffer) == BLK_NEEDS_REDO)
 	{
 		Page		page = BufferGetPage(buffer);
 		OffsetNumber *redirected;
@@ -89,6 +97,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		Size		datalen;
 		xlhp_freeze_plan *plans;
 		OffsetNumber *frz_offsets;
+		bool		do_prune;
+		bool		mark_buffer_dirty;
+		bool		set_heap_lsn;
 		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
 
 		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
@@ -97,11 +108,18 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 											   &ndead, &nowdead,
 											   &nunused, &nowunused);
 
+		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+		set_heap_lsn = mark_buffer_dirty = do_prune || nplans > 0;
+
+		/* Ensure the record does something */
+		Assert(do_prune || nplans > 0 ||
+			   vmflags & VISIBILITYMAP_VALID_BITS);
+
 		/*
 		 * Update all line pointers per the record, and repair fragmentation
 		 * if needed.
 		 */
-		if (nredirected > 0 || ndead > 0 || nunused > 0)
+		if (do_prune)
 			heap_page_prune_execute(buffer,
 									(xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
 									redirected, nredirected,
@@ -138,36 +156,117 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		/* There should be no more data */
 		Assert((char *) frz_offsets == dataptr + datalen);
 
+		/*
+		 * Now set PD_ALL_VISIBLE, if required. We'll only do this if we are
+		 * also going to set bits in the VM later.
+		 *
+		 * We must never end up with the VM bit set and the page-level
+		 * PD_ALL_VISIBLE bit clear. If that were to occur, a subsequent page
+		 * modification would fail to clear the VM bit.
+		 */
+		if ((vmflags & VISIBILITYMAP_VALID_BITS) && !PageIsAllVisible(page))
+		{
+			PageSetAllVisible(page);
+
+			/*
+			 * If the only change to the heap page is setting PD_ALL_VISIBLE,
+			 * we can avoid setting the page LSN unless checksums or
+			 * wal_log_hints are enabled.
+			 */
+			set_heap_lsn = XLogHintBitIsNeeded() ? true : set_heap_lsn;
+			mark_buffer_dirty = true;
+		}
+
 		/*
 		 * Note: we don't worry about updating the page's prunability hints.
 		 * At worst this will cause an extra prune cycle to occur soon.
 		 */
 
-		PageSetLSN(page, lsn);
-		MarkBufferDirty(buffer);
+		if (mark_buffer_dirty)
+			MarkBufferDirty(buffer);
+		if (set_heap_lsn)
+			PageSetLSN(page, lsn);
 	}
 
 	/*
-	 * If we released any space or line pointers, update the free space map.
+	 * If we released any space or line pointers or will be setting a page in
+	 * the visibility map, measure the page's freespace to later update the
+	 * freespace map.
+	 *
+	 * Even if we are just updating the VM (and thus not freeing up any
+	 * space), we'll still update the FSM for this page. Since FSM is not
+	 * WAL-logged and only updated heuristically, it easily becomes stale in
+	 * standbys.  If the standby is later promoted and runs VACUUM, it will
+	 * skip updating individual free space figures for pages that became
+	 * all-visible (or all-frozen, depending on the vacuum mode,) which is
+	 * troublesome when FreeSpaceMapVacuum propagates too optimistic free
+	 * space values to upper FSM layers; later inserters try to use such pages
+	 * only to find out that they are unusable.  This can cause long stalls
+	 * when there are many such pages.
+	 *
+	 * Forestall those problems by updating FSM's idea about a page that is
+	 * becoming all-visible or all-frozen.
 	 *
 	 * Do this regardless of a full-page image being applied, since the FSM
 	 * data is not in the page anyway.
+	 *
+	 * We want to avoid holding an exclusive lock on the heap buffer while
+	 * doing IO (either of the FSM or the VM), so we'll release the lock on
+	 * the heap buffer before doing either.
 	 */
 	if (BufferIsValid(buffer))
 	{
-		if (xlrec.flags & (XLHP_HAS_REDIRECTIONS |
-						   XLHP_HAS_DEAD_ITEMS |
-						   XLHP_HAS_NOW_UNUSED_ITEMS))
-		{
-			Size		freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
+		if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
+							XLHP_HAS_DEAD_ITEMS |
+							XLHP_HAS_NOW_UNUSED_ITEMS)) ||
+			vmflags & VISIBILITYMAP_VALID_BITS)
+			freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
+
+		UnlockReleaseBuffer(buffer);
+	}
+
+	/*
+	 * Read and update the VM block. Even if we skipped updating the heap page
+	 * due to the file being dropped or truncated later in recovery, it's
+	 * still safe to update the visibility map.  Any WAL record that clears
+	 * the visibility map bit does so before checking the page LSN, so any
+	 * bits that need to be cleared will still be cleared.
+	 *
+	 * Note that it is _only_ okay that we do not hold a lock on the heap page
+	 * because we are in recovery and can expect no other writers to clear
+	 * PD_ALL_VISIBLE before we are able to update the VM.
+	 */
+	if (vmflags & VISIBILITYMAP_VALID_BITS &&
+		XLogReadBufferForRedoExtended(record, 1,
+									  RBM_ZERO_ON_ERROR,
+									  false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+		uint8		old_vmbits = 0;
+		Relation	reln = CreateFakeRelcacheEntry(rlocator);
 
-			UnlockReleaseBuffer(buffer);
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		old_vmbits = visibilitymap_set_vmbits(reln, blkno, vmbuffer, vmflags);
 
-			XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
+		/* Only set VM page LSN if we modified the page */
+		if (old_vmbits != vmflags)
+		{
+			Assert(BufferIsDirty(vmbuffer));
+			PageSetLSN(BufferGetPage(vmbuffer), lsn);
 		}
-		else
-			UnlockReleaseBuffer(buffer);
+
+		FreeFakeRelcacheEntry(reln);
 	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
+
+	if (freespace > 0)
+		XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
 }
 
 /*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 7ebd22f00a3..f0b33d1b696 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -21,6 +21,7 @@
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
+#include "access/visibilitymapdefs.h"
 #include "commands/vacuum.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
@@ -835,6 +836,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 				conflict_xid = prstate.latest_xid_removed;
 
 			log_heap_prune_and_freeze(relation, buffer,
+									  InvalidBuffer, 0, false,
 									  conflict_xid,
 									  true, reason,
 									  prstate.frozen, prstate.nfrozen,
@@ -2030,14 +2032,18 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
  *
  * This is used for several different page maintenance operations:
  *
- * - Page pruning, in VACUUM's 1st pass or on access: Some items are
+ * - Page pruning, in vacuum phase I or on-access: Some items are
  *   redirected, some marked dead, and some removed altogether.
  *
- * - Freezing: Items are marked as 'frozen'.
+ * - Freezing: During vacuum phase I, items are marked as 'frozen'.
  *
- * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
+ * - Reaping: During vacuum phase III, items that are already LP_DEAD are
+ *   marked as unused.
  *
- * They have enough commonalities that we use a single WAL record for them
+ * - VM updates: After vacuum phase III, the heap page may be marked
+ *   all-visible and all-frozen.
+ *
+ * These changes all happen together, so we use a single WAL record for them
  * all.
  *
  * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
@@ -2045,12 +2051,23 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
  * replaying 'unused' items depends on whether they were all previously marked
  * as dead.
  *
+ * If the VM is being updated, vmflags will contain the bits to set. In this
+ * case, vmbuffer should already have been updated and marked dirty and should
+ * still be pinned and locked.
+ *
+ * set_pd_all_vis indicates that we set PD_ALL_VISIBLE and thus should update
+ * the page LSN when checksums/wal_log_hints are enabled even if we did not
+ * prune or freeze tuples on the page.
+ *
  * Note: This function scribbles on the 'frozen' array.
  *
  * Note: This is called in a critical section, so careful what you do here.
  */
 void
 log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+						  Buffer vmbuffer,
+						  uint8 vmflags,
+						  bool set_pd_all_vis,
 						  TransactionId conflict_xid,
 						  bool cleanup_lock,
 						  PruneReason reason,
@@ -2062,6 +2079,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xl_heap_prune xlrec;
 	XLogRecPtr	recptr;
 	uint8		info;
+	uint8		regbuf_flags;
 
 	/* The following local variables hold data registered in the WAL record: */
 	xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
@@ -2070,8 +2088,21 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	xlhp_prune_items dead_items;
 	xlhp_prune_items unused_items;
 	OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
+	bool		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+
+	Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
+	xlrec.flags = vmflags;
 
-	xlrec.flags = 0;
+	regbuf_flags = REGBUF_STANDARD;
+
+	/*
+	 * We can avoid an FPI if the only modification we are making to the heap
+	 * page is to set PD_ALL_VISIBLE and checksums/wal_log_hints are disabled.
+	 */
+	if (!do_prune &&
+		nfrozen == 0 &&
+		(!set_pd_all_vis || !XLogHintBitIsNeeded()))
+		regbuf_flags |= REGBUF_NO_IMAGE;
 
 	/*
 	 * Prepare data for the buffer.  The arrays are not actually in the
@@ -2079,7 +2110,11 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	 * page image, the arrays can be omitted.
 	 */
 	XLogBeginInsert();
-	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	XLogRegisterBuffer(0, buffer, regbuf_flags);
+
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+		XLogRegisterBuffer(1, vmbuffer, 0);
+
 	if (nfrozen > 0)
 	{
 		int			nplans;
@@ -2168,5 +2203,22 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	}
 	recptr = XLogInsert(RM_HEAP2_ID, info);
 
-	PageSetLSN(BufferGetPage(buffer), recptr);
+	if (vmflags & VISIBILITYMAP_VALID_BITS)
+	{
+		Assert(BufferIsDirty(vmbuffer));
+		PageSetLSN(BufferGetPage(vmbuffer), recptr);
+	}
+
+	/*
+	 * If pruning or freezing tuples or setting the page all-visible when
+	 * checksums or wal_log_hints are enabled, we must bump the LSN. Torn
+	 * pages are possible if we update PD_ALL_VISIBLE without bumping the LSN,
+	 * but this is deemed okay for page hint updates.
+	 */
+	if (do_prune || nfrozen > 0 ||
+		(set_pd_all_vis && XLogHintBitIsNeeded()))
+	{
+		Assert(BufferIsDirty(buffer));
+		PageSetLSN(BufferGetPage(buffer), recptr);
+	}
 }
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 7f6f684bc63..a50652ca5a0 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -463,11 +463,13 @@ static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *
 						   int num_offsets);
 static void dead_items_reset(LVRelState *vacrel);
 static void dead_items_cleanup(LVRelState *vacrel);
-static bool heap_page_is_all_visible(Relation rel, Buffer buf,
-									 TransactionId OldestXmin,
-									 bool *all_frozen,
-									 TransactionId *visibility_cutoff_xid,
-									 OffsetNumber *logging_offnum);
+static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
+										   TransactionId OldestXmin,
+										   OffsetNumber *deadoffsets,
+										   int ndeadoffsets,
+										   bool *all_frozen,
+										   TransactionId *visibility_cutoff_xid,
+										   OffsetNumber *logging_offnum);
 static void update_relstats_all_indexes(LVRelState *vacrel);
 static void vacuum_error_callback(void *arg);
 static void update_vacuum_error_info(LVRelState *vacrel,
@@ -2846,8 +2848,11 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	OffsetNumber unused[MaxHeapTuplesPerPage];
 	int			nunused = 0;
 	TransactionId visibility_cutoff_xid;
+	TransactionId conflict_xid = InvalidTransactionId;
 	bool		all_frozen;
 	LVSavedErrInfo saved_err_info;
+	uint8		vmflags = 0;
+	bool		set_pd_all_vis = false;
 
 	Assert(vacrel->do_index_vacuuming);
 
@@ -2858,6 +2863,20 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 							 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
 							 InvalidOffsetNumber);
 
+	if (heap_page_would_be_all_visible(vacrel->rel, buffer,
+									   vacrel->cutoffs.OldestXmin,
+									   deadoffsets, num_offsets,
+									   &all_frozen, &visibility_cutoff_xid,
+									   &vacrel->offnum))
+	{
+		vmflags |= VISIBILITYMAP_ALL_VISIBLE;
+		if (all_frozen)
+		{
+			vmflags |= VISIBILITYMAP_ALL_FROZEN;
+			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+		}
+	}
+
 	START_CRIT_SECTION();
 
 	for (int i = 0; i < num_offsets; i++)
@@ -2877,6 +2896,18 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	/* Attempt to truncate line pointer array now */
 	PageTruncateLinePointerArray(page);
 
+	if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
+	{
+		Assert(!PageIsAllVisible(page));
+		set_pd_all_vis = true;
+		PageSetAllVisible(page);
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		visibilitymap_set_vmbits(vacrel->rel,
+								 blkno,
+								 vmbuffer, vmflags);
+		conflict_xid = visibility_cutoff_xid;
+	}
+
 	/*
 	 * Mark buffer dirty before we write WAL.
 	 */
@@ -2886,7 +2917,10 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	if (RelationNeedsWAL(vacrel->rel))
 	{
 		log_heap_prune_and_freeze(vacrel->rel, buffer,
-								  InvalidTransactionId,
+								  vmbuffer,
+								  vmflags,
+								  set_pd_all_vis,
+								  conflict_xid,
 								  false,	/* no cleanup lock required */
 								  PRUNE_VACUUM_CLEANUP,
 								  NULL, 0,	/* frozen */
@@ -2895,39 +2929,12 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 								  unused, nunused);
 	}
 
-	/*
-	 * End critical section, so we safely can do visibility tests (which
-	 * possibly need to perform IO and allocate memory!). If we crash now the
-	 * page (including the corresponding vm bit) might not be marked all
-	 * visible, but that's fine. A later vacuum will fix that.
-	 */
 	END_CRIT_SECTION();
 
-	/*
-	 * Now that we have removed the LP_DEAD items from the page, once again
-	 * check if the page has become all-visible.  The page is already marked
-	 * dirty, exclusively locked, and, if needed, a full page image has been
-	 * emitted.
-	 */
-	Assert(!PageIsAllVisible(page));
-	if (heap_page_is_all_visible(vacrel->rel, buffer, vacrel->cutoffs.OldestXmin,
-								 &all_frozen, &visibility_cutoff_xid, &vacrel->offnum))
+	if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
 	{
-		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-		if (all_frozen)
-		{
-			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
-			flags |= VISIBILITYMAP_ALL_FROZEN;
-		}
-
-		PageSetAllVisible(page);
-		visibilitymap_set(vacrel->rel, blkno, buffer,
-						  InvalidXLogRecPtr,
-						  vmbuffer, visibility_cutoff_xid,
-						  flags);
-
 		/* Count the newly set VM page for logging */
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 		vacrel->vm_new_visible_pages++;
 		if (all_frozen)
 			vacrel->vm_new_visible_frozen_pages++;
@@ -3594,40 +3601,85 @@ dead_items_cleanup(LVRelState *vacrel)
 }
 
 /*
- * Check if every tuple in the given page in buf is visible to all current and
- * future transactions.
+ * Wrapper for heap_page_would_be_all_visible() which can be used for
+ * callers that expect no LP_DEAD items on the page.
+ */
+bool
+heap_page_is_all_visible(Relation rel, Buffer buf,
+						 TransactionId OldestXmin,
+						 bool *all_frozen,
+						 TransactionId *visibility_cutoff_xid,
+						 OffsetNumber *logging_offnum)
+{
+
+	return heap_page_would_be_all_visible(rel, buf, OldestXmin,
+										  NULL, 0,
+										  all_frozen,
+										  visibility_cutoff_xid,
+										  logging_offnum);
+}
+
+/*
+ * Determines whether or not the heap page in buf is all-visible other than
+ * the dead line pointers referred to by the provided deadoffsets array.
  *
- * OldestXmin is used to determine visibility.
+ * deadoffsets are the offsets of LP_DEAD items the caller already knows about
+ * and whose associated index entries have already been removed. Vacuum will
+ * call this before setting those line pointers LP_UNUSED. So, if there are no
+ * new LP_DEAD items, the page can be set all-visible in the VM by the caller.
+ *
+ * Returns true if the page is all-visible other than the provided
+ * deadoffsets and false otherwise.
  *
- * Sets *all_frozen to true if every tuple on this page is frozen.
+ * OldestXmin is used to determine visibility.
  *
- * Sets *visibility_cutoff_xid to the highest xmin amongst the visible tuples.
- * It is only valid if the page is all-visible.
+ * *all_frozen is an output parameter indicating to the caller if every tuple
+ * on the page is frozen.
  *
  * *logging_offnum will have the OffsetNumber of the current tuple being
  * processed for vacuum's error callback system.
  *
- * This is a stripped down version of lazy_scan_prune().  If you change
- * anything here, make sure that everything stays in sync.  Note that an
- * assertion calls us to verify that everybody still agrees.  Be sure to avoid
- * introducing new side-effects here.
+ * *visibility_cutoff_xid is an output parameter with the highest xmin amongst the
+ * visible tuples. It is only valid if the page is all-visible.
+ *
+ * Callers looking to verify that the page is already all-visible can call
+ * heap_page_is_all_visible().
+ *
+ * This is similar logic to that in heap_prune_record_unchanged_lp_normal(). If
+ * you change anything here, make sure that everything stays in sync.  Note
+ * that an assertion calls us to verify that everybody still agrees.  Be sure
+ * to avoid introducing new side-effects here.
  */
 static bool
-heap_page_is_all_visible(Relation rel, Buffer buf,
-						 TransactionId OldestXmin,
-						 bool *all_frozen,
-						 TransactionId *visibility_cutoff_xid,
-						 OffsetNumber *logging_offnum)
+heap_page_would_be_all_visible(Relation rel, Buffer buf,
+							   TransactionId OldestXmin,
+							   OffsetNumber *deadoffsets,
+							   int ndeadoffsets,
+							   bool *all_frozen,
+							   TransactionId *visibility_cutoff_xid,
+							   OffsetNumber *logging_offnum)
 {
 	Page		page = BufferGetPage(buf);
 	BlockNumber blockno = BufferGetBlockNumber(buf);
 	OffsetNumber offnum,
 				maxoff;
 	bool		all_visible = true;
+	int			matched_dead_count = 0;
 
 	*visibility_cutoff_xid = InvalidTransactionId;
 	*all_frozen = true;
 
+	Assert(ndeadoffsets == 0 || deadoffsets);
+
+#ifdef USE_ASSERT_CHECKING
+	/* Confirm input deadoffsets[] is strictly sorted */
+	if (ndeadoffsets > 1)
+	{
+		for (int i = 1; i < ndeadoffsets; i++)
+			Assert(deadoffsets[i - 1] < deadoffsets[i]);
+	}
+#endif
+
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
 		 offnum <= maxoff && all_visible;
@@ -3655,9 +3707,15 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 		 */
 		if (ItemIdIsDead(itemid))
 		{
-			all_visible = false;
-			*all_frozen = false;
-			break;
+			if (!deadoffsets ||
+				matched_dead_count >= ndeadoffsets ||
+				deadoffsets[matched_dead_count] != offnum)
+			{
+				*all_frozen = all_visible = false;
+				break;
+			}
+			matched_dead_count++;
+			continue;
 		}
 
 		Assert(ItemIdIsNormal(itemid));
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index b48d7dc1d24..c95d30dfe8d 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -103,7 +103,7 @@ plan_elem_desc(StringInfo buf, void *plan, void *data)
  * code, the latter of which is used in frontend (pg_waldump) code.
  */
 void
-heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
+heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 									   int *nplans, xlhp_freeze_plan **plans,
 									   OffsetNumber **frz_offsets,
 									   int *nredirected, OffsetNumber **redirected,
@@ -279,7 +279,6 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 			TransactionId conflict_xid;
 
 			memcpy(&conflict_xid, rec + SizeOfHeapPrune, sizeof(TransactionId));
-
 			appendStringInfo(buf, "snapshotConflictHorizon: %u",
 							 conflict_xid);
 		}
@@ -287,6 +286,10 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, ", isCatalogRel: %c",
 						 xlrec->flags & XLHP_IS_CATALOG_REL ? 'T' : 'F');
 
+		if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
+			appendStringInfo(buf, ", vm_flags: 0x%02X",
+							 xlrec->flags & VISIBILITYMAP_VALID_BITS);
+
 		if (XLogRecHasBlockData(record, 0))
 		{
 			Size		datalen;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index a2bd5a897f8..8b47295efa2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -344,6 +344,12 @@ extern void heap_inplace_update_and_unlock(Relation relation,
 										   Buffer buffer);
 extern void heap_inplace_unlock(Relation relation,
 								HeapTuple oldtup, Buffer buffer);
+
+extern bool heap_page_is_all_visible(Relation rel, Buffer buf,
+									 TransactionId OldestXmin,
+									 bool *all_frozen,
+									 TransactionId *visibility_cutoff_xid,
+									 OffsetNumber *logging_offnum);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 									  const struct VacuumCutoffs *cutoffs,
 									  HeapPageFreeze *pagefrz,
@@ -388,6 +394,9 @@ extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
 									OffsetNumber *nowunused, int nunused);
 extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
+									  Buffer vmbuffer,
+									  uint8 vmflags,
+									  bool vm_modified_heap_page,
 									  TransactionId conflict_xid,
 									  bool cleanup_lock,
 									  PruneReason reason,
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index d4c0625b632..d8508593e7c 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -249,7 +249,7 @@ typedef struct xl_heap_update
  * Main data section:
  *
  *	xl_heap_prune
- *		uint8				flags
+ *		uint16				flags
  *	TransactionId			snapshot_conflict_horizon
  *
  * Block 0 data section:
@@ -284,7 +284,7 @@ typedef struct xl_heap_update
  */
 typedef struct xl_heap_prune
 {
-	uint8		flags;
+	uint16		flags;
 
 	/*
 	 * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows,
@@ -292,10 +292,22 @@ typedef struct xl_heap_prune
 	 */
 } xl_heap_prune;
 
-#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
+#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint16))
+
+/*
+ * The xl_heap_prune record's flags may also contain which VM bits to set. As
+ * such, (1 << 0) and (1 << 1) are reserved for VISIBILITYMAP_ALL_VISIBLE and
+ * VISIBILITYMAP_ALL_FROZEN.
+ */
 
-/* to handle recovery conflict during logical decoding on standby */
-#define		XLHP_IS_CATALOG_REL			(1 << 1)
+/*
+ * To handle recovery conflict during logical decoding on standby, we must know
+ * if the table is a catalog table. Note that in visibilitymapdefs.h
+ * VISIBILITYMAP_XLOG_CATALOG_REL is also defined as (1 << 2). xl_heap_prune
+ * records should use XLHP_IS_CATALOG_REL, not VISIBILITYMAP_XLOG_CATALOG_REL --
+ * even if they only contain updates to the VM.
+ */
+#define		XLHP_IS_CATALOG_REL			(1 << 2)
 
 /*
  * Does replaying the record require a cleanup-lock?
@@ -305,7 +317,7 @@ typedef struct xl_heap_prune
  * marks LP_DEAD line pointers as unused without moving any tuple data, an
  * ordinary exclusive lock is sufficient.
  */
-#define		XLHP_CLEANUP_LOCK	       (1 << 2)
+#define		XLHP_CLEANUP_LOCK	       (1 << 3)
 
 /*
  * If we remove or freeze any entries that contain xids, we need to include a
@@ -313,22 +325,22 @@ typedef struct xl_heap_prune
  * there are no queries running for which the removed tuples are still
  * visible, or which still consider the frozen XIDs as running.
  */
-#define		XLHP_HAS_CONFLICT_HORIZON   (1 << 3)
+#define		XLHP_HAS_CONFLICT_HORIZON   (1 << 4)
 
 /*
  * Indicates that an xlhp_freeze_plans sub-record and one or more
  * xlhp_freeze_plan sub-records are present.
  */
-#define		XLHP_HAS_FREEZE_PLANS		(1 << 4)
+#define		XLHP_HAS_FREEZE_PLANS		(1 << 5)
 
 /*
  * XLHP_HAS_REDIRECTIONS, XLHP_HAS_DEAD_ITEMS, and XLHP_HAS_NOW_UNUSED_ITEMS
  * indicate that xlhp_prune_items sub-records with redirected, dead, and
  * unused item offsets are present.
  */
-#define		XLHP_HAS_REDIRECTIONS		(1 << 5)
-#define		XLHP_HAS_DEAD_ITEMS	        (1 << 6)
-#define		XLHP_HAS_NOW_UNUSED_ITEMS   (1 << 7)
+#define		XLHP_HAS_REDIRECTIONS		(1 << 6)
+#define		XLHP_HAS_DEAD_ITEMS	        (1 << 7)
+#define		XLHP_HAS_NOW_UNUSED_ITEMS   (1 << 8)
 
 /*
  * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples
@@ -497,7 +509,7 @@ extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer,
 								   uint8 vmflags);
 
 /* in heapdesc.c, so it can be shared between frontend/backend code */
-extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
+extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 												   int *nplans, xlhp_freeze_plan **plans,
 												   OffsetNumber **frz_offsets,
 												   int *nredirected, OffsetNumber **redirected,
-- 
2.43.0

