From 1153915bc4db207f63e2718234478d2237fdb73d Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 3 Dec 2025 15:24:08 -0500
Subject: [PATCH v26 14/15] Allow on-access pruning to set pages all-visible

Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum and COPY FREEZE marked pages as all-visible or
all-frozen.

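This commit implements on-access VM setting for sequential scans as well
as for the underlying heap relation in index scans and bitmap heap
scans. The VM is only considered when the scan is flagged as not
modifying the relation (SO_HINT_REL_READ_ONLY). If an on-access call
ends up neither pruning nor freezing, the VM is left alone when setting
it would newly dirty the heap page or require a full-page image of it.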

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/heapam.c              | 15 ++++++-
 src/backend/access/heap/heapam_handler.c      | 15 ++++++-
 src/backend/access/heap/pruneheap.c           | 44 +++++++++++++++++--
 src/include/access/heapam.h                   | 24 ++++++++--
 .../t/035_standby_logical_decoding.pl         |  3 +-
 5 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index fb7a7548aa0..d9dc79f4a96 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -570,6 +570,7 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	Buffer		buffer = scan->rs_cbuf;
 	BlockNumber block = scan->rs_cblock;
 	Snapshot	snapshot;
+	Buffer	   *vmbuffer = NULL;
 	Page		page;
 	int			lines;
 	bool		all_visible;
@@ -584,7 +585,9 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
+	if (sscan->rs_flags & SO_HINT_REL_READ_ONLY)
+		vmbuffer = &scan->rs_vmbuffer;
+	heap_page_prune_opt(scan->rs_base.rs_rd, buffer, vmbuffer);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
@@ -1261,6 +1264,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 														  sizeof(TBMIterateResult));
 	}
 
+	scan->rs_vmbuffer = InvalidBuffer;
 
 	return (TableScanDesc) scan;
 }
@@ -1299,6 +1303,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 		scan->rs_cbuf = InvalidBuffer;
 	}
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+	{
+		ReleaseBuffer(scan->rs_vmbuffer);
+		scan->rs_vmbuffer = InvalidBuffer;
+	}
+
 	/*
 	 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
 	 * additional data vs a normal HeapScan
@@ -1331,6 +1341,9 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+		ReleaseBuffer(scan->rs_vmbuffer);
+
 	/*
 	 * Must free the read stream before freeing the BufferAccessStrategy.
 	 */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 6c2e4e08b16..2cb98e58956 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -85,6 +85,7 @@ heapam_index_fetch_begin(Relation rel, uint32 flags)
 
 	hscan->xs_base.rel = rel;
 	hscan->xs_cbuf = InvalidBuffer;
+	hscan->xs_vmbuffer = InvalidBuffer;
 	hscan->modifies_base_rel = !(flags & SO_HINT_REL_READ_ONLY);
 
 	return &hscan->xs_base;
@@ -100,6 +101,12 @@ heapam_index_fetch_reset(IndexFetchTableData *scan)
 		ReleaseBuffer(hscan->xs_cbuf);
 		hscan->xs_cbuf = InvalidBuffer;
 	}
+
+	if (BufferIsValid(hscan->xs_vmbuffer))
+	{
+		ReleaseBuffer(hscan->xs_vmbuffer);
+		hscan->xs_vmbuffer = InvalidBuffer;
+	}
 }
 
 static void
@@ -139,7 +146,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 		 * Prune page, but only if we weren't already on this page
 		 */
 		if (prev_buf != hscan->xs_cbuf)
-			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
+								hscan->modifies_base_rel ? NULL : &hscan->xs_vmbuffer);
 	}
 
 	/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2472,6 +2480,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	TBMIterateResult *tbmres;
 	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
 	int			noffsets = -1;
+	Buffer	   *vmbuffer = NULL;
 
 	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
 	Assert(hscan->rs_read_stream);
@@ -2518,7 +2527,9 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_rd, buffer);
+	if (scan->rs_flags & SO_HINT_REL_READ_ONLY)
+		vmbuffer = &hscan->rs_vmbuffer;
+	heap_page_prune_opt(scan->rs_rd, buffer, vmbuffer);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index a4c3bd00253..d1ec6d1b601 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -202,6 +202,8 @@ static bool heap_page_will_set_vm(PruneState *prstate,
 								  Relation relation,
 								  BlockNumber heap_blk, Buffer heap_buffer, Page heap_page,
 								  Buffer vmbuffer,
+								  PruneReason reason,
+								  bool do_prune, bool do_freeze,
 								  int nlpdead_items,
 								  uint8 *old_vmbits,
 								  uint8 *new_vmbits);
@@ -223,9 +225,13 @@ static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set
  * if there's not any use in pruning.
  *
  * Caller must have pin on the buffer, and must *not* have a lock on it.
+ *
+ * If vmbuffer is not NULL, pruning may set the visibility map when the page
+ * is all-visible. We pin (reading in, if needed) the corresponding VM page
+ * into *vmbuffer; the caller is expected to release that pin when done.
  */
 void
-heap_page_prune_opt(Relation relation, Buffer buffer)
+heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
 {
 	Page		page = BufferGetPage(buffer);
 	TransactionId prune_xid;
@@ -306,6 +312,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 				.cutoffs = NULL,
 			};
 
+			if (vmbuffer)
+			{
+				visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+				params.options |= HEAP_PAGE_PRUNE_UPDATE_VM;
+				params.vmbuffer = *vmbuffer;
+			}
+
 			heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
 									   NULL, NULL);
 
@@ -935,6 +948,9 @@ identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
  * corrupted, it will fix them by clearing the VM bits and visibility hint.
  * This does not need to be done in a critical section.
  *
+ * This should be called only after do_freeze has been decided (and do_prune
+ * has been set), as these factor into our heuristic-based decision.
+ *
  * Returns true if one or both VM bits should be set, along with returning the
  * current value of the VM bits in *old_vmbits and the desired new value of
  * the VM bits in *new_vmbits.
@@ -944,6 +960,8 @@ heap_page_will_set_vm(PruneState *prstate,
 					  Relation relation,
 					  BlockNumber heap_blk, Buffer heap_buffer, Page heap_page,
 					  Buffer vmbuffer,
+					  PruneReason reason,
+					  bool do_prune, bool do_freeze,
 					  int nlpdead_items,
 					  uint8 *old_vmbits,
 					  uint8 *new_vmbits)
@@ -951,6 +969,24 @@ heap_page_will_set_vm(PruneState *prstate,
 	if (!prstate->attempt_update_vm)
 		return false;
 
+	/*
+	 * If this is an on-access call and we're neither pruning nor freezing,
+	 * avoid setting the visibility map if it would newly dirty the heap page
+	 * or, if the page is already dirty, if doing so would require including a
+	 * full-page image (FPI) of the heap page in the WAL. This situation
+	 * should be rare, as on-access pruning is only attempted when
+	 * pd_prune_xid is valid.
+	 */
+	if (reason == PRUNE_ON_ACCESS &&
+		prstate->all_visible &&
+		!do_prune && !do_freeze &&
+		(!BufferIsDirty(heap_buffer) || XLogCheckBufferNeedsBackup(heap_buffer)))
+	{
+		prstate->all_visible = false;
+		prstate->all_frozen = false;
+		return false;
+	}
+
 	*old_vmbits = visibilitymap_get_status(relation, heap_blk,
 										   &vmbuffer);
 
@@ -1146,6 +1182,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 									  buffer,
 									  page,
 									  vmbuffer,
+									  params->reason,
+									  do_prune, do_freeze,
 									  prstate.lpdead_items,
 									  &old_vmbits,
 									  &new_vmbits);
@@ -1232,9 +1270,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 
 		MarkBufferDirty(buffer);
 
-		/*
-		 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
-		 */
+		/* Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did */
 		if (RelationNeedsWAL(params->relation))
 		{
 			log_heap_prune_and_freeze(params->relation, buffer,
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ba3ff8c0845..c835c792c80 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -95,6 +95,13 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+	/*
+	 * For sequential scans and bitmap heap scans. If the relation is not
+	 * being modified, on-access pruning may read the current heap page's
+	 * corresponding VM block into this buffer.
+	 */
+	Buffer		rs_vmbuffer;
+
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
 	uint32		rs_cindex;		/* current tuple's index in vistuples */
 	uint32		rs_ntuples;		/* number of visible tuples on page */
@@ -117,8 +124,18 @@ typedef struct IndexFetchHeapData
 {
 	IndexFetchTableData xs_base;	/* AM independent part of the descriptor */
 
-	Buffer		xs_cbuf;		/* current heap buffer in scan, if any */
-	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+	/*
+	 * Current heap buffer in scan, if any. NB: if xs_cbuf is not
+	 * InvalidBuffer, we hold a pin on that buffer.
+	 */
+	Buffer		xs_cbuf;
+
+	/*
+	 * For index scans that do not modify the underlying heap table, on-access
+	 * pruning may read the current heap page's corresponding VM block into
+	 * this buffer.
+	 */
+	Buffer		xs_vmbuffer;
 
 	/*
 	 * Some optimizations can only be performed if the query does not modify
@@ -419,7 +436,8 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 											  TM_IndexDeleteOp *delstate);
 
 /* in heap/pruneheap.c */
-extern void heap_page_prune_opt(Relation relation, Buffer buffer);
+extern void heap_page_prune_opt(Relation relation, Buffer buffer,
+								Buffer *vmbuffer);
 extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
 									   PruneFreezeResult *presult,
 									   OffsetNumber *off_loc,
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index ebe2fae1789..bdd9f0a62cd 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -296,6 +296,7 @@ wal_level = 'logical'
 max_replication_slots = 4
 max_wal_senders = 4
 autovacuum = off
+hot_standby_feedback = on
 });
 $node_primary->dump_info;
 $node_primary->start;
@@ -748,7 +749,7 @@ check_pg_recvlogical_stderr($handle,
 $logstart = -s $node_standby->logfile;
 
 reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
-	'no_conflict_', 0, 1);
+	'no_conflict_', 1, 0);
 
 # This should not trigger a conflict
 wait_until_vacuum_can_remove(
-- 
2.43.0

