From ba527a1cf0fff9f2b80d64d9d0f80888c6e5db66 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 29 Jul 2025 16:12:56 -0400
Subject: [PATCH v34 14/14] Set pd_prune_xid on insert
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that visibility map (VM) updates can occur during read-only queries,
it makes sense to also set the page’s pd_prune_xid hint during inserts.
This enables heap_page_prune_and_freeze() to run after a page is
filled with newly inserted tuples the first time it is read.

This change also addresses a long-standing note in heap_insert() and
heap_multi_insert(), which observed that setting pd_prune_xid would
help clean up aborted insertions sooner. Without it, such tuples might
linger until VACUUM, whereas now they can be pruned earlier.

The index killtuples test had to be updated to reflect a larger number
of hits by some accesses. Since the prune_xid is set by the fill/insert
step, on-access pruning can happen during the first access step (before
the DELETE). This is when the VM is extended. After the DELETE, the next
access hits the VM block instead of extending it. Thus, an additional
buffer hit is counted for the table.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
---
 src/backend/access/heap/heapam.c              | 25 +++++++++++++------
 src/backend/access/heap/heapam_xlog.c         | 15 ++++++++++-
 .../modules/index/expected/killtuples.out     |  6 ++---
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 7d22549a290..afafc7e2cb2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2166,6 +2166,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	TransactionId xid = GetCurrentTransactionId();
 	HeapTuple	heaptup;
 	Buffer		buffer;
+	Page		page;
 	Buffer		vmbuffer = InvalidBuffer;
 	bool		all_visible_cleared = false;
 
@@ -2225,15 +2226,19 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	}
 
 	/*
-	 * XXX Should we set PageSetPrunable on this page ?
+	 * Set pd_prune_xid to trigger heap_page_prune_and_freeze() once the page
+	 * is full so that we can set the page all-visible in the VM.
 	 *
-	 * The inserting transaction may eventually abort thus making this tuple
-	 * DEAD and hence available for pruning. Though we don't want to optimize
-	 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
-	 * aborted tuple will never be pruned until next vacuum is triggered.
+	 * Setting pd_prune_xid is also handy if the inserting transaction
+	 * eventually aborts making this tuple DEAD and hence available for
+	 * pruning. If no other tuple in this page is UPDATEd/DELETEd, the aborted
+	 * tuple would never otherwise be pruned until next vacuum is triggered.
 	 *
-	 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
+	 * Don't set it if we are in bootstrap mode, though.
 	 */
+	page = BufferGetPage(buffer);
+	if (TransactionIdIsNormal(xid))
+		PageSetPrunable(page, xid);
 
 	MarkBufferDirty(buffer);
 
@@ -2243,7 +2248,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 		xl_heap_insert xlrec;
 		xl_heap_header xlhdr;
 		XLogRecPtr	recptr;
-		Page		page = BufferGetPage(buffer);
 		uint8		info = XLOG_HEAP_INSERT;
 		int			bufflags = 0;
 
@@ -2607,8 +2611,13 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		}
 
 		/*
-		 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
+		 * Set pd_prune_xid. See heap_insert() for more on why we do this when
+		 * inserting tuples. This only makes sense if we aren't already
+		 * setting the page frozen in the VM. We also don't set it in
+		 * bootstrap mode.
 		 */
+		if (!all_frozen_set && TransactionIdIsNormal(xid))
+			PageSetPrunable(page, xid);
 
 		MarkBufferDirty(buffer);
 
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 9a29fda3601..49cc83a6479 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -447,6 +447,12 @@ heap_xlog_insert(XLogReaderState *record)
 
 		freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
+		/*
+		 * Set the page prunable to trigger on-access pruning later which may
+		 * set the page all-visible in the VM.
+		 */
+		PageSetPrunable(page, XLogRecGetXid(record));
+
 		PageSetLSN(page, lsn);
 
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
@@ -596,9 +602,16 @@ heap_xlog_multi_insert(XLogReaderState *record)
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 			PageClearAllVisible(page);
 
-		/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
+		/*
+		 * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
+		 * we are not setting the page frozen, then set the page's prunable
+		 * hint so that we trigger on-access pruning later which may set the
+		 * page all-visible in the VM.
+		 */
 		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
 			PageSetAllVisible(page);
+		else
+			PageSetPrunable(page, XLogRecGetXid(record));
 
 		MarkBufferDirty(buffer);
 	}
diff --git a/src/test/modules/index/expected/killtuples.out b/src/test/modules/index/expected/killtuples.out
index be7ddd756ef..b29f2434b00 100644
--- a/src/test/modules/index/expected/killtuples.out
+++ b/src/test/modules/index/expected/killtuples.out
@@ -54,7 +54,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';
 new_heap_accesses
 -----------------
-                1
+                2
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
@@ -130,7 +130,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';
 new_heap_accesses
 -----------------
-                1
+                2
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
@@ -283,7 +283,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';
 new_heap_accesses
 -----------------
-                1
+                2
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-- 
2.43.0

