public inbox for [email protected]
help / color / mirror / Atom feed
Re: Expanding HOT updates for expression and partial indexes
2+ messages / 1 participants
[nested] [flat]
* Re: Expanding HOT updates for expression and partial indexes
@ 2026-01-08 20:25 Greg Burd <[email protected]>
2026-01-13 14:54 ` Re: Expanding HOT updates for expression and partial indexes Greg Burd <[email protected]>
0 siblings, 1 reply; 2+ messages in thread
From: Greg Burd @ 2026-01-08 20:25 UTC (permalink / raw)
To: pgsql-hackers
Rebased to address conflicts.
best.
-greg
Attachments:
[application/octet-stream] v27-0001-Prepare-heapam_tuple_update-and-simple_heap_upda.patch (47.8K, 2-v27-0001-Prepare-heapam_tuple_update-and-simple_heap_upda.patch)
download | inline diff:
From 82a76ba192e37b038af0adebfa2eca0aedd0e3d8 Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Sun, 2 Nov 2025 11:36:20 -0500
Subject: [PATCH v27 1/4] Prepare heapam_tuple_update() and
simple_heap_update() for divergence
This commit lays the foundation for larger changes to come by taking the
first portion of heap_update(), up through the call to
HeapDetermineColumnsInfo(), and replicating that logic in both
heapam_tuple_update() and simple_heap_update(). This is done so that
these two paths might diverge
in implementation later on. The simple_heap_update() path deals solely
with updates to catalog tuples which could record their modified
attributes rather than relearn them. The remaining calls from the
executor into the table AM update API could include the set of updated
attributes. This is foreshadowing... of course, as that's what the next
commit will start to do.
As part of this reorganization, the handling of replica identity key
attributes has been adjusted. Instead of fetching a second copy of
the bitmap during an update operation, the caller is now required to
provide it. This change applies to both heap_update() and
heap_delete().
---
src/backend/access/heap/heapam.c | 568 +++++++++++------------
src/backend/access/heap/heapam_handler.c | 117 ++++-
src/include/access/heapam.h | 24 +-
3 files changed, 410 insertions(+), 299 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ad9d6338ec2..2579f21e212 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -39,18 +39,24 @@
#include "access/syncscan.h"
#include "access/valid.h"
#include "access/visibilitymap.h"
+#include "access/xact.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_database.h"
#include "catalog/pg_database_d.h"
#include "commands/vacuum.h"
+#include "nodes/bitmapset.h"
#include "pgstat.h"
#include "port/pg_bitutils.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/inval.h"
+#include "utils/relcache.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
@@ -62,16 +68,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
HeapTuple newtup, HeapTuple old_key_tuple,
bool all_visible_cleared, bool new_all_visible_cleared);
#ifdef USE_ASSERT_CHECKING
-static void check_lock_if_inplace_updateable_rel(Relation relation,
- const ItemPointerData *otid,
- HeapTuple newtup);
static void check_inplace_rel_lock(HeapTuple oldtup);
#endif
-static Bitmapset *HeapDetermineColumnsInfo(Relation relation,
- Bitmapset *interesting_cols,
- Bitmapset *external_cols,
- HeapTuple oldtup, HeapTuple newtup,
- bool *has_external);
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid,
LockTupleMode mode, LockWaitPolicy wait_policy,
bool *have_tuple_lock);
@@ -106,10 +104,10 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
static void index_delete_sort(TM_IndexDeleteOp *delstate);
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
-static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
+static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp,
+ Bitmapset *rid_attrs, bool key_required,
bool *copy);
-
/*
* Each tuple lock mode has a corresponding heavyweight lock, and one or two
* corresponding MultiXactStatuses (one to merely lock tuples, another one to
@@ -2817,6 +2815,7 @@ heap_delete(Relation relation, const ItemPointerData *tid,
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
TransactionId new_xmax;
+ Bitmapset *rid_attrs;
uint16 new_infomask,
new_infomask2;
bool have_tuple_lock = false;
@@ -2829,6 +2828,8 @@ heap_delete(Relation relation, const ItemPointerData *tid,
AssertHasSnapshotForToast(relation);
+ rid_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -3032,6 +3033,7 @@ l1:
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
+ bms_free(rid_attrs);
return result;
}
@@ -3053,7 +3055,10 @@ l1:
* Compute replica identity tuple before entering the critical section so
* we don't PANIC upon a memory allocation failure.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, &tp, rid_attrs,
+ true, &old_key_copied);
+ bms_free(rid_attrs);
+ rid_attrs = NULL;
/*
* If this is the first possibly-multixact-able operation in the current
@@ -3265,7 +3270,10 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* heap_update - replace a tuple
*
* See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a heap tuple rather than a slot.
+ *
+ * It's required that the caller has acquired the pin and lock on the buffer.
+ * That lock and pin will be managed here, not in the caller.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -3273,30 +3281,21 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* generated by another transaction).
*/
TM_Result
-heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+heap_update(Relation relation, HeapTupleData *oldtup,
+ HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
+ Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
+ Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
+ Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
- Bitmapset *hot_attrs;
- Bitmapset *sum_attrs;
- Bitmapset *key_attrs;
- Bitmapset *id_attrs;
- Bitmapset *interesting_attrs;
- Bitmapset *modified_attrs;
- ItemId lp;
- HeapTupleData oldtup;
HeapTuple heaptup;
HeapTuple old_key_tuple = NULL;
bool old_key_copied = false;
- Page page;
- BlockNumber block;
MultiXactStatus mxact_status;
- Buffer buffer,
- newbuf,
- vmbuffer = InvalidBuffer,
+ Buffer newbuf,
vmbuffer_new = InvalidBuffer;
bool need_toast;
Size newtupsize,
@@ -3310,7 +3309,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
bool all_visible_cleared_new = false;
bool checked_lockers;
bool locker_remains;
- bool id_has_external = false;
TransactionId xmax_new_tuple,
xmax_old_tuple;
uint16 infomask_old_tuple,
@@ -3318,144 +3316,13 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
infomask_new_tuple,
infomask2_new_tuple;
- Assert(ItemPointerIsValid(otid));
-
- /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
- Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
- RelationGetNumberOfAttributes(relation));
-
+ Assert(BufferIsLockedByMe(buffer));
+ Assert(ItemIdIsNormal(lp));
AssertHasSnapshotForToast(relation);
- /*
- * Forbid this during a parallel operation, lest it allocate a combo CID.
- * Other workers might need that combo CID for visibility checks, and we
- * have no provision for broadcasting it to them.
- */
- if (IsInParallelMode())
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("cannot update tuples during a parallel operation")));
-
-#ifdef USE_ASSERT_CHECKING
- check_lock_if_inplace_updateable_rel(relation, otid, newtup);
-#endif
-
- /*
- * Fetch the list of attributes to be checked for various operations.
- *
- * For HOT considerations, this is wasted effort if we fail to update or
- * have to put the new tuple on a different page. But we must compute the
- * list before obtaining buffer lock --- in the worst case, if we are
- * doing an update on one of the relevant system catalogs, we could
- * deadlock if we try to fetch the list later. In any case, the relcache
- * caches the data so this is usually pretty cheap.
- *
- * We also need columns used by the replica identity and columns that are
- * considered the "key" of rows in the table.
- *
- * Note that we get copies of each bitmap, so we need not worry about
- * relcache flush happening midway through.
- */
- hot_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_HOT_BLOCKING);
- sum_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_SUMMARIZED);
- key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
- id_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
- interesting_attrs = NULL;
- interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
-
- block = ItemPointerGetBlockNumber(otid);
- INJECTION_POINT("heap_update-before-pin", NULL);
- buffer = ReadBuffer(relation, block);
- page = BufferGetPage(buffer);
-
- /*
- * Before locking the buffer, pin the visibility map page if it appears to
- * be necessary. Since we haven't got the lock yet, someone else might be
- * in the middle of changing this, so we'll need to recheck after we have
- * the lock.
- */
- if (PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
-
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
- lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
-
- /*
- * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
- * we see LP_NORMAL here. When the otid origin is a syscache, we may have
- * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
- * of which indicates concurrent pruning.
- *
- * Failing with TM_Updated would be most accurate. However, unlike other
- * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
- * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
- * does matter to SQL statements UPDATE and MERGE, those SQL statements
- * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
- * TM_Updated and TM_Deleted affects only the wording of error messages.
- * Settle on TM_Deleted, for two reasons. First, it avoids complicating
- * the specification of when tmfd->ctid is valid. Second, it creates
- * error log evidence that we took this branch.
- *
- * Since it's possible to see LP_UNUSED at otid, it's also possible to see
- * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
- * unrelated row, we'll fail with "duplicate key value violates unique".
- * XXX if otid is the live, newer version of the newtup row, we'll discard
- * changes originating in versions of this catalog row after the version
- * the caller got from syscache. See syscache-update-pruned.spec.
- */
- if (!ItemIdIsNormal(lp))
- {
- Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
-
- UnlockReleaseBuffer(buffer);
- Assert(!have_tuple_lock);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
- tmfd->ctid = *otid;
- tmfd->xmax = InvalidTransactionId;
- tmfd->cmax = InvalidCommandId;
- *update_indexes = TU_None;
-
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- /* modified_attrs not yet initialized */
- bms_free(interesting_attrs);
- return TM_Deleted;
- }
-
- /*
- * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
- * properly.
- */
- oldtup.t_tableOid = RelationGetRelid(relation);
- oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
- oldtup.t_len = ItemIdGetLength(lp);
- oldtup.t_self = *otid;
-
- /* the new tuple is ready, except for this: */
+ /* The new tuple is ready, except for this */
newtup->t_tableOid = RelationGetRelid(relation);
- /*
- * Determine columns modified by the update. Additionally, identify
- * whether any of the unmodified replica identity key attributes in the
- * old tuple is externally stored or not. This is required because for
- * such attributes the flattened value won't be WAL logged as part of the
- * new tuple so we must include it as part of the old_key_tuple. See
- * ExtractReplicaIdentity.
- */
- modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
- id_attrs, &oldtup,
- newtup, &id_has_external);
-
/*
* If we're not updating any "key" column, we can grab a weaker lock type.
* This allows for more concurrency when we are running simultaneously
@@ -3467,7 +3334,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* is updates that don't manipulate key columns, not those that
* serendipitously arrive at the same key values.
*/
- if (!bms_overlap(modified_attrs, key_attrs))
+ if (!bms_overlap(mix_attrs, pk_attrs))
{
*lockmode = LockTupleNoKeyExclusive;
mxact_status = MultiXactStatusNoKeyUpdate;
@@ -3491,17 +3358,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
key_intact = false;
}
- /*
- * Note: beyond this point, use oldtup not otid to refer to old tuple.
- * otid may very well point at newtup->t_self, which we will overwrite
- * with the new tuple's location, so there's great risk of confusion if we
- * use otid anymore.
- */
-
l2:
checked_lockers = false;
locker_remains = false;
- result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
+ result = HeapTupleSatisfiesUpdate(oldtup, cid, buffer);
/* see below about the "no wait" case */
Assert(result != TM_BeingModified || wait);
@@ -3533,8 +3393,8 @@ l2:
*/
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
- infomask = oldtup.t_data->t_infomask;
+ xwait = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ infomask = oldtup->t_data->t_infomask;
/*
* Now we have to do something about the existing locker. If it's a
@@ -3574,13 +3434,12 @@ l2:
* requesting a lock and already have one; avoids deadlock).
*/
if (!current_is_member)
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
/* wait for multixact */
MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
- relation, &oldtup.t_self, XLTW_Update,
- &remain);
+ relation, &oldtup->t_self, XLTW_Update, &remain);
checked_lockers = true;
locker_remains = remain != 0;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3590,9 +3449,9 @@ l2:
* could update this tuple before we get to this point. Check
* for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask,
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask,
infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup->t_data),
xwait))
goto l2;
}
@@ -3617,8 +3476,8 @@ l2:
* before this one, which are important to keep in case this
* subxact aborts.
*/
- if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
- update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup->t_data->t_infomask))
+ update_xact = HeapTupleGetUpdateXid(oldtup->t_data);
else
update_xact = InvalidTransactionId;
@@ -3659,9 +3518,9 @@ l2:
* lock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
- XactLockTableWait(xwait, relation, &oldtup.t_self,
+ XactLockTableWait(xwait, relation, &oldtup->t_self,
XLTW_Update);
checked_lockers = true;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3671,20 +3530,20 @@ l2:
* other xact could update this tuple before we get to this point.
* Check for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask, infomask) ||
!TransactionIdEquals(xwait,
- HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+ HeapTupleHeaderGetRawXmax(oldtup->t_data)))
goto l2;
/* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
- if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
+ UpdateXmaxHintBits(oldtup->t_data, buffer, xwait);
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_INVALID)
can_continue = true;
}
if (can_continue)
result = TM_Ok;
- else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
+ else if (!ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid))
result = TM_Updated;
else
result = TM_Deleted;
@@ -3697,39 +3556,33 @@ l2:
result == TM_Updated ||
result == TM_Deleted ||
result == TM_BeingModified);
- Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
+ Assert(!(oldtup->t_data->t_infomask & HEAP_XMAX_INVALID));
Assert(result != TM_Updated ||
- !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
+ !ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid));
}
if (crosscheck != InvalidSnapshot && result == TM_Ok)
{
/* Perform additional check for transaction-snapshot mode RI updates */
- if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
+ if (!HeapTupleSatisfiesVisibility(oldtup, crosscheck, buffer))
result = TM_Updated;
}
if (result != TM_Ok)
{
- tmfd->ctid = oldtup.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
+ tmfd->ctid = oldtup->t_data->t_ctid;
+ tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup->t_data);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
+ tmfd->cmax = HeapTupleHeaderGetCmax(oldtup->t_data);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
+ if (*vmbuffer != InvalidBuffer)
+ ReleaseBuffer(*vmbuffer);
*update_indexes = TU_None;
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
return result;
}
@@ -3742,10 +3595,10 @@ l2:
* tuple has been locked or updated under us, but hopefully it won't
* happen very often.
*/
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- visibilitymap_pin(relation, block, &vmbuffer);
+ visibilitymap_pin(relation, block, vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l2;
}
@@ -3756,9 +3609,9 @@ l2:
* If the tuple we're updating is locked, we need to preserve the locking
* info in the old tuple's Xmax. Prepare a new Xmax value for this.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, true,
&xmax_old_tuple, &infomask_old_tuple,
&infomask2_old_tuple);
@@ -3770,12 +3623,12 @@ l2:
* tuple. (In rare cases that might also be InvalidTransactionId and yet
* not have the HEAP_XMAX_INVALID bit set; that's fine.)
*/
- if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
+ if ((oldtup->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_LOCKED_UPGRADED(oldtup->t_data->t_infomask) ||
(checked_lockers && !locker_remains))
xmax_new_tuple = InvalidTransactionId;
else
- xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup->t_data);
if (!TransactionIdIsValid(xmax_new_tuple))
{
@@ -3790,7 +3643,7 @@ l2:
* Note that since we're doing an update, the only possibility is that
* the lockers had FOR KEY SHARE lock.
*/
- if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
{
GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
&infomask2_new_tuple);
@@ -3818,7 +3671,7 @@ l2:
* Replace cid with a combo CID if necessary. Note that we already put
* the plain cid into the new tuple.
*/
- HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
+ HeapTupleHeaderAdjustCmax(oldtup->t_data, &cid, &iscombo);
/*
* If the toaster needs to be activated, OR if the new tuple will not fit
@@ -3835,12 +3688,12 @@ l2:
relation->rd_rel->relkind != RELKIND_MATVIEW)
{
/* toast table entries should never be recursively toasted */
- Assert(!HeapTupleHasExternal(&oldtup));
+ Assert(!HeapTupleHasExternal(oldtup));
Assert(!HeapTupleHasExternal(newtup));
need_toast = false;
}
else
- need_toast = (HeapTupleHasExternal(&oldtup) ||
+ need_toast = (HeapTupleHasExternal(oldtup) ||
HeapTupleHasExternal(newtup) ||
newtup->t_len > TOAST_TUPLE_THRESHOLD);
@@ -3873,9 +3726,9 @@ l2:
* updating, because the potentially created multixact would otherwise
* be wrong.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, false,
&xmax_lock_old_tuple, &infomask_lock_old_tuple,
&infomask2_lock_old_tuple);
@@ -3885,18 +3738,18 @@ l2:
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
- HeapTupleClearHotUpdated(&oldtup);
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ HeapTupleClearHotUpdated(oldtup);
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_lock_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
- oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_lock_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_lock_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_lock_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* temporarily make it look not-updated, but locked */
- oldtup.t_data->t_ctid = oldtup.t_self;
+ oldtup->t_data->t_ctid = oldtup->t_self;
/*
* Clear all-frozen bit on visibility map if needed. We could
@@ -3905,7 +3758,7 @@ l2:
* worthwhile.
*/
if (PageIsAllVisible(page) &&
- visibilitymap_clear(relation, block, vmbuffer,
+ visibilitymap_clear(relation, block, *vmbuffer,
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
@@ -3919,10 +3772,10 @@ l2:
XLogBeginInsert();
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
- xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
+ xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
xlrec.xmax = xmax_lock_old_tuple;
- xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2);
+ xlrec.infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
xlrec.flags =
cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
XLogRegisterData(&xlrec, SizeOfHeapLock);
@@ -3944,7 +3797,7 @@ l2:
if (need_toast)
{
/* Note we always use WAL and FSM during updates */
- heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
+ heaptup = heap_toast_insert_or_update(relation, newtup, oldtup, 0);
newtupsize = MAXALIGN(heaptup->t_len);
}
else
@@ -3980,20 +3833,20 @@ l2:
/* It doesn't fit, must use RelationGetBufferForTuple. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
buffer, 0, NULL,
- &vmbuffer_new, &vmbuffer,
+ &vmbuffer_new, vmbuffer,
0);
/* We're all done. */
break;
}
/* Acquire VM page pin if needed and we don't have it. */
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, vmbuffer);
/* Re-acquire the lock on the old tuple's page. */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* Re-check using the up-to-date free space */
pagefree = PageGetHeapFreeSpace(page);
if (newtupsize > pagefree ||
- (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
+ (*vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
{
/*
* Rats, it doesn't fit anymore, or somebody just now set the
@@ -4031,7 +3884,7 @@ l2:
* will include checking the relation level, there is no benefit to a
* separate check for the new tuple.
*/
- CheckForSerializableConflictIn(relation, &oldtup.t_self,
+ CheckForSerializableConflictIn(relation, &oldtup->t_self,
BufferGetBlockNumber(buffer));
/*
@@ -4039,7 +3892,6 @@ l2:
* has enough space for the new tuple. If they are the same buffer, only
* one pin is held.
*/
-
if (newbuf == buffer)
{
/*
@@ -4047,7 +3899,7 @@ l2:
* to do a HOT update. Check if any of the index columns have been
* changed.
*/
- if (!bms_overlap(modified_attrs, hot_attrs))
+ if (!bms_overlap(mix_attrs, hot_attrs))
{
use_hot_update = true;
@@ -4058,7 +3910,7 @@ l2:
* indexes if the columns were updated, or we may fail to detect
* e.g. value bound changes in BRIN minmax indexes.
*/
- if (bms_overlap(modified_attrs, sum_attrs))
+ if (bms_overlap(mix_attrs, sum_attrs))
summarized_update = true;
}
}
@@ -4075,10 +3927,8 @@ l2:
* logged. Pass old key required as true only if the replica identity key
* columns are modified or it has external data.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
- bms_overlap(modified_attrs, id_attrs) ||
- id_has_external,
- &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, oldtup, rid_attrs,
+ rep_id_key_required, &old_key_copied);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -4100,7 +3950,7 @@ l2:
if (use_hot_update)
{
/* Mark the old tuple as HOT-updated */
- HeapTupleSetHotUpdated(&oldtup);
+ HeapTupleSetHotUpdated(oldtup);
/* And mark the new tuple as heap-only */
HeapTupleSetHeapOnly(heaptup);
/* Mark the caller's copy too, in case different from heaptup */
@@ -4109,7 +3959,7 @@ l2:
else
{
/* Make sure tuples are correctly marked as not-HOT */
- HeapTupleClearHotUpdated(&oldtup);
+ HeapTupleClearHotUpdated(oldtup);
HeapTupleClearHeapOnly(heaptup);
HeapTupleClearHeapOnly(newtup);
}
@@ -4118,17 +3968,17 @@ l2:
/* Clear obsolete visibility flags, possibly set by ourselves above... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
- oldtup.t_data->t_infomask |= infomask_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* record address of new tuple in t_ctid of old one */
- oldtup.t_data->t_ctid = heaptup->t_self;
+ oldtup->t_data->t_ctid = heaptup->t_self;
/* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
if (PageIsAllVisible(BufferGetPage(buffer)))
@@ -4136,7 +3986,7 @@ l2:
all_visible_cleared = true;
PageClearAllVisible(BufferGetPage(buffer));
visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
- vmbuffer, VISIBILITYMAP_VALID_BITS);
+ *vmbuffer, VISIBILITYMAP_VALID_BITS);
}
if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
{
@@ -4161,12 +4011,12 @@ l2:
*/
if (RelationIsAccessibleInLogicalDecoding(relation))
{
- log_heap_new_cid(relation, &oldtup);
+ log_heap_new_cid(relation, oldtup);
log_heap_new_cid(relation, heaptup);
}
recptr = log_heap_update(relation, buffer,
- newbuf, &oldtup, heaptup,
+ newbuf, oldtup, heaptup,
old_key_tuple,
all_visible_cleared,
all_visible_cleared_new);
@@ -4191,7 +4041,7 @@ l2:
* both tuple versions in one call to inval.c so we can avoid redundant
* sinval messages.)
*/
- CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
+ CacheInvalidateHeapTuple(relation, oldtup, heaptup);
/* Now we can release the buffer(s) */
if (newbuf != buffer)
@@ -4199,14 +4049,14 @@ l2:
ReleaseBuffer(buffer);
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
- if (BufferIsValid(vmbuffer))
- ReleaseBuffer(vmbuffer);
+ if (BufferIsValid(*vmbuffer))
+ ReleaseBuffer(*vmbuffer);
/*
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
@@ -4239,13 +4089,6 @@ l2:
if (old_key_tuple != NULL && old_key_copied)
heap_freetuple(old_key_tuple);
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
-
return TM_Ok;
}
@@ -4254,7 +4097,7 @@ l2:
* Confirm adequate lock held during heap_update(), per rules from
* README.tuplock section "Locking to write inplace-updated tables".
*/
-static void
+void
check_lock_if_inplace_updateable_rel(Relation relation,
const ItemPointerData *otid,
HeapTuple newtup)
@@ -4426,7 +4269,7 @@ heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
* listed as interesting) of the old tuple is a member of external_cols and is
* stored externally.
*/
-static Bitmapset *
+Bitmapset *
HeapDetermineColumnsInfo(Relation relation,
Bitmapset *interesting_cols,
Bitmapset *external_cols,
@@ -4509,25 +4352,175 @@ HeapDetermineColumnsInfo(Relation relation,
}
/*
- * simple_heap_update - replace a tuple
- *
- * This routine may be used to update a tuple when concurrent updates of
- * the target tuple are not expected (for example, because we have a lock
- * on the relation associated with the tuple). Any failure is reported
- * via ereport().
+ * This routine may be used to update a tuple when concurrent updates of the
+ * target tuple are not expected (for example, because we have a lock on the
+ * relation associated with the tuple). Any failure is reported via ereport().
*/
void
-simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup,
+simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *mix_attrs,
+ *idx_attrs;
+ ItemId lp;
+ HeapTupleData oldtup;
+ bool rep_id_key_required = false;
+
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ /*
+ * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
+ * we see LP_NORMAL here. When the otid origin is a syscache, we may have
+ * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
+ * of which indicates concurrent pruning.
+ *
+ * Failing with TM_Updated would be most accurate. However, unlike other
+ * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
+ * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
+ * does matter to SQL statements UPDATE and MERGE, those SQL statements
+ * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
+ * TM_Updated and TM_Deleted affects only the wording of error messages.
+ * Settle on TM_Deleted, for two reasons. First, it avoids complicating
+ * the specification of when tmfd->ctid is valid. Second, it creates
+ * error log evidence that we took this branch.
+ *
+ * Since it's possible to see LP_UNUSED at otid, it's also possible to see
+ * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
+ * unrelated row, we'll fail with "duplicate key value violates unique".
+ * XXX if otid is the live, newer version of the newtup row, we'll discard
+ * changes originating in versions of this catalog row after the version
+ * the caller got from syscache. See syscache-update-pruned.spec.
+ */
+ if (!ItemIdIsNormal(lp))
+ {
+ Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
+
+ UnlockReleaseBuffer(buffer);
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ *update_indexes = TU_None;
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(idx_attrs);
+ /* mix_attrs not yet initialized */
+
+ elog(ERROR, "tuple concurrently deleted");
+
+ return;
+ }
+
+ /*
+ * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
+ * then pass that on to heap_update.
+ */
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
+ &oldtup, tuple, &rep_id_key_required);
+
+ /*
+ * We'll need to WAL log the replica identity attributes if either they
+ * overlap with the modified indexed attributes or, as we've checked for
+ * just now in HeapDetermineColumnsInfo, they were unmodified external
+ * indexed attributes.
+ */
+ rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+
+ result = heap_update(relation, &oldtup, tuple, GetCurrentCommandId(true),
+ InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required,
+ update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(mix_attrs);
+ bms_free(idx_attrs);
- result = heap_update(relation, otid, tup,
- GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
switch (result)
{
case TM_SelfModified:
@@ -9183,12 +9176,11 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
* the same tuple that was passed in.
*/
static HeapTuple
-ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
- bool *copy)
+ExtractReplicaIdentity(Relation relation, HeapTuple tp, Bitmapset *rid_attrs,
+ bool key_required, bool *copy)
{
TupleDesc desc = RelationGetDescr(relation);
char replident = relation->rd_rel->relreplident;
- Bitmapset *idattrs;
HeapTuple key_tuple;
bool nulls[MaxHeapAttributeNumber];
Datum values[MaxHeapAttributeNumber];
@@ -9219,17 +9211,13 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
if (!key_required)
return NULL;
- /* find out the replica identity columns */
- idattrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
-
/*
* If there's no defined replica identity columns, treat as !key_required.
* (This case should not be reachable from heap_update, since that should
* calculate key_required accurately. But heap_delete just passes
* constant true for key_required, so we can hit this case in deletes.)
*/
- if (bms_is_empty(idattrs))
+ if (bms_is_empty(rid_attrs))
return NULL;
/*
@@ -9242,7 +9230,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
for (int i = 0; i < desc->natts; i++)
{
if (bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
- idattrs))
+ rid_attrs))
Assert(!nulls[i]);
else
nulls[i] = true;
@@ -9251,8 +9239,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
key_tuple = heap_form_tuple(desc, values, nulls);
*copy = true;
- bms_free(idattrs);
-
/*
* If the tuple, which by here only contains indexed columns, still has
* toasted columns, force them to be inlined. This is somewhat unlikely
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 09a456e9966..7d8c80d3ff7 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -44,6 +44,7 @@
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/rel.h"
static void reform_and_rewrite_tuple(HeapTuple tuple,
@@ -312,23 +313,133 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}
-
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd,
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
{
+ bool rep_id_key_required = false;
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+ HeapTupleData oldtup;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ ItemId lp;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *mix_attrs,
+ *idx_attrs;
TM_Result result;
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ Assert(ItemIdIsNormal(lp));
+
+ /*
+ * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
+ * then pass that on to heap_update.
+ */
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
+ &oldtup, tuple, &rep_id_key_required);
+
+ /*
+ * We'll need to WAL log the replica identity attributes if either they
+ * overlap with the modified indexed attributes or, as we've checked for
+ * just now in HeapDetermineColumnsInfo, they were unmodified external
+ * indexed attributes.
+ */
+ rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
- tmfd, lockmode, update_indexes);
+ result = heap_update(relation, &oldtup, tuple, cid, crosscheck, wait, tmfd, lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required, update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(mix_attrs);
+ bms_free(idx_attrs);
+
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ce48fac42ba..41193d5b3d2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -364,11 +364,13 @@ extern TM_Result heap_delete(Relation relation, const ItemPointerData *tid,
TM_FailureData *tmfd, bool changingPart);
extern void heap_finish_speculative(Relation relation, const ItemPointerData *tid);
extern void heap_abort_speculative(Relation relation, const ItemPointerData *tid);
-extern TM_Result heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
+extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup,
+ HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
+ Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
+ Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
+ Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
@@ -430,6 +432,18 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
OffsetNumber *dead, int ndead,
OffsetNumber *unused, int nunused);
+/* in heap/heapam.c */
+extern Bitmapset *HeapDetermineColumnsInfo(Relation relation,
+ Bitmapset *interesting_cols,
+ Bitmapset *external_cols,
+ HeapTuple oldtup, HeapTuple newtup,
+ bool *has_external);
+#ifdef USE_ASSERT_CHECKING
+extern void check_lock_if_inplace_updateable_rel(Relation relation,
+ const ItemPointerData *otid,
+ HeapTuple newtup);
+#endif
+
/* in heap/vacuumlazy.c */
extern void heap_vacuum_rel(Relation rel,
const VacuumParams params, BufferAccessStrategy bstrategy);
--
2.51.2
[application/octet-stream] v27-0004-Identify-if-partial-indexes-are-impacted-by-an-u.patch (3.8K, 3-v27-0004-Identify-if-partial-indexes-are-impacted-by-an-u.patch)
download | inline diff:
From 99e6603d5e82b4e41c4fbb6923e49527f91ec376 Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Fri, 5 Dec 2025 13:42:13 -0500
Subject: [PATCH v27 4/4] Identify if partial indexes are impacted by an
update.
Ahead of calling into the table AM update function, the executor now
determines which indexed attributes, if any, are both modified and force
new index tuples to be inserted. Prior to this commit the test for
partial indexes happened after the table update; this commit moves it
before the update so that, in cases where the before and after tuples
both lie outside the predicate, the attributes for the predicate are not
included in the "modified indexed attributes" bitmapset.
---
src/backend/executor/nodeModifyTable.c | 53 ++++++++++++++++++++++++--
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 8fca5e09a26..5a2c95dc9a7 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -226,9 +226,11 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
Bitmapset *m_attrs = NULL; /* (possibly) modified indexed attrs */
Bitmapset *p_attrs = NULL; /* (possibly) modified predicate attrs */
Bitmapset *u_attrs = NULL; /* unmodified indexed attrs */
+ Bitmapset *pre_attrs = indexInfo->ii_PredicateAttrs;
bool has_am_compare = (amroutine->amcomparedatums != NULL);
bool supports_ios = (amroutine->amcanreturn != NULL);
bool is_partial = (indexInfo->ii_Predicate != NIL);
+ TupleTableSlot *save_scantuple;
ExprContext *econtext = GetPerTupleExprContext(estate);
int num_datums = supports_ios ?
indexInfo->ii_NumIndexAttrs : indexInfo->ii_NumIndexKeyAttrs;
@@ -237,9 +239,51 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs))
continue;
- /* Add partial index attributes */
- if (is_partial)
- p_attrs = bms_add_members(p_attrs, indexInfo->ii_PredicateAttrs);
+ /* Checking partial at this point isn't viable when we're serializable */
+ if (is_partial && IsolationIsSerializable())
+ {
+ p_attrs = bms_add_members(p_attrs, pre_attrs);
+ }
+ /* Check partial index predicate */
+ else if (is_partial)
+ {
+ ExprState *pstate;
+ bool old_qualifies,
+ new_qualifies;
+
+
+ if (!indexInfo->ii_CheckedPredicate)
+ pstate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ else
+ pstate = indexInfo->ii_PredicateState;
+
+ save_scantuple = econtext->ecxt_scantuple;
+
+ econtext->ecxt_scantuple = old_tts;
+ old_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = new_tts;
+ new_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = save_scantuple;
+
+ indexInfo->ii_CheckedPredicate = true;
+ indexInfo->ii_PredicateState = pstate;
+ indexInfo->ii_PredicateSatisfied = new_qualifies;
+
+ /* Both outside predicate, index doesn't need update */
+ if (!old_qualifies && !new_qualifies)
+ continue;
+
+ /* A transition means we need to update the index */
+ if (old_qualifies != new_qualifies)
+ p_attrs = bms_copy(pre_attrs);
+
+ /*
+ * When both are within the predicate we must update this index,
+ * but only if one of the index key attributes changed.
+ */
+ }
/* Compare the index datums for equality */
for (int j = 0; j < num_datums; j++)
@@ -275,11 +319,12 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
*/
else if (rel_attrnum == 0)
{
- TupleTableSlot *save_scantuple = econtext->ecxt_scantuple;
Oid expr_type_oid;
Expr *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
ExprState *state;
+ save_scantuple = econtext->ecxt_scantuple;
+
if (indexInfo->ii_ExpressionsState == NIL)
{
/* First time through, set up expression evaluation state */
--
2.51.2
[application/octet-stream] v27-0003-Replace-index_unchanged_by_update-with-ri_Change.patch (8.4K, 4-v27-0003-Replace-index_unchanged_by_update-with-ri_Change.patch)
download | inline diff:
From 9423421a695b4d560cf7bfc7e6f2dcd1c630084d Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Fri, 31 Oct 2025 14:55:25 -0400
Subject: [PATCH v27 3/4] Replace index_unchanged_by_update() with
ri_ChangedIndexedCols
In execIndexing on updates we'd like to pass a hint to the indexing code
when the indexed attributes are unchanged. This commit replaces the now
redundant code in index_unchanged_by_update() with the same information
found earlier in ExecWhichIndexesRequireUpdates() and stashed in
ri_ChangedIndexedCols.
---
src/backend/catalog/toasting.c | 2 -
src/backend/executor/execIndexing.c | 156 +---------------------------
src/backend/nodes/makefuncs.c | 2 -
src/include/nodes/execnodes.h | 4 -
4 files changed, 1 insertion(+), 163 deletions(-)
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index ff8da5be5f8..5675c6f8ea9 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -304,8 +304,6 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
indexInfo->ii_Unique = true;
indexInfo->ii_NullsNotDistinct = false;
indexInfo->ii_ReadyForInserts = true;
- indexInfo->ii_CheckedUnchanged = false;
- indexInfo->ii_IndexUnchanged = false;
indexInfo->ii_Concurrent = false;
indexInfo->ii_BrokenHotChain = false;
indexInfo->ii_ParallelWorkers = 0;
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 1275feffae9..b75e76401d2 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -143,11 +143,6 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
static bool index_recheck_constraint(Relation index, const Oid *constr_procs,
const Datum *existing_values, const bool *existing_isnull,
const Datum *new_values);
-static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
- EState *estate, IndexInfo *indexInfo,
- Relation indexRelation);
-static bool index_expression_changed_walker(Node *node,
- Bitmapset *allUpdatedCols);
static void ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval,
char typtype, Oid atttypid);
@@ -451,10 +446,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* index. If we're being called as part of an UPDATE statement,
* consider if the 'indexUnchanged' = true hint should be passed.
*/
- indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
- estate,
- indexInfo,
- indexRelation);
+ indexUnchanged = update && bms_is_empty(resultRelInfo->ri_ChangedIndexedCols);
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
@@ -1014,152 +1006,6 @@ index_recheck_constraint(Relation index, const Oid *constr_procs,
return true;
}
-/*
- * Check if ExecInsertIndexTuples() should pass indexUnchanged hint.
- *
- * When the executor performs an UPDATE that requires a new round of index
- * tuples, determine if we should pass 'indexUnchanged' = true hint for one
- * single index.
- */
-static bool
-index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
- IndexInfo *indexInfo, Relation indexRelation)
-{
- Bitmapset *updatedCols;
- Bitmapset *extraUpdatedCols;
- Bitmapset *allUpdatedCols;
- bool hasexpression = false;
- List *idxExprs;
-
- /*
- * Check cache first
- */
- if (indexInfo->ii_CheckedUnchanged)
- return indexInfo->ii_IndexUnchanged;
- indexInfo->ii_CheckedUnchanged = true;
-
- /*
- * Check for indexed attribute overlap with updated columns.
- *
- * Only do this for key columns. A change to a non-key column within an
- * INCLUDE index should not be counted here. Non-key column values are
- * opaque payload state to the index AM, a little like an extra table TID.
- *
- * Note that row-level BEFORE triggers won't affect our behavior, since
- * they don't affect the updatedCols bitmaps generally. It doesn't seem
- * worth the trouble of checking which attributes were changed directly.
- */
- updatedCols = ExecGetUpdatedCols(resultRelInfo, estate);
- extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate);
- for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++)
- {
- int keycol = indexInfo->ii_IndexAttrNumbers[attr];
-
- if (keycol <= 0)
- {
- /*
- * Skip expressions for now, but remember to deal with them later
- * on
- */
- hasexpression = true;
- continue;
- }
-
- if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- updatedCols) ||
- bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- extraUpdatedCols))
- {
- /* Changed key column -- don't hint for this index */
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
- }
-
- /*
- * When we get this far and index has no expressions, return true so that
- * index_insert() call will go on to pass 'indexUnchanged' = true hint.
- *
- * The _absence_ of an indexed key attribute that overlaps with updated
- * attributes (in addition to the total absence of indexed expressions)
- * shows that the index as a whole is logically unchanged by UPDATE.
- */
- if (!hasexpression)
- {
- indexInfo->ii_IndexUnchanged = true;
- return true;
- }
-
- /*
- * Need to pass only one bms to expression_tree_walker helper function.
- * Avoid allocating memory in common case where there are no extra cols.
- */
- if (!extraUpdatedCols)
- allUpdatedCols = updatedCols;
- else
- allUpdatedCols = bms_union(updatedCols, extraUpdatedCols);
-
- /*
- * We have to work slightly harder in the event of indexed expressions,
- * but the principle is the same as before: try to find columns (Vars,
- * actually) that overlap with known-updated columns.
- *
- * If we find any matching Vars, don't pass hint for index. Otherwise
- * pass hint.
- */
- idxExprs = RelationGetIndexExpressions(indexRelation);
- hasexpression = index_expression_changed_walker((Node *) idxExprs,
- allUpdatedCols);
- list_free(idxExprs);
- if (extraUpdatedCols)
- bms_free(allUpdatedCols);
-
- if (hasexpression)
- {
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
-
- /*
- * Deliberately don't consider index predicates. We should even give the
- * hint when result rel's "updated tuple" has no corresponding index
- * tuple, which is possible with a partial index (provided the usual
- * conditions are met).
- */
- indexInfo->ii_IndexUnchanged = true;
- return true;
-}
-
-/*
- * Indexed expression helper for index_unchanged_by_update().
- *
- * Returns true when Var that appears within allUpdatedCols located.
- */
-static bool
-index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
-{
- if (node == NULL)
- return false;
-
- if (IsA(node, Var))
- {
- Var *var = (Var *) node;
-
- if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
- allUpdatedCols))
- {
- /* Var was updated -- indicates that we should not hint */
- return true;
- }
-
- /* Still haven't found a reason to not pass the hint */
- return false;
- }
-
- return expression_tree_walker(node, index_expression_changed_walker,
- allUpdatedCols);
-}
-
/*
* ExecWithoutOverlapsNotEmpty - raise an error if the tuple has an empty
* range or multirange in the given attribute.
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index dd092bacad9..9d3a5b79d27 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -845,8 +845,6 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
n->ii_Unique = unique;
n->ii_NullsNotDistinct = nulls_not_distinct;
n->ii_ReadyForInserts = isready;
- n->ii_CheckedUnchanged = false;
- n->ii_IndexUnchanged = false;
n->ii_Concurrent = concurrent;
n->ii_Summarizing = summarizing;
n->ii_WithoutOverlaps = withoutoverlaps;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d5af2f34d0f..8e583b1d9d3 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -216,10 +216,6 @@ typedef struct IndexInfo
bool ii_NullsNotDistinct;
/* is it valid for inserts? */
bool ii_ReadyForInserts;
- /* IndexUnchanged status determined yet? */
- bool ii_CheckedUnchanged;
- /* aminsert hint, cached for retail inserts */
- bool ii_IndexUnchanged;
/* are we doing a concurrent index build? */
bool ii_Concurrent;
/* did we detect any broken HOT chains? */
--
2.51.2
[application/octet-stream] v27-0002-Track-changed-indexed-columns-in-the-executor-du.patch (113.5K, 5-v27-0002-Track-changed-indexed-columns-in-the-executor-du.patch)
download | inline diff:
From 06c0a809cb636bb863a39c8db9dd11e036b27ed6 Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Sun, 26 Oct 2025 10:49:25 -0400
Subject: [PATCH v27 2/4] Track changed indexed columns in the executor during
UPDATEs
Refactor executor update logic to determine which indexed columns have
actually changed during an UPDATE operation rather than leaving this up
to HeapDetermineColumnsInfo() in heap_update().
ExecWhichIndexesRequireUpdates() replaces HeapDetermineColumnsInfo()
when invoked from the table AM API via heapam_tuple_update(). The
test for equality remains datumIsEqual() as before.
This change necessitated some logic changes in execReplication(): because
it performs updates, it must now provide the set of attributes that are
both changed and referenced by indexes. Luckily, this is available within
calls to slot_modify_data(), where LogicalRepTupleData is processed and
has a record of updated attributes. In this case, rather than using
ExecWhichIndexesRequireUpdates(), we can preserve what slot_modify_data()
identifies as the modified set and then intersect that with the set of
attributes indexed on the relation to get the correct set of modified
indexed attributes required by heap_update().
This commit also extends the role index AMs play in determining whether
they require an update. A new optional index AM API, amcomparedatums(), is
added to allow index access methods to provide custom logic for
comparing datums. Hash and GIN indexes now implement this function. When
it is not implemented, the executor will compare TupleTableSlot datums for
equality using datumIsEqual() as before.
Because heap_update() now requires the caller to provide the modified
indexed columns, simple_heap_update() has become a tad more complex. It
is only called from CatalogTupleUpdate(), which updates heap tuples
either via their Form_XXX or by calling heap_modify_tuple(). In both
cases the caller does know the modified set of attributes, but sadly
those attributes are lost before being provided to simple_heap_update().
Because of that, the "simple" path has to (for now) retain the
HeapDetermineColumnsInfo() logic in order for catalog updates to
potentially take the HOT path.
---
src/backend/access/brin/brin.c | 1 +
src/backend/access/gin/ginutil.c | 90 ++-
src/backend/access/hash/hash.c | 44 ++
src/backend/access/heap/heapam.c | 20 +-
src/backend/access/heap/heapam_handler.c | 76 +-
src/backend/access/nbtree/nbtree.c | 1 +
src/backend/access/table/tableam.c | 5 +-
src/backend/bootstrap/bootstrap.c | 8 +
src/backend/catalog/index.c | 57 ++
src/backend/catalog/indexing.c | 16 +-
src/backend/catalog/toasting.c | 4 +
src/backend/executor/execIndexing.c | 41 +-
src/backend/executor/execMain.c | 1 +
src/backend/executor/execReplication.c | 7 +
src/backend/executor/nodeModifyTable.c | 287 +++++++-
src/backend/nodes/bitmapset.c | 4 +
src/backend/nodes/makefuncs.c | 4 +
src/backend/replication/logical/worker.c | 70 +-
src/backend/utils/cache/relcache.c | 15 +
src/include/access/amapi.h | 28 +
src/include/access/gin.h | 3 +
src/include/access/heapam.h | 6 +-
src/include/access/nbtree.h | 4 +
src/include/access/tableam.h | 8 +-
src/include/catalog/index.h | 1 +
src/include/executor/executor.h | 9 +
src/include/nodes/execnodes.h | 20 +
src/include/utils/rel.h | 1 +
src/include/utils/relcache.h | 1 +
.../expected/insert-conflict-specconflict.out | 20 +
.../regress/expected/heap_hot_updates.out | 650 ++++++++++++++++++
src/test/regress/parallel_schedule | 6 +
src/test/regress/sql/heap_hot_updates.sql | 513 ++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
34 files changed, 1948 insertions(+), 74 deletions(-)
create mode 100644 src/test/regress/expected/heap_hot_updates.out
create mode 100644 src/test/regress/sql/heap_hot_updates.sql
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 6887e421442..aa9fd110802 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -290,6 +290,7 @@ brinhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = NULL,
.amvalidate = brinvalidate,
+ .amcomparedatums = NULL,
.amadjustmembers = NULL,
.ambeginscan = brinbeginscan,
.amrescan = brinrescan,
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index a546cac18d3..9f994feae5d 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -26,6 +26,7 @@
#include "storage/indexfsm.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
+#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/typcache.h"
@@ -78,6 +79,7 @@ ginhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = ginbuildphasename,
.amvalidate = ginvalidate,
+ .amcomparedatums = gincomparedatums,
.amadjustmembers = ginadjustmembers,
.ambeginscan = ginbeginscan,
.amrescan = ginrescan,
@@ -478,13 +480,6 @@ cmpEntries(const void *a, const void *b, void *arg)
return res;
}
-
-/*
- * Extract the index key values from an indexable item
- *
- * The resulting key values are sorted, and any duplicates are removed.
- * This avoids generating redundant index entries.
- */
Datum *
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
@@ -730,3 +725,84 @@ ginbuildphasename(int64 phasenum)
return NULL;
}
}
+
+/*
+ * gincomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * Extracts the keys of old_datum and new_datum for index column attnum with
+ * ginExtractEntries(), then compares the two key arrays entry by entry.
+ * Returns true if the key sets are identical (same keys, same counts).
+ * A NULL and a non-NULL datum never match; two NULL datums always do.
+ *
+ * This enables HOT updates for GIN indexes when the indexed portions of a
+ * value haven't changed, even if the value itself has changed.
+ *
+ * Example: JSONB column with GIN index. If an update leaves the keys
+ * extracted from the JSONB document identical, we can do a HOT update.
+ */
+bool
+gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ GinState ginstate;
+ Datum *old_keys;
+ Datum *new_keys;
+ GinNullCategory *old_categories;
+ GinNullCategory *new_categories;
+ int32 old_nkeys;
+ int32 new_nkeys;
+ MemoryContext tmpcontext;
+ MemoryContext oldcontext;
+ bool result = true;
+
+ /* NULL vs. non-NULL differs, NULL vs. NULL doesn't; no extraction needed */
+ if (old_isnull != new_isnull)
+ return false;
+ if (old_isnull)
+ return true;
+
+ /* Do all extraction work in a throwaway context, deleted before returning */
+ tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+ "GIN datum comparison",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+ initGinState(&ginstate, index);
+
+ /* Extract keys from both datums using existing GIN infrastructure */
+ old_keys = ginExtractEntries(&ginstate, attnum, old_datum, old_isnull,
+ &old_nkeys, &old_categories);
+ new_keys = ginExtractEntries(&ginstate, attnum, new_datum, new_isnull,
+ &new_nkeys, &new_categories);
+
+ /* Different number of keys, definitely different */
+ if (old_nkeys != new_nkeys)
+ {
+ result = false;
+ goto cleanup;
+ }
+
+ /*
+ * Compare the sorted key arrays element-by-element. Since both arrays are
+ * already sorted by ginExtractEntries, we can do a simple O(n)
+ * comparison.
+ */
+ for (int i = 0; i < old_nkeys; i++)
+ {
+ if (ginCompareEntries(&ginstate, attnum,
+ old_keys[i], old_categories[i],
+ new_keys[i], new_categories[i]) != 0)
+ {
+ result = false;
+ break;
+ }
+ }
+
+cleanup:
+ /* Return to the caller's context and release everything allocated above */
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(tmpcontext);
+
+ return result;
+}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e88ddb32a05..49a99998083 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -50,6 +50,10 @@ static void hashbuildCallback(Relation index,
void *state);
+static bool hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Hash handler function: return IndexAmRoutine with access method parameters
* and callbacks.
@@ -98,6 +102,7 @@ hashhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = NULL,
.amvalidate = hashvalidate,
+ .amcomparedatums = hashcomparedatums,
.amadjustmembers = hashadjustmembers,
.ambeginscan = hashbeginscan,
.amrescan = hashrescan,
@@ -944,3 +949,42 @@ hashtranslatecmptype(CompareType cmptype, Oid opfamily)
return HTEqualStrategyNumber;
return InvalidStrategy;
}
+
+/*
+ * hashcomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * Returns true if both datums are NULL or hash identically; attnum is unused.
+ */
+static bool
+hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ uint32 old_hashkey;
+ uint32 new_hashkey;
+
+ /* If both are NULL, they're equal */
+ if (old_isnull && new_isnull)
+ return true;
+
+ /* If NULL status differs, they're not equal */
+ if (old_isnull != new_isnull)
+ return false;
+
+ /*
+ * _hash_datum2hashkey() is used because we know this can't be a cross
+ * type comparison.
+ */
+ old_hashkey = _hash_datum2hashkey(index, old_datum);
+ new_hashkey = _hash_datum2hashkey(index, new_datum);
+
+ /*
+ * If hash keys are identical, the index entry would be the same. Return
+ * true to indicate no index update needed.
+ *
+ * Note: Hash collisions are rare but possible. If hash(x) == hash(y) but
+ * x != y, the hash index still treats them identically, so we correctly
+ * return true.
+ */
+ return (old_hashkey == new_hashkey);
+}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 2579f21e212..023d0595349 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3281,12 +3281,12 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* generated by another transaction).
*/
TM_Result
-heap_update(Relation relation, HeapTupleData *oldtup,
- HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
- Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
- Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
- Bitmapset *mix_attrs, Buffer *vmbuffer,
+heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup,
+ CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode,
+ Buffer buffer, Page page, BlockNumber block, ItemId lp,
+ Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs,
+ Bitmapset *rid_attrs, const Bitmapset *mix_attrs, Buffer *vmbuffer,
bool rep_id_key_required, TU_UpdateIndexes *update_indexes)
{
TM_Result result;
@@ -4355,8 +4355,9 @@ HeapDetermineColumnsInfo(Relation relation,
* This routine may be used to update a tuple when concurrent updates of the
* target tuple are not expected (for example, because we have a lock on the
* relation associated with the tuple). Any failure is reported via ereport().
+ * Returns the set of modified indexed attributes.
*/
-void
+Bitmapset *
simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple,
TU_UpdateIndexes *update_indexes)
{
@@ -4485,7 +4486,7 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
elog(ERROR, "tuple concurrently deleted");
- return;
+ return NULL;
}
/*
@@ -4518,7 +4519,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
bms_free(sum_attrs);
bms_free(pk_attrs);
bms_free(rid_attrs);
- bms_free(mix_attrs);
bms_free(idx_attrs);
switch (result)
@@ -4544,6 +4544,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
elog(ERROR, "unrecognized heap_update status: %u", result);
break;
}
+
+ return mix_attrs;
}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 7d8c80d3ff7..d171247145c 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -315,9 +315,12 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
- CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ CommandId cid, Snapshot snapshot,
+ Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd,
+ LockTupleMode *lockmode,
+ const Bitmapset *mix_attrs,
+ TU_UpdateIndexes *update_indexes)
{
bool rep_id_key_required = false;
bool shouldFree = true;
@@ -332,7 +335,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
*sum_attrs,
*pk_attrs,
*rid_attrs,
- *mix_attrs,
*idx_attrs;
TM_Result result;
@@ -405,25 +407,66 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
Assert(ItemIdIsNormal(lp));
- /*
- * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
- * then pass that on to heap_update.
- */
oldtup.t_tableOid = RelationGetRelid(relation);
oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
- mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
- &oldtup, tuple, &rep_id_key_required);
-
/*
- * We'll need to WAL log the replica identity attributes if either they
- * overlap with the modified indexed attributes or, as we've checked for
- * just now in HeapDetermineColumnsInfo, they were unmodified external
- * indexed attributes.
+ * We'll need to include the replica identity key when either the identity
+ * key attributes overlap with the modified index attributes or when the
+ * replica identity attributes are stored externally. This is required
+ * because for such attributes the flattened value won't be WAL logged as
+ * part of the new tuple so we must determine if we need to extract and
+ * include them as part of the old_key_tuple (see ExtractReplicaIdentity).
*/
- rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+ rep_id_key_required = bms_overlap(mix_attrs, rid_attrs);
+ if (!rep_id_key_required)
+ {
+ Bitmapset *attrs;
+ TupleDesc tupdesc = RelationGetDescr(relation);
+ int attidx = -1;
+
+ /*
+ * We don't own idx_attrs so we'll copy it and remove the modified set
+ * to reduce the attributes we need to test in the while loop and
+ * avoid two branches in the loop.
+ */
+ attrs = bms_difference(idx_attrs, mix_attrs);
+ attrs = bms_int_members(attrs, rid_attrs);
+
+ while ((attidx = bms_next_member(attrs, attidx)) >= 0)
+ {
+ /*
+ * attidx is zero-based, attrnum is the normal attribute number
+ */
+ AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber;
+ Datum value;
+ bool isnull;
+
+ /*
+ * System attributes are not added into interesting_attrs in
+ * relcache
+ */
+ Assert(attrnum > 0);
+
+ value = heap_getattr(&oldtup, attrnum, tupdesc, &isnull);
+
+ /* No need to check attributes that can't be stored externally */
+ if (isnull ||
+ TupleDescCompactAttr(tupdesc, attrnum - 1)->attlen != -1)
+ continue;
+
+ /* Check if the old tuple's attribute is stored externally */
+ if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value)))
+ {
+ rep_id_key_required = true;
+ break;
+ }
+ }
+
+ bms_free(attrs);
+ }
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
@@ -437,7 +480,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
bms_free(sum_attrs);
bms_free(pk_attrs);
bms_free(rid_attrs);
- bms_free(mix_attrs);
bms_free(idx_attrs);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3dec1ee657d..b975612bbdd 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -156,6 +156,7 @@ bthandler(PG_FUNCTION_ARGS)
.amproperty = btproperty,
.ambuildphasename = btbuildphasename,
.amvalidate = btvalidate,
+ .amcomparedatums = NULL,
.amadjustmembers = btadjustmembers,
.ambeginscan = btbeginscan,
.amrescan = btrescan,
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 87491796523..458d48ca79e 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -367,6 +367,7 @@ void
simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot,
Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
@@ -377,7 +378,9 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ &tmfd, &lockmode,
+ mix_attrs,
+ update_indexes);
switch (result)
{
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index dd57624b4f9..81347c7b47e 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -963,10 +963,18 @@ index_register(Oid heap,
newind->il_info->ii_Expressions =
copyObject(indexInfo->ii_Expressions);
newind->il_info->ii_ExpressionsState = NIL;
+ /* expression attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_ExpressionsAttrs =
+ copyObject(indexInfo->ii_ExpressionsAttrs);
/* predicate will likely be null, but may as well copy it */
newind->il_info->ii_Predicate =
copyObject(indexInfo->ii_Predicate);
newind->il_info->ii_PredicateState = NULL;
+ /* predicate attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_PredicateAttrs =
+ copyObject(indexInfo->ii_PredicateAttrs);
+ newind->il_info->ii_CheckedPredicate = false;
+ newind->il_info->ii_PredicateSatisfied = false;
/* no exclusion constraints at bootstrap time, so no need to copy */
Assert(indexInfo->ii_ExclusionOps == NULL);
Assert(indexInfo->ii_ExclusionProcs == NULL);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 43de42ce39e..fe536c9740f 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -27,6 +27,7 @@
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relscan.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
#include "access/toast_compression.h"
#include "access/transam.h"
@@ -58,6 +59,7 @@
#include "commands/trigger.h"
#include "executor/executor.h"
#include "miscadmin.h"
+#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
@@ -2412,6 +2414,61 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
* ----------------------------------------------------------------
*/
+/* ----------------
+ * BuildUpdateIndexInfo
+ *
+ * For expression indexes, updates may not change the indexed value, allowing
+ * for a HOT update.  For every index of resultRelInfo, fill in the IndexInfo's
+ * ii_ExpressionsAttrs, ii_PredicateAttrs and ii_IndexedAttrs so that callers
+ * can check whether the indexed value has changed.
+ * Do this processing here rather than in BuildIndexInfo() to not incur the
+ * overhead in the common non-expression cases.
+ * ----------------
+ */
+void
+BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo)
+{
+ for (int j = 0; j < resultRelInfo->ri_NumIndices; j++)
+ {
+ int i;
+ int indnatts;
+ Bitmapset *attrs = NULL;
+ IndexInfo *ii = resultRelInfo->ri_IndexRelationInfo[j];
+
+ indnatts = ii->ii_NumIndexAttrs;
+
+ /* Collect key attributes used by the index, key and including */
+ for (i = 0; i < indnatts; i++)
+ {
+ AttrNumber attnum = ii->ii_IndexAttrNumbers[i];
+
+ if (attnum != 0)
+ attrs = bms_add_member(attrs, attnum - FirstLowInvalidHeapAttributeNumber);
+ }
+
+ /* Collect attributes used in the expressions; their Vars use varno 1 */
+ if (ii->ii_Expressions)
+ pull_varattnos((Node *) ii->ii_Expressions,
+ 1,
+ &ii->ii_ExpressionsAttrs);
+
+ /* Collect attributes used in the predicate; its Vars use varno 1 too */
+ if (ii->ii_Predicate)
+ pull_varattnos((Node *) ii->ii_Predicate,
+ 1,
+ &ii->ii_PredicateAttrs);
+
+ /*
+ * Combine key, including, and expression, but not partial index
+ * predicate attributes.  attrs is recycled into the result (no leak).
+ */
+ ii->ii_IndexedAttrs = bms_add_members(attrs, ii->ii_ExpressionsAttrs);
+
+ /* All indexes should index *something*! */
+ Assert(!bms_is_empty(ii->ii_IndexedAttrs));
+ }
+}
+
/* ----------------
* BuildIndexInfo
* Construct an IndexInfo record for an open index
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index 0a1a68e0644..690a2511023 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -102,7 +102,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
* Get information from the state structure. Fall out if nothing to do.
*/
numIndexes = indstate->ri_NumIndices;
- if (numIndexes == 0)
+ if (numIndexes == 0 || updateIndexes == TU_None)
return;
relationDescs = indstate->ri_IndexRelationDescs;
indexInfoArray = indstate->ri_IndexRelationInfo;
@@ -314,15 +314,18 @@ CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
{
CatalogIndexState indstate;
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+
CatalogCloseIndexes(indstate);
+ bms_free(updatedAttrs);
}
/*
@@ -338,12 +341,15 @@ CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTu
CatalogIndexState indstate)
{
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = NULL;
+ bms_free(updatedAttrs);
}
/*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index c78dcea98c1..ff8da5be5f8 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -292,8 +292,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
indexInfo->ii_IndexAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_ExpressionsAttrs = NULL;
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NULL;
+ indexInfo->ii_PredicateAttrs = NULL;
+ indexInfo->ii_CheckedPredicate = false;
+ indexInfo->ii_PredicateSatisfied = false;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 6ae0f959592..1275feffae9 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -109,11 +109,15 @@
#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
+#include "nodes/bitmapset.h"
+#include "nodes/execnodes.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
+#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/multirangetypes.h"
#include "utils/rangetypes.h"
@@ -324,8 +328,8 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
- Datum values[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
+ Datum loc_values[INDEX_MAX_KEYS];
+ bool loc_isnull[INDEX_MAX_KEYS];
Assert(ItemPointerIsValid(tupleid));
@@ -349,13 +353,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
- /*
- * for each index, form and insert the index tuple
- */
+ /* Insert into each index that needs updating */
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
+ Datum *values;
+ bool *isnull;
bool applyNoDupErr;
IndexUniqueCheck checkUnique;
bool indexUnchanged;
@@ -372,7 +376,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/*
* Skip processing of non-summarizing indexes if we only update
- * summarizing indexes
+ * summarizing indexes or if this index is unchanged.
*/
if (onlySummarizing && !indexInfo->ii_Summarizing)
continue;
@@ -393,8 +397,15 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
indexInfo->ii_PredicateState = predicate;
}
+ /* Check the index predicate if we haven't done so earlier on */
+ if (!indexInfo->ii_CheckedPredicate)
+ {
+ indexInfo->ii_PredicateSatisfied = ExecQual(predicate, econtext);
+ indexInfo->ii_CheckedPredicate = true;
+ }
+
/* Skip this index-update if the predicate isn't satisfied */
- if (!ExecQual(predicate, econtext))
+ if (!indexInfo->ii_PredicateSatisfied)
continue;
}
@@ -402,11 +413,10 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
- FormIndexDatum(indexInfo,
- slot,
- estate,
- values,
- isnull);
+ FormIndexDatum(indexInfo, slot, estate, loc_values, loc_isnull);
+
+ values = loc_values;
+ isnull = loc_isnull;
/* Check whether to apply noDupErr to this index */
applyNoDupErr = noDupErr &&
@@ -613,7 +623,12 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
checkedIndex = true;
/* Check for partial index */
- if (indexInfo->ii_Predicate != NIL)
+ if (indexInfo->ii_CheckedPredicate && !indexInfo->ii_PredicateSatisfied)
+ {
+ /* We've already checked and the predicate wasn't satisfied. */
+ continue;
+ }
+ else if (indexInfo->ii_Predicate != NIL)
{
ExprState *predicate;
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index ca14cdabdd0..fc6f7aa8fad 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1282,6 +1282,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
/* The following fields are set later if needed */
resultRelInfo->ri_RowIdAttNo = 0;
resultRelInfo->ri_extraUpdatedCols = NULL;
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
resultRelInfo->ri_projectNew = NULL;
resultRelInfo->ri_newTupleSlot = NULL;
resultRelInfo->ri_oldTupleSlot = NULL;
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 173d2fe548d..0146e6f61ef 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -32,6 +32,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -936,7 +937,13 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
+ /*
+ * We're not going to call ExecCheckIndexedAttrsForChanges here
+ * because we've already identified the changes earlier on thanks to
+ * slot_modify_data.
+ */
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
+ resultRelInfo->ri_ChangedIndexedCols,
&update_indexes);
conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 46ff6da8289..8fca5e09a26 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -17,6 +17,7 @@
* ExecModifyTable - retrieve the next tuple from the node
* ExecEndModifyTable - shut down the ModifyTable node
* ExecReScanModifyTable - rescan the ModifyTable node
+ * ExecCheckIndexedAttrsForChanges - find set of updated indexed columns
*
* NOTES
* The ModifyTable node receives input from its outerPlan, which is
@@ -53,12 +54,18 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/attnum.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
+#include "access/tupconvert.h"
+#include "access/tupdesc.h"
#include "access/xact.h"
+#include "catalog/index.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
+#include "executor/tuptable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
@@ -68,8 +75,11 @@
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/float.h"
#include "utils/injection_point.h"
+#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
@@ -176,6 +186,224 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
bool canSetTag);
+/*
+ * ExecCheckIndexedAttrsForChanges
+ *
+ * Determine which indexes need updating by finding the set of modified indexed
+ * attributes, comparing old_tts against new_tts one index column at a time.
+ *
+ * Index AMs which implement the amcomparedatums() API are asked whether the
+ * old and new values yield the same keys; others fall back to datumIsEqual().
+ *
+ * The goal is for the executor to know, ahead of calling into the table AM to
+ * process the update and before calling into the index AM for inserting new
+ * index tuples, which attributes in the new TupleTableSlot, if any, truly
+ * necessitate a new index tuple.
+ *
+ * Returns a Bitmapset of attributes (offset by FirstLowInvalidHeapAttributeNumber)
+ * that require a new index tuple; NULL when there are none.
+ */
+Bitmapset *
+ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts)
+{
+ Relation relation = relinfo->ri_RelationDesc;
+ TupleDesc tupdesc = RelationGetDescr(relation);
+ Bitmapset *mix_attrs = NULL; /* modified indexed attributes */
+
+ /* If no indexes, we're done */
+ if (relinfo->ri_NumIndices == 0)
+ return NULL;
+
+ /* Examine each index in turn */
+ for (int i = 0; i < relinfo->ri_NumIndices; i++)
+ {
+ Relation index = relinfo->ri_IndexRelationDescs[i];
+ const IndexAmRoutine *amroutine = index->rd_indam;
+ IndexInfo *indexInfo = relinfo->ri_IndexRelationInfo[i];
+ Bitmapset *m_attrs = NULL; /* (possibly) modified indexed attrs */
+ Bitmapset *p_attrs = NULL; /* (possibly) modified predicate attrs */
+ Bitmapset *u_attrs = NULL; /* unmodified indexed attrs */
+ bool has_am_compare = (amroutine->amcomparedatums != NULL);
+ bool supports_ios = (amroutine->amcanreturn != NULL);
+ bool is_partial = (indexInfo->ii_Predicate != NIL);
+ ExprContext *econtext = GetPerTupleExprContext(estate);
+ int num_datums = supports_ios ?
+ indexInfo->ii_NumIndexAttrs : indexInfo->ii_NumIndexKeyAttrs;
+ int nth_expr = 0; /* next ii_Expressions entry; must persist across columns */
+
+ /* If we've reviewed all the attributes on this index, move on */
+ if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs))
+ continue;
+
+ /* Add partial index attributes */
+ if (is_partial)
+ p_attrs = bms_add_members(p_attrs, indexInfo->ii_PredicateAttrs);
+
+ /* Compare the index datums for equality */
+ for (int j = 0; j < num_datums; j++)
+ {
+ AttrNumber rel_attrnum = indexInfo->ii_IndexAttrNumbers[j];
+ int rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber;
+ int16 typlen;
+ bool typbyval;
+ Datum old_value;
+ Datum new_value;
+ bool old_null;
+ bool new_null;
+ bool values_equal = false;
+
+ /* System attributes */
+ if (rel_attrnum < 0)
+ {
+ /* Extract system values from both slots for this attribute */
+ old_value = slot_getsysattr(old_tts, rel_attrnum, &old_null);
+ new_value = slot_getsysattr(new_tts, rel_attrnum, &new_null);
+
+ /* The only allowed system columns are OIDs, so do this */
+ values_equal = (DatumGetObjectId(old_value) == DatumGetObjectId(new_value));
+ goto equality_determined;
+ }
+
+ /*
+ * This is an expression attribute, but in an effort to avoid the
+ * expense of FormIndexDatum we're now faced with testing for
+ * equality so we'll have to exec the expressions and test for
+ * binary equality of the results.
+ */
+ else if (rel_attrnum == 0)
+ {
+ TupleTableSlot *save_scantuple = econtext->ecxt_scantuple;
+ Oid expr_type_oid;
+ Expr *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
+ ExprState *state;
+
+ if (indexInfo->ii_ExpressionsState == NIL)
+ {
+ /* First time through, set up expression evaluation state */
+ indexInfo->ii_ExpressionsState =
+ ExecPrepareExprList(indexInfo->ii_Expressions, estate);
+ }
+
+ state = (ExprState *) list_nth(indexInfo->ii_ExpressionsState, nth_expr);
+
+ econtext->ecxt_scantuple = old_tts;
+ old_value = ExecEvalExprSwitchContext(state,
+ GetPerTupleExprContext(estate),
+ &old_null);
+
+ econtext->ecxt_scantuple = new_tts;
+ new_value = ExecEvalExprSwitchContext(state,
+ GetPerTupleExprContext(estate),
+ &new_null);
+
+ econtext->ecxt_scantuple = save_scantuple;
+
+ /*
+ * NOTE: test for NULL cases here to potentially avoid looking
+ * up the type information. It's a tad redundant, but worth
+ * it.
+ */
+
+ /* A change to/from NULL, so not equal */
+ if (old_null != new_null)
+ {
+ values_equal = false;
+ goto equality_determined;
+ }
+
+ /* Both NULL, no change; record as unmodified */
+ if (old_null)
+ {
+ values_equal = true;
+ goto equality_determined;
+ }
+
+ /* Get type OID from the expression */
+ expr_type_oid = exprType((Node *) expr);
+
+ /* Get type information from the OID */
+ get_typlenbyval(expr_type_oid, &typlen, &typbyval);
+ }
+ /* Not a system or expression attribute */
+ else
+ {
+ CompactAttribute *att = TupleDescCompactAttr(tupdesc, rel_attrnum - 1);
+
+ /* Extract values from both slots for this attribute */
+ old_value = slot_getattr(old_tts, rel_attrnum, &old_null);
+ new_value = slot_getattr(new_tts, rel_attrnum, &new_null);
+
+ typlen = att->attlen;
+ typbyval = att->attbyval;
+ }
+
+ /* A change to/from NULL, so not equal */
+ if (old_null != new_null)
+ {
+ values_equal = false;
+ goto equality_determined;
+ }
+
+ /* Both NULL, no change; record as unmodified */
+ if (old_null)
+ {
+ values_equal = true;
+ goto equality_determined;
+ }
+
+ if (has_am_compare)
+ {
+ /*
+ * NOTE: For AM comparison, pass the 1-based index attribute
+ * number. The AM's compare function expects the same
+ * numbering as used internally by the AM.
+ */
+ values_equal = amroutine->amcomparedatums(index, j + 1,
+ old_value, old_null,
+ new_value, new_null);
+ }
+ else
+ {
+ values_equal = datumIsEqual(old_value, new_value, typbyval, typlen);
+ }
+
+ equality_determined:;
+ if (rel_attrnum == 0)
+ {
+ /* rel_attridx is meaningless for an expression; never add it to u_attrs */
+ if (!values_equal)
+ {
+ /* Implicate every column the expression reads (its Vars use varno 1) */
+ pull_varattnos((Node *) list_nth(indexInfo->ii_Expressions, nth_expr), 1, &m_attrs);
+ }
+ nth_expr++;
+ }
+ else if (!values_equal)
+ m_attrs = bms_add_member(m_attrs, rel_attridx);
+ else
+ u_attrs = bms_add_member(u_attrs, rel_attridx);
+ }
+
+ /*
+ * Here we know all the attributes that might be modified and all
+ * those we know haven't been across all indexes. Take the difference
+ * and add it to the modified indexed attributes set.
+ */
+ m_attrs = bms_del_members(m_attrs, u_attrs);
+ p_attrs = bms_del_members(p_attrs, u_attrs);
+ mix_attrs = bms_add_members(mix_attrs, m_attrs);
+ mix_attrs = bms_add_members(mix_attrs, p_attrs);
+
+ bms_free(m_attrs);
+ bms_free(u_attrs);
+ bms_free(p_attrs);
+ }
+
+ return mix_attrs;
+}
/*
* Verify that the tuples to be produced by INSERT match the
@@ -2168,14 +2396,17 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot,
+ TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
+ /* The set of modified indexed attributes that trigger new index entries */
+ Bitmapset *mix_attrs = NULL;
+
updateCxt->crossPartUpdate = false;
/*
@@ -2292,9 +2523,38 @@ lreplace:
ExecConstraints(resultRelInfo, slot, estate);
/*
- * replace the heap tuple
+ * Identify which, if any, indexed attributes were modified here so that
+ * we might reuse that set in a few places.
+ */
+ bms_free(resultRelInfo->ri_ChangedIndexedCols);
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
+
+ /*
+ * During updates we'll need a bit more information in IndexInfo but we've
+ * delayed adding it until here. We check that there are indexes and
+ * that the first index doesn't yet have ii_IndexedAttrs set, as a way
+ * to ensure we only build this when needed and only once.
+ *
+ * We don't build this in ExecOpenIndices() as it is unnecessary overhead
+ * when not performing an update.
+ */
+ if (resultRelInfo->ri_NumIndices > 0 &&
+ bms_is_empty(resultRelInfo->ri_IndexRelationInfo[0]->ii_IndexedAttrs))
+ BuildUpdateIndexInfo(resultRelInfo);
+
+ /*
+ * Next up we need to find out the set of indexed attributes that have
+ * changed in value and should trigger a new index tuple. We could start
+ * with the set of updated columns via ExecGetUpdatedCols(), but if we do
+ * we will overlook attributes directly modified by heap_modify_tuple()
+ * which are not known to ExecGetUpdatedCols().
+ */
+ mix_attrs = ExecCheckIndexedAttrsForChanges(resultRelInfo, estate, oldSlot, slot);
+
+ /*
+ * Call into the table AM to update the heap tuple.
*
- * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
+ * NOTE: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
* the row to be updated is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior needed
* for referential integrity updates in transaction-snapshot mode
@@ -2306,8 +2566,12 @@ lreplace:
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
+ mix_attrs,
&updateCxt->updateIndexes);
+ Assert(bms_is_empty(resultRelInfo->ri_ChangedIndexedCols));
+ resultRelInfo->ri_ChangedIndexedCols = mix_attrs;
+
return result;
}
@@ -2325,7 +2589,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
- /* insert index entries for tuple if necessary */
+ /* Insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, context->estate,
@@ -2524,8 +2788,9 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
lockedtid = *tupleid;
- result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+
+ result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, oldSlot,
+ slot, canSetTag, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -3222,8 +3487,8 @@ lmerge_matched:
Assert(oldtuple == NULL);
result = ExecUpdateAct(context, resultRelInfo, tupleid,
- NULL, newslot, canSetTag,
- &updateCxt);
+ NULL, resultRelInfo->ri_oldTupleSlot,
+ newslot, canSetTag, &updateCxt);
/*
* As in ExecUpdate(), if ExecUpdateAct() reports that a
@@ -3248,6 +3513,7 @@ lmerge_matched:
tupleid, NULL, newslot);
mtstate->mt_merge_updated += 1;
}
+
break;
case CMD_DELETE:
@@ -4354,7 +4620,7 @@ ExecModifyTable(PlanState *pstate)
* For UPDATE/DELETE/MERGE, fetch the row identity info for the tuple
* to be updated/deleted/merged. For a heap relation, that's a TID;
* otherwise we may have a wholerow junk attr that carries the old
- * tuple in toto. Keep this in step with the part of
+ * tuple in toto. Keep this in step with the part of
* ExecInitModifyTable that sets up ri_RowIdAttNo.
*/
if (operation == CMD_UPDATE || operation == CMD_DELETE ||
@@ -4530,6 +4796,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
oldSlot, slot, node->canSetTag);
+
if (tuplock)
UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid,
InplaceUpdateTupleLock);
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index a4765876c31..17f5f66b25f 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -238,6 +238,10 @@ bms_make_singleton(int x)
void
bms_free(Bitmapset *a)
{
+#if USE_ASSERT_CHECKING
+ Assert(bms_is_valid_set(a));
+#endif
+
if (a)
pfree(a);
}
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index 2caec621d73..dd092bacad9 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -857,10 +857,14 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
/* expressions */
n->ii_Expressions = expressions;
n->ii_ExpressionsState = NIL;
+ n->ii_ExpressionsAttrs = NULL;
/* predicates */
n->ii_Predicate = predicates;
n->ii_PredicateState = NULL;
+ n->ii_PredicateAttrs = NULL;
+ n->ii_CheckedPredicate = false;
+ n->ii_PredicateSatisfied = false;
/* exclusion constraints */
n->ii_ExclusionOps = NULL;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index ad281e7069b..90b0c2c40e9 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -275,7 +275,6 @@
#include "replication/logicalrelation.h"
#include "replication/logicalworker.h"
#include "replication/origin.h"
-#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "rewrite/rewriteHandler.h"
@@ -285,12 +284,14 @@
#include "storage/procarray.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
+#include "utils/datum.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -1110,15 +1111,18 @@ slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
* "slot" is filled with a copy of the tuple in "srcslot", replacing
* columns provided in "tupleData" and leaving others as-is.
*
+ * Returns a bitmap of the modified columns.
+ *
* Caution: unreplaced pass-by-ref columns in "slot" will point into the
* storage for "srcslot". This is OK for current usage, but someday we may
* need to materialize "slot" at the end to make it independent of "srcslot".
*/
-static void
+static Bitmapset *
slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
LogicalRepRelMapEntry *rel,
LogicalRepTupleData *tupleData)
{
+ Bitmapset *modified = NULL;
int natts = slot->tts_tupleDescriptor->natts;
int i;
@@ -1195,6 +1199,27 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
slot->tts_isnull[i] = true;
}
+ /*
+ * Determine if the replicated value changed the local value by
+ * comparing slots. This is a subset of
+ * ExecCheckIndexedAttrsForChanges.
+ */
+ if (srcslot->tts_isnull[i] != slot->tts_isnull[i])
+ {
+ /* One is NULL, the other is not so the value changed */
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+ else if (!srcslot->tts_isnull[i])
+ {
+ /* Both are not NULL, compare their values */
+
+ if (!datumIsEqual(srcslot->tts_values[i],
+ slot->tts_values[i],
+ att->attbyval,
+ att->attlen))
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+
/* Reset attnum for error callback */
apply_error_callback_arg.remote_attnum = -1;
}
@@ -1202,6 +1227,8 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
/* And finally, declare that "slot" contains a valid virtual tuple */
ExecStoreVirtualTuple(slot);
+
+ return modified;
}
/*
@@ -2918,6 +2945,7 @@ apply_handle_update_internal(ApplyExecutionData *edata,
ConflictTupleInfo conflicttuple = {0};
bool found;
MemoryContext oldctx;
+ Bitmapset *indexed = NULL;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(relinfo, false);
@@ -2934,6 +2962,8 @@ apply_handle_update_internal(ApplyExecutionData *edata,
*/
if (found)
{
+ Bitmapset *modified = NULL;
+
/*
* Report the conflict if the tuple was modified by a different
* origin.
@@ -2957,15 +2987,29 @@ apply_handle_update_internal(ApplyExecutionData *edata,
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot, localslot, relmapentry, newtup);
+ modified = slot_modify_data(remoteslot, localslot, relmapentry, newtup);
MemoryContextSwitchTo(oldctx);
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but here we have
+ * the record of changed columns in the replication state, so let's
+ * use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+
+ bms_free(relinfo->ri_ChangedIndexedCols);
+ relinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
EvalPlanQualSetSlot(&epqstate, remoteslot);
InitConflictIndexes(relinfo);
- /* Do the actual update. */
+ /* First check privileges */
TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_UPDATE);
+
+ /* Then do the actual update. */
ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
remoteslot);
}
@@ -3455,6 +3499,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
bool found;
EPQState epqstate;
ConflictTupleInfo conflicttuple = {0};
+ Bitmapset *modified = NULL;
+ Bitmapset *indexed;
/* Get the matching local tuple from the partition. */
found = FindReplTupleInLocalRel(edata, partrel,
@@ -3523,8 +3569,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
* remoteslot_part.
*/
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot_part, localslot, part_entry,
- newtup);
+ modified = slot_modify_data(remoteslot_part, localslot, part_entry,
+ newtup);
MemoryContextSwitchTo(oldctx);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
@@ -3549,6 +3595,18 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
EvalPlanQualSetSlot(&epqstate, remoteslot_part);
TargetPrivilegesCheck(partrelinfo->ri_RelationDesc,
ACL_UPDATE);
+
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but
+ * here we have the record of changed columns in the
+ * replication state, so let's use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(partrelinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+ bms_free(partrelinfo->ri_ChangedIndexedCols);
+ partrelinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
localslot, remoteslot_part);
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b634c9fff1..8cc97e4fbca 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2477,6 +2477,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
bms_free(relation->rd_idattr);
bms_free(relation->rd_hotblockingattr);
bms_free(relation->rd_summarizedattr);
+ bms_free(relation->rd_indexedattr);
if (relation->rd_pubdesc)
pfree(relation->rd_pubdesc);
if (relation->rd_options)
@@ -5278,6 +5279,7 @@ RelationGetIndexPredicate(Relation relation)
* index (empty if FULL)
* INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT
* INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes
+ * INDEX_ATTR_BITMAP_INDEXED Columns referenced by indexes
*
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
* we can include system attributes (e.g., OID) in the bitmap representation.
@@ -5302,6 +5304,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Bitmapset *idindexattrs; /* columns in the replica identity */
Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */
Bitmapset *summarizedattrs; /* columns with summarizing indexes */
+ Bitmapset *indexedattrs; /* columns referenced by indexes */
List *indexoidlist;
List *newindexoidlist;
Oid relpkindex;
@@ -5324,6 +5327,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return bms_copy(relation->rd_hotblockingattr);
case INDEX_ATTR_BITMAP_SUMMARIZED:
return bms_copy(relation->rd_summarizedattr);
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return bms_copy(relation->rd_indexedattr);
default:
elog(ERROR, "unknown attrKind %u", attrKind);
}
@@ -5368,6 +5373,7 @@ restart:
idindexattrs = NULL;
hotblockingattrs = NULL;
summarizedattrs = NULL;
+ indexedattrs = NULL;
foreach(l, indexoidlist)
{
Oid indexOid = lfirst_oid(l);
@@ -5500,10 +5506,14 @@ restart:
bms_free(idindexattrs);
bms_free(hotblockingattrs);
bms_free(summarizedattrs);
+ bms_free(indexedattrs);
goto restart;
}
+ /* Combine all index attributes */
+ indexedattrs = bms_union(hotblockingattrs, summarizedattrs);
+
/* Don't leak the old values of these bitmaps, if any */
relation->rd_attrsvalid = false;
bms_free(relation->rd_keyattr);
@@ -5516,6 +5526,8 @@ restart:
relation->rd_hotblockingattr = NULL;
bms_free(relation->rd_summarizedattr);
relation->rd_summarizedattr = NULL;
+ bms_free(relation->rd_indexedattr);
+ relation->rd_indexedattr = NULL;
/*
* Now save copies of the bitmaps in the relcache entry. We intentionally
@@ -5530,6 +5542,7 @@ restart:
relation->rd_idattr = bms_copy(idindexattrs);
relation->rd_hotblockingattr = bms_copy(hotblockingattrs);
relation->rd_summarizedattr = bms_copy(summarizedattrs);
+ relation->rd_indexedattr = bms_copy(indexedattrs);
relation->rd_attrsvalid = true;
MemoryContextSwitchTo(oldcxt);
@@ -5546,6 +5559,8 @@ restart:
return hotblockingattrs;
case INDEX_ATTR_BITMAP_SUMMARIZED:
return summarizedattrs;
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return indexedattrs;
default:
elog(ERROR, "unknown attrKind %u", attrKind);
return NULL;
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index ecfbd017d66..2a36b7e4a18 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -211,6 +211,33 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan);
/* restore marked scan position */
typedef void (*amrestrpos_function) (IndexScanDesc scan);
+/*
+ * amcomparedatums - Compare datums to determine if index update is needed
+ *
+ * This function compares old_datum and new_datum to determine if they would
+ * produce different index entries. For extraction-based indexes (GIN, RUM),
+ * this should:
+ * 1. Extract keys from old_datum using the opclass's extractValue function
+ * 2. Extract keys from new_datum using the opclass's extractValue function
+ * 3. Compare the two sets of keys using appropriate equality operators
+ * 4. Return true if the sets are equal (no index update needed)
+ *
+ * The comparison should account for:
+ * - Different numbers of extracted keys
+ * - NULL values
+ * - Type-specific equality (not just binary equality)
+ * - Opclass parameters (e.g., path in bson_rum_single_path_ops)
+ *
+ * For the DocumentDB example with path='a', this would extract values at
+ * path 'a' from both old and new BSON documents and compare them using
+ * BSON's equality operator.
+ */
+/* determine whether old and new datums would produce equal index entries */
+typedef bool (*amcomparedatums_function) (Relation indexRelation,
+ int attno,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Callback function signatures - for parallel index scans.
*/
@@ -313,6 +340,7 @@ typedef struct IndexAmRoutine
amendscan_function amendscan;
ammarkpos_function ammarkpos; /* can be NULL */
amrestrpos_function amrestrpos; /* can be NULL */
+ amcomparedatums_function amcomparedatums; /* can be NULL */
/* interface functions to support parallel index scans */
amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index fa1a3b20e09..69771fe947b 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -100,6 +100,9 @@ extern PGDLLIMPORT int gin_pending_list_limit;
extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
bool is_build);
+extern bool gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 41193d5b3d2..029f8e84e8a 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -369,7 +369,7 @@ extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup,
TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
- Bitmapset *mix_attrs, Buffer *vmbuffer,
+ const Bitmapset *mix_attrs, Buffer *vmbuffer,
bool rep_id_key_required, TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
@@ -404,8 +404,8 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, const ItemPointerData *tid);
-extern void simple_heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple tup, TU_UpdateIndexes *update_indexes);
+extern Bitmapset *simple_heap_update(Relation relation, const ItemPointerData *otid,
+ HeapTuple tup, TU_UpdateIndexes *update_indexes);
extern TransactionId heap_index_delete_tuples(Relation rel,
TM_IndexDeleteOp *delstate);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 77224859685..532656a487f 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1179,6 +1179,10 @@ extern int btgettreeheight(Relation rel);
extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily);
extern StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily);
+extern bool btcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* prototypes for internal functions in nbtree.c
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e2ec5289d4d..4bed0f8e56e 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -549,6 +549,7 @@ typedef struct TableAmRoutine
bool wait,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
+ const Bitmapset *updated_cols,
TU_UpdateIndexes *update_indexes);
/* see table_tuple_lock() for reference about parameters */
@@ -1512,12 +1513,12 @@ static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ const Bitmapset *mix_cols, TU_UpdateIndexes *update_indexes)
{
return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck,
- wait, tmfd,
- lockmode, update_indexes);
+ wait, tmfd, lockmode,
+ mix_cols, update_indexes);
}
/*
@@ -2020,6 +2021,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes);
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index b259c4141ed..14a39beab6e 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -132,6 +132,7 @@ extern bool CompareIndexInfo(const IndexInfo *info1, const IndexInfo *info2,
const AttrMap *attmap);
extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii);
+extern void BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo);
extern void FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 5929aabc353..b4c757af618 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -739,6 +739,11 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate);
*/
extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
+extern Bitmapset *ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo,
+ Bitmapset *mix_attrs,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate,
bool update,
@@ -800,5 +805,9 @@ extern ResultRelInfo *ExecLookupResultRelByOid(ModifyTableState *node,
Oid resultoid,
bool missing_ok,
bool update_cache);
+extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 02265456978..d5af2f34d0f 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -174,15 +174,29 @@ typedef struct IndexInfo
*/
AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS];
+ /*
+ * All key, expression, summarizing, and partition attributes referenced by
+ * this index
+ */
+ Bitmapset *ii_IndexedAttrs;
+
/* expr trees for expression entries, or NIL if none */
List *ii_Expressions; /* list of Expr */
/* exec state for expressions, or NIL if none */
List *ii_ExpressionsState; /* list of ExprState */
+ /* attributes exclusively referenced by expression indexes */
+ Bitmapset *ii_ExpressionsAttrs;
/* partial-index predicate, or NIL if none */
List *ii_Predicate; /* list of Expr */
/* exec state for expressions, or NIL if none */
ExprState *ii_PredicateState;
+ /* attributes referenced by the predicate */
+ Bitmapset *ii_PredicateAttrs;
+ /* partial index predicate determined yet? */
+ bool ii_CheckedPredicate;
+ /* amupdate hint used to avoid rechecking predicate */
+ bool ii_PredicateSatisfied;
/* Per-column exclusion operators, or NULL if none */
Oid *ii_ExclusionOps; /* array with one entry per column */
@@ -499,6 +513,12 @@ typedef struct ResultRelInfo
/* true if the above has been computed */
bool ri_extraUpdatedCols_valid;
+ /*
+ * For UPDATE a Bitmapset of the attributes that are both indexed and have
+ * changed in value.
+ */
+ Bitmapset *ri_ChangedIndexedCols;
+
/* Projection to generate new tuple in an INSERT/UPDATE */
ProjectionInfo *ri_projectNew;
/* Slot to hold that tuple */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index d03ab247788..95b38abfd89 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -164,6 +164,7 @@ typedef struct RelationData
Bitmapset *rd_idattr; /* included in replica identity index */
Bitmapset *rd_hotblockingattr; /* cols blocking HOT update */
Bitmapset *rd_summarizedattr; /* cols indexed by summarizing indexes */
+ Bitmapset *rd_indexedattr; /* all cols referenced by indexes */
PublicationDesc *rd_pubdesc; /* publication descriptor, or NULL */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 2700224939a..5834ab7b903 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -71,6 +71,7 @@ typedef enum IndexAttrBitmapKind
INDEX_ATTR_BITMAP_IDENTITY_KEY,
INDEX_ATTR_BITMAP_HOT_BLOCKING,
INDEX_ATTR_BITMAP_SUMMARIZED,
+ INDEX_ATTR_BITMAP_INDEXED,
} IndexAttrBitmapKind;
extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,
diff --git a/src/test/isolation/expected/insert-conflict-specconflict.out b/src/test/isolation/expected/insert-conflict-specconflict.out
index e34a821c403..54b3981918c 100644
--- a/src/test/isolation/expected/insert-conflict-specconflict.out
+++ b/src/test/isolation/expected/insert-conflict-specconflict.out
@@ -80,6 +80,10 @@ pg_advisory_unlock
t
(1 row)
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
@@ -172,6 +176,10 @@ pg_advisory_unlock
t
(1 row)
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
@@ -369,6 +377,10 @@ key|data
step s1_commit: COMMIT;
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
step s2_upsert: <... completed>
step controller_show: SELECT * FROM upserttest;
key|data
@@ -530,6 +542,14 @@ isolation/insert-conflict-specconflict/s2|transactionid|ExclusiveLock|t
step s2_commit: COMMIT;
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
step s1_upsert: <... completed>
step s1_noop:
step controller_show: SELECT * FROM upserttest;
diff --git a/src/test/regress/expected/heap_hot_updates.out b/src/test/regress/expected/heap_hot_updates.out
new file mode 100644
index 00000000000..14276e3cbca
--- /dev/null
+++ b/src/test/regress/expected/heap_hot_updates.out
@@ -0,0 +1,650 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+-- Setup: Create function to measure HOT updates
+CREATE OR REPLACE FUNCTION check_hot_updates(
+ expected INT,
+ p_table_name TEXT DEFAULT 't',
+ p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+ table_name TEXT,
+ total_updates BIGINT,
+ hot_updates BIGINT,
+ hot_update_percentage NUMERIC,
+ matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ v_relid oid;
+ v_qualified_name TEXT;
+ v_hot_updates BIGINT;
+ v_updates BIGINT;
+ v_xact_hot_updates BIGINT;
+ v_xact_updates BIGINT;
+BEGIN
+ -- Force statistics update
+ PERFORM pg_stat_force_next_flush();
+
+ -- Get table OID
+ v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+ v_relid := v_qualified_name::regclass;
+
+ IF v_relid IS NULL THEN
+ RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+ END IF;
+
+ -- Get cumulative + transaction stats
+ v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+ v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+ v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+ v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+ v_hot_updates := v_hot_updates + v_xact_hot_updates;
+ v_updates := v_updates + v_xact_updates;
+
+ RETURN QUERY
+ SELECT
+ p_table_name::TEXT,
+ v_updates::BIGINT,
+ v_hot_updates::BIGINT,
+ CASE WHEN v_updates > 0
+ THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+ ELSE 0
+ END,
+ (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+CREATE COLLATION case_insensitive (
+ provider = libc,
+ locale = 'C'
+);
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: Same elements extracted (might allow HOT if not careful)
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update: Add/remove element
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+CREATE INDEX t_gin ON t USING gin(search_vec);
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (index keys changed)
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+DROP TABLE t CASCADE;
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (keys actually changed)
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Expected: 1 HOT (GIN keys semantically identical)
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: Still 1 HOT (not this one)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+INSERT INTO t VALUES (1, 50, 'below range');
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 150
+(1 row)
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 3 | 100.00 | t
+(1 row)
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 160
+(1 row)
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 4 | 100.00 | t
+(1 row)
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+SELECT id, value, description FROM t;
+ id | value | description
+----+-------+---------------
+ 1 | 50 | updated again
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, '[email protected]', '{"status": "new"}');
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_brin | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT (BRIN allows it for single row)
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_hash | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (HASH blocks it)
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: 1 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT (BRIN permits single-row updates)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+-- Expected: 2 HOT (HASH blocks it)
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+-- Expected: 3 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+-- Update impacts index on whole docment attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 021d57f66bb..2d6641992e9 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -125,6 +125,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate
+
+# ----------
+# HOT (heap-only tuple) update tests; a single-test group, so it runs on its own
+# ----------
+test: heap_hot_updates
+
# event_trigger depends on create_am and cannot run concurrently with
# any test that runs DDL
# oidjoins is read-only, though, and should run late for best coverage
diff --git a/src/test/regress/sql/heap_hot_updates.sql b/src/test/regress/sql/heap_hot_updates.sql
new file mode 100644
index 00000000000..e047bcddf5c
--- /dev/null
+++ b/src/test/regress/sql/heap_hot_updates.sql
@@ -0,0 +1,513 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+
+-- Setup: Create function to measure HOT updates
+CREATE OR REPLACE FUNCTION check_hot_updates(
+ expected INT,
+ p_table_name TEXT DEFAULT 't',
+ p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+ table_name TEXT,
+ total_updates BIGINT,
+ hot_updates BIGINT,
+ hot_update_percentage NUMERIC,
+ matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ v_relid oid;
+ v_qualified_name TEXT;
+ v_hot_updates BIGINT;
+ v_updates BIGINT;
+ v_xact_hot_updates BIGINT;
+ v_xact_updates BIGINT;
+BEGIN
+ -- Force statistics update
+ PERFORM pg_stat_force_next_flush();
+
+ -- Get table OID
+ v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+ v_relid := v_qualified_name::regclass;
+
+ IF v_relid IS NULL THEN
+ RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+ END IF;
+
+ -- Get cumulative + transaction stats
+ v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+ v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+ v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+ v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+ v_hot_updates := v_hot_updates + v_xact_hot_updates;
+ v_updates := v_updates + v_xact_updates;
+
+ RETURN QUERY
+ SELECT
+ p_table_name::TEXT,
+ v_updates::BIGINT,
+ v_hot_updates::BIGINT,
+ CASE WHEN v_updates > 0
+ THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+ ELSE 0
+ END,
+ (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+
+CREATE COLLATION case_insensitive (
+ provider = libc,
+ locale = 'C'
+);
+
+
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: Same elements extracted (might allow HOT if not careful)
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update: Add/remove element
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+
+CREATE INDEX t_gin ON t USING gin(search_vec);
+
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+-- Expected: 1 row
+
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (index keys changed)
+
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+-- Expected: 1 row
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (keys actually changed)
+
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT (GIN keys semantically identical)
+
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: Still 1 HOT (not this one)
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+
+INSERT INTO t VALUES (1, 50, 'below range');
+
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+SELECT id, value, description FROM t;
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, '[email protected]', '{"status": "new"}');
+
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+-- Expected: 1 HOT (BRIN allows it for single row)
+
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+-- Expected: 0 HOT (HASH blocks it)
+
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+
+
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (BRIN permits single-row updates)
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT
+
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (HASH blocks it)
+
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+-- Expected: 3 HOT
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+
+-- Update impacts index on whole docment attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t CASCADE;
+
+
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 09e7f1d420e..637990ac252 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -399,6 +399,7 @@ CachedFunctionCompileCallback
CachedFunctionDeleteCallback
CachedFunctionHashEntry
CachedFunctionHashKey
+CachedIndexDatum
CachedPlan
CachedPlanSource
CallContext
--
2.51.2
^ permalink raw reply [nested|flat] 2+ messages in thread
* Re: Expanding HOT updates for expression and partial indexes
2026-01-08 20:25 Re: Expanding HOT updates for expression and partial indexes Greg Burd <[email protected]>
@ 2026-01-13 14:54 ` Greg Burd <[email protected]>
0 siblings, 0 replies; 2+ messages in thread
From: Greg Burd @ 2026-01-13 14:54 UTC (permalink / raw)
To: pgsql-hackers
Rebased again to address build failure.
-greg
Attachments:
[application/octet-stream] v28-0001-Prepare-heapam_tuple_update-and-simple_heap_upda.patch (47.8K, 2-v28-0001-Prepare-heapam_tuple_update-and-simple_heap_upda.patch)
download | inline diff:
From 60213e898f9f829d05ffe97df4b6186a233aa840 Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Sun, 2 Nov 2025 11:36:20 -0500
Subject: [PATCH v28 1/4] Prepare heapam_tuple_update() and
simple_heap_update() for divergence
This commit lays the foundation for larger changes to come by taking the
first portion of heap_update(), up through the call to
HeapDetermineColumnsInfo(), and replicating that logic in both
heapam_tuple_update() and simple_heap_update(). This is done so that these two paths might diverge
in implementation later on. The simple_heap_update() path deals solely
with updates to catalog tuples which could record their modified
attributes rather than relearn them. The remaining calls from the
executor into the table AM update API could include the set of updated
attributes. This is foreshadowing... of course, as that's what the next
commit will start to do.
As part of this reorganization, the handling of replica identity key
attributes has been adjusted. Instead of ExtractReplicaIdentity() fetching
a second copy of the bitmap during an update operation, its caller is now
required to provide it. This change affects both the heap_update() and
heap_delete() code paths.
---
src/backend/access/heap/heapam.c | 568 +++++++++++------------
src/backend/access/heap/heapam_handler.c | 117 ++++-
src/include/access/heapam.h | 24 +-
3 files changed, 410 insertions(+), 299 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f30a56ecf55..8563dadc8da 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -39,18 +39,24 @@
#include "access/syncscan.h"
#include "access/valid.h"
#include "access/visibilitymap.h"
+#include "access/xact.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_database.h"
#include "catalog/pg_database_d.h"
#include "commands/vacuum.h"
+#include "nodes/bitmapset.h"
#include "pgstat.h"
#include "port/pg_bitutils.h"
+#include "storage/bufmgr.h"
+#include "storage/itemptr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/inval.h"
+#include "utils/relcache.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
@@ -62,16 +68,8 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
HeapTuple newtup, HeapTuple old_key_tuple,
bool all_visible_cleared, bool new_all_visible_cleared);
#ifdef USE_ASSERT_CHECKING
-static void check_lock_if_inplace_updateable_rel(Relation relation,
- const ItemPointerData *otid,
- HeapTuple newtup);
static void check_inplace_rel_lock(HeapTuple oldtup);
#endif
-static Bitmapset *HeapDetermineColumnsInfo(Relation relation,
- Bitmapset *interesting_cols,
- Bitmapset *external_cols,
- HeapTuple oldtup, HeapTuple newtup,
- bool *has_external);
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid,
LockTupleMode mode, LockWaitPolicy wait_policy,
bool *have_tuple_lock);
@@ -106,10 +104,10 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
static void index_delete_sort(TM_IndexDeleteOp *delstate);
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
-static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
+static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp,
+ Bitmapset *rid_attrs, bool key_required,
bool *copy);
-
/*
* Each tuple lock mode has a corresponding heavyweight lock, and one or two
* corresponding MultiXactStatuses (one to merely lock tuples, another one to
@@ -2861,6 +2859,7 @@ heap_delete(Relation relation, const ItemPointerData *tid,
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
TransactionId new_xmax;
+ Bitmapset *rid_attrs;
uint16 new_infomask,
new_infomask2;
bool have_tuple_lock = false;
@@ -2873,6 +2872,8 @@ heap_delete(Relation relation, const ItemPointerData *tid,
AssertHasSnapshotForToast(relation);
+ rid_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -3076,6 +3077,7 @@ l1:
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
+ bms_free(rid_attrs);
return result;
}
@@ -3097,7 +3099,10 @@ l1:
* Compute replica identity tuple before entering the critical section so
* we don't PANIC upon a memory allocation failure.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, &tp, rid_attrs,
+ true, &old_key_copied);
+ bms_free(rid_attrs);
+ rid_attrs = NULL;
/*
* If this is the first possibly-multixact-able operation in the current
@@ -3309,7 +3314,10 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* heap_update - replace a tuple
*
* See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a heap tuple rather than a slot.
+ *
+ * It's required that the caller has acquired the pin and lock on the buffer.
+ * That lock and pin will be managed here, not in the caller.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -3317,30 +3325,21 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* generated by another transaction).
*/
TM_Result
-heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+heap_update(Relation relation, HeapTupleData *oldtup,
+ HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
+ Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
+ Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
+ Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
- Bitmapset *hot_attrs;
- Bitmapset *sum_attrs;
- Bitmapset *key_attrs;
- Bitmapset *id_attrs;
- Bitmapset *interesting_attrs;
- Bitmapset *modified_attrs;
- ItemId lp;
- HeapTupleData oldtup;
HeapTuple heaptup;
HeapTuple old_key_tuple = NULL;
bool old_key_copied = false;
- Page page;
- BlockNumber block;
MultiXactStatus mxact_status;
- Buffer buffer,
- newbuf,
- vmbuffer = InvalidBuffer,
+ Buffer newbuf,
vmbuffer_new = InvalidBuffer;
bool need_toast;
Size newtupsize,
@@ -3354,7 +3353,6 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
bool all_visible_cleared_new = false;
bool checked_lockers;
bool locker_remains;
- bool id_has_external = false;
TransactionId xmax_new_tuple,
xmax_old_tuple;
uint16 infomask_old_tuple,
@@ -3362,144 +3360,13 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
infomask_new_tuple,
infomask2_new_tuple;
- Assert(ItemPointerIsValid(otid));
-
- /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
- Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
- RelationGetNumberOfAttributes(relation));
-
+ Assert(BufferIsLockedByMe(buffer));
+ Assert(ItemIdIsNormal(lp));
AssertHasSnapshotForToast(relation);
- /*
- * Forbid this during a parallel operation, lest it allocate a combo CID.
- * Other workers might need that combo CID for visibility checks, and we
- * have no provision for broadcasting it to them.
- */
- if (IsInParallelMode())
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("cannot update tuples during a parallel operation")));
-
-#ifdef USE_ASSERT_CHECKING
- check_lock_if_inplace_updateable_rel(relation, otid, newtup);
-#endif
-
- /*
- * Fetch the list of attributes to be checked for various operations.
- *
- * For HOT considerations, this is wasted effort if we fail to update or
- * have to put the new tuple on a different page. But we must compute the
- * list before obtaining buffer lock --- in the worst case, if we are
- * doing an update on one of the relevant system catalogs, we could
- * deadlock if we try to fetch the list later. In any case, the relcache
- * caches the data so this is usually pretty cheap.
- *
- * We also need columns used by the replica identity and columns that are
- * considered the "key" of rows in the table.
- *
- * Note that we get copies of each bitmap, so we need not worry about
- * relcache flush happening midway through.
- */
- hot_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_HOT_BLOCKING);
- sum_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_SUMMARIZED);
- key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
- id_attrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
- interesting_attrs = NULL;
- interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
- interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
-
- block = ItemPointerGetBlockNumber(otid);
- INJECTION_POINT("heap_update-before-pin", NULL);
- buffer = ReadBuffer(relation, block);
- page = BufferGetPage(buffer);
-
- /*
- * Before locking the buffer, pin the visibility map page if it appears to
- * be necessary. Since we haven't got the lock yet, someone else might be
- * in the middle of changing this, so we'll need to recheck after we have
- * the lock.
- */
- if (PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
-
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
- lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
-
- /*
- * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
- * we see LP_NORMAL here. When the otid origin is a syscache, we may have
- * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
- * of which indicates concurrent pruning.
- *
- * Failing with TM_Updated would be most accurate. However, unlike other
- * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
- * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
- * does matter to SQL statements UPDATE and MERGE, those SQL statements
- * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
- * TM_Updated and TM_Deleted affects only the wording of error messages.
- * Settle on TM_Deleted, for two reasons. First, it avoids complicating
- * the specification of when tmfd->ctid is valid. Second, it creates
- * error log evidence that we took this branch.
- *
- * Since it's possible to see LP_UNUSED at otid, it's also possible to see
- * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
- * unrelated row, we'll fail with "duplicate key value violates unique".
- * XXX if otid is the live, newer version of the newtup row, we'll discard
- * changes originating in versions of this catalog row after the version
- * the caller got from syscache. See syscache-update-pruned.spec.
- */
- if (!ItemIdIsNormal(lp))
- {
- Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
-
- UnlockReleaseBuffer(buffer);
- Assert(!have_tuple_lock);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
- tmfd->ctid = *otid;
- tmfd->xmax = InvalidTransactionId;
- tmfd->cmax = InvalidCommandId;
- *update_indexes = TU_None;
-
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- /* modified_attrs not yet initialized */
- bms_free(interesting_attrs);
- return TM_Deleted;
- }
-
- /*
- * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
- * properly.
- */
- oldtup.t_tableOid = RelationGetRelid(relation);
- oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
- oldtup.t_len = ItemIdGetLength(lp);
- oldtup.t_self = *otid;
-
- /* the new tuple is ready, except for this: */
+ /* The new tuple is ready, except for this */
newtup->t_tableOid = RelationGetRelid(relation);
- /*
- * Determine columns modified by the update. Additionally, identify
- * whether any of the unmodified replica identity key attributes in the
- * old tuple is externally stored or not. This is required because for
- * such attributes the flattened value won't be WAL logged as part of the
- * new tuple so we must include it as part of the old_key_tuple. See
- * ExtractReplicaIdentity.
- */
- modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
- id_attrs, &oldtup,
- newtup, &id_has_external);
-
/*
* If we're not updating any "key" column, we can grab a weaker lock type.
* This allows for more concurrency when we are running simultaneously
@@ -3511,7 +3378,7 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
* is updates that don't manipulate key columns, not those that
* serendipitously arrive at the same key values.
*/
- if (!bms_overlap(modified_attrs, key_attrs))
+ if (!bms_overlap(mix_attrs, pk_attrs))
{
*lockmode = LockTupleNoKeyExclusive;
mxact_status = MultiXactStatusNoKeyUpdate;
@@ -3535,17 +3402,10 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
key_intact = false;
}
- /*
- * Note: beyond this point, use oldtup not otid to refer to old tuple.
- * otid may very well point at newtup->t_self, which we will overwrite
- * with the new tuple's location, so there's great risk of confusion if we
- * use otid anymore.
- */
-
l2:
checked_lockers = false;
locker_remains = false;
- result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
+ result = HeapTupleSatisfiesUpdate(oldtup, cid, buffer);
/* see below about the "no wait" case */
Assert(result != TM_BeingModified || wait);
@@ -3577,8 +3437,8 @@ l2:
*/
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
- infomask = oldtup.t_data->t_infomask;
+ xwait = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ infomask = oldtup->t_data->t_infomask;
/*
* Now we have to do something about the existing locker. If it's a
@@ -3618,13 +3478,12 @@ l2:
* requesting a lock and already have one; avoids deadlock).
*/
if (!current_is_member)
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
/* wait for multixact */
MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
- relation, &oldtup.t_self, XLTW_Update,
- &remain);
+ relation, &oldtup->t_self, XLTW_Update, &remain);
checked_lockers = true;
locker_remains = remain != 0;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3634,9 +3493,9 @@ l2:
* could update this tuple before we get to this point. Check
* for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask,
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask,
infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup->t_data),
xwait))
goto l2;
}
@@ -3661,8 +3520,8 @@ l2:
* before this one, which are important to keep in case this
* subxact aborts.
*/
- if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
- update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup->t_data->t_infomask))
+ update_xact = HeapTupleGetUpdateXid(oldtup->t_data);
else
update_xact = InvalidTransactionId;
@@ -3703,9 +3562,9 @@ l2:
* lock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+ heap_acquire_tuplock(relation, &oldtup->t_self, *lockmode,
LockWaitBlock, &have_tuple_lock);
- XactLockTableWait(xwait, relation, &oldtup.t_self,
+ XactLockTableWait(xwait, relation, &oldtup->t_self,
XLTW_Update);
checked_lockers = true;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3715,20 +3574,20 @@ l2:
* other xact could update this tuple before we get to this point.
* Check for xmax change, and start over if so.
*/
- if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
+ if (xmax_infomask_changed(oldtup->t_data->t_infomask, infomask) ||
!TransactionIdEquals(xwait,
- HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+ HeapTupleHeaderGetRawXmax(oldtup->t_data)))
goto l2;
/* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
- if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
+ UpdateXmaxHintBits(oldtup->t_data, buffer, xwait);
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_INVALID)
can_continue = true;
}
if (can_continue)
result = TM_Ok;
- else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
+ else if (!ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid))
result = TM_Updated;
else
result = TM_Deleted;
@@ -3741,39 +3600,33 @@ l2:
result == TM_Updated ||
result == TM_Deleted ||
result == TM_BeingModified);
- Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
+ Assert(!(oldtup->t_data->t_infomask & HEAP_XMAX_INVALID));
Assert(result != TM_Updated ||
- !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
+ !ItemPointerEquals(&oldtup->t_self, &oldtup->t_data->t_ctid));
}
if (crosscheck != InvalidSnapshot && result == TM_Ok)
{
/* Perform additional check for transaction-snapshot mode RI updates */
- if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
+ if (!HeapTupleSatisfiesVisibility(oldtup, crosscheck, buffer))
result = TM_Updated;
}
if (result != TM_Ok)
{
- tmfd->ctid = oldtup.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
+ tmfd->ctid = oldtup->t_data->t_ctid;
+ tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup->t_data);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
+ tmfd->cmax = HeapTupleHeaderGetCmax(oldtup->t_data);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
+ if (*vmbuffer != InvalidBuffer)
+ ReleaseBuffer(*vmbuffer);
*update_indexes = TU_None;
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
return result;
}
@@ -3786,10 +3639,10 @@ l2:
* tuple has been locked or updated under us, but hopefully it won't
* happen very often.
*/
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- visibilitymap_pin(relation, block, &vmbuffer);
+ visibilitymap_pin(relation, block, vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l2;
}
@@ -3800,9 +3653,9 @@ l2:
* If the tuple we're updating is locked, we need to preserve the locking
* info in the old tuple's Xmax. Prepare a new Xmax value for this.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, true,
&xmax_old_tuple, &infomask_old_tuple,
&infomask2_old_tuple);
@@ -3814,12 +3667,12 @@ l2:
* tuple. (In rare cases that might also be InvalidTransactionId and yet
* not have the HEAP_XMAX_INVALID bit set; that's fine.)
*/
- if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
+ if ((oldtup->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_LOCKED_UPGRADED(oldtup->t_data->t_infomask) ||
(checked_lockers && !locker_remains))
xmax_new_tuple = InvalidTransactionId;
else
- xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup->t_data);
if (!TransactionIdIsValid(xmax_new_tuple))
{
@@ -3834,7 +3687,7 @@ l2:
* Note that since we're doing an update, the only possibility is that
* the lockers had FOR KEY SHARE lock.
*/
- if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ if (oldtup->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
{
GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
&infomask2_new_tuple);
@@ -3862,7 +3715,7 @@ l2:
* Replace cid with a combo CID if necessary. Note that we already put
* the plain cid into the new tuple.
*/
- HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
+ HeapTupleHeaderAdjustCmax(oldtup->t_data, &cid, &iscombo);
/*
* If the toaster needs to be activated, OR if the new tuple will not fit
@@ -3879,12 +3732,12 @@ l2:
relation->rd_rel->relkind != RELKIND_MATVIEW)
{
/* toast table entries should never be recursively toasted */
- Assert(!HeapTupleHasExternal(&oldtup));
+ Assert(!HeapTupleHasExternal(oldtup));
Assert(!HeapTupleHasExternal(newtup));
need_toast = false;
}
else
- need_toast = (HeapTupleHasExternal(&oldtup) ||
+ need_toast = (HeapTupleHasExternal(oldtup) ||
HeapTupleHasExternal(newtup) ||
newtup->t_len > TOAST_TUPLE_THRESHOLD);
@@ -3917,9 +3770,9 @@ l2:
* updating, because the potentially created multixact would otherwise
* be wrong.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
- oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2,
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup->t_data),
+ oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2,
xid, *lockmode, false,
&xmax_lock_old_tuple, &infomask_lock_old_tuple,
&infomask2_lock_old_tuple);
@@ -3929,18 +3782,18 @@ l2:
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
- HeapTupleClearHotUpdated(&oldtup);
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ HeapTupleClearHotUpdated(oldtup);
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_lock_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
- oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_lock_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_lock_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_lock_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* temporarily make it look not-updated, but locked */
- oldtup.t_data->t_ctid = oldtup.t_self;
+ oldtup->t_data->t_ctid = oldtup->t_self;
/*
* Clear all-frozen bit on visibility map if needed. We could
@@ -3949,7 +3802,7 @@ l2:
* worthwhile.
*/
if (PageIsAllVisible(page) &&
- visibilitymap_clear(relation, block, vmbuffer,
+ visibilitymap_clear(relation, block, *vmbuffer,
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
@@ -3963,10 +3816,10 @@ l2:
XLogBeginInsert();
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
- xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
+ xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
xlrec.xmax = xmax_lock_old_tuple;
- xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
- oldtup.t_data->t_infomask2);
+ xlrec.infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
xlrec.flags =
cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
XLogRegisterData(&xlrec, SizeOfHeapLock);
@@ -3988,7 +3841,7 @@ l2:
if (need_toast)
{
/* Note we always use WAL and FSM during updates */
- heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
+ heaptup = heap_toast_insert_or_update(relation, newtup, oldtup, 0);
newtupsize = MAXALIGN(heaptup->t_len);
}
else
@@ -4024,20 +3877,20 @@ l2:
/* It doesn't fit, must use RelationGetBufferForTuple. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
buffer, 0, NULL,
- &vmbuffer_new, &vmbuffer,
+ &vmbuffer_new, vmbuffer,
0);
/* We're all done. */
break;
}
/* Acquire VM page pin if needed and we don't have it. */
- if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
- visibilitymap_pin(relation, block, &vmbuffer);
+ if (*vmbuffer == InvalidBuffer && PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, vmbuffer);
/* Re-acquire the lock on the old tuple's page. */
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* Re-check using the up-to-date free space */
pagefree = PageGetHeapFreeSpace(page);
if (newtupsize > pagefree ||
- (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
+ (*vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
{
/*
* Rats, it doesn't fit anymore, or somebody just now set the
@@ -4075,7 +3928,7 @@ l2:
* will include checking the relation level, there is no benefit to a
* separate check for the new tuple.
*/
- CheckForSerializableConflictIn(relation, &oldtup.t_self,
+ CheckForSerializableConflictIn(relation, &oldtup->t_self,
BufferGetBlockNumber(buffer));
/*
@@ -4083,7 +3936,6 @@ l2:
* has enough space for the new tuple. If they are the same buffer, only
* one pin is held.
*/
-
if (newbuf == buffer)
{
/*
@@ -4091,7 +3943,7 @@ l2:
* to do a HOT update. Check if any of the index columns have been
* changed.
*/
- if (!bms_overlap(modified_attrs, hot_attrs))
+ if (!bms_overlap(mix_attrs, hot_attrs))
{
use_hot_update = true;
@@ -4102,7 +3954,7 @@ l2:
* indexes if the columns were updated, or we may fail to detect
* e.g. value bound changes in BRIN minmax indexes.
*/
- if (bms_overlap(modified_attrs, sum_attrs))
+ if (bms_overlap(mix_attrs, sum_attrs))
summarized_update = true;
}
}
@@ -4119,10 +3971,8 @@ l2:
* logged. Pass old key required as true only if the replica identity key
* columns are modified or it has external data.
*/
- old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
- bms_overlap(modified_attrs, id_attrs) ||
- id_has_external,
- &old_key_copied);
+ old_key_tuple = ExtractReplicaIdentity(relation, oldtup, rid_attrs,
+ rep_id_key_required, &old_key_copied);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -4144,7 +3994,7 @@ l2:
if (use_hot_update)
{
/* Mark the old tuple as HOT-updated */
- HeapTupleSetHotUpdated(&oldtup);
+ HeapTupleSetHotUpdated(oldtup);
/* And mark the new tuple as heap-only */
HeapTupleSetHeapOnly(heaptup);
/* Mark the caller's copy too, in case different from heaptup */
@@ -4153,7 +4003,7 @@ l2:
else
{
/* Make sure tuples are correctly marked as not-HOT */
- HeapTupleClearHotUpdated(&oldtup);
+ HeapTupleClearHotUpdated(oldtup);
HeapTupleClearHeapOnly(heaptup);
HeapTupleClearHeapOnly(newtup);
}
@@ -4162,17 +4012,17 @@ l2:
/* Clear obsolete visibility flags, possibly set by ourselves above... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
- oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ oldtup->t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
- oldtup.t_data->t_infomask |= infomask_old_tuple;
- oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
- HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleHeaderSetXmax(oldtup->t_data, xmax_old_tuple);
+ oldtup->t_data->t_infomask |= infomask_old_tuple;
+ oldtup->t_data->t_infomask2 |= infomask2_old_tuple;
+ HeapTupleHeaderSetCmax(oldtup->t_data, cid, iscombo);
/* record address of new tuple in t_ctid of old one */
- oldtup.t_data->t_ctid = heaptup->t_self;
+ oldtup->t_data->t_ctid = heaptup->t_self;
/* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
if (PageIsAllVisible(BufferGetPage(buffer)))
@@ -4180,7 +4030,7 @@ l2:
all_visible_cleared = true;
PageClearAllVisible(BufferGetPage(buffer));
visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
- vmbuffer, VISIBILITYMAP_VALID_BITS);
+ *vmbuffer, VISIBILITYMAP_VALID_BITS);
}
if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
{
@@ -4205,12 +4055,12 @@ l2:
*/
if (RelationIsAccessibleInLogicalDecoding(relation))
{
- log_heap_new_cid(relation, &oldtup);
+ log_heap_new_cid(relation, oldtup);
log_heap_new_cid(relation, heaptup);
}
recptr = log_heap_update(relation, buffer,
- newbuf, &oldtup, heaptup,
+ newbuf, oldtup, heaptup,
old_key_tuple,
all_visible_cleared,
all_visible_cleared_new);
@@ -4235,7 +4085,7 @@ l2:
* both tuple versions in one call to inval.c so we can avoid redundant
* sinval messages.)
*/
- CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
+ CacheInvalidateHeapTuple(relation, oldtup, heaptup);
/* Now we can release the buffer(s) */
if (newbuf != buffer)
@@ -4243,14 +4093,14 @@ l2:
ReleaseBuffer(buffer);
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
- if (BufferIsValid(vmbuffer))
- ReleaseBuffer(vmbuffer);
+ if (BufferIsValid(*vmbuffer))
+ ReleaseBuffer(*vmbuffer);
/*
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
+ UnlockTupleTuplock(relation, &oldtup->t_self, *lockmode);
pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
@@ -4283,13 +4133,6 @@ l2:
if (old_key_tuple != NULL && old_key_copied)
heap_freetuple(old_key_tuple);
- bms_free(hot_attrs);
- bms_free(sum_attrs);
- bms_free(key_attrs);
- bms_free(id_attrs);
- bms_free(modified_attrs);
- bms_free(interesting_attrs);
-
return TM_Ok;
}
@@ -4298,7 +4141,7 @@ l2:
* Confirm adequate lock held during heap_update(), per rules from
* README.tuplock section "Locking to write inplace-updated tables".
*/
-static void
+void
check_lock_if_inplace_updateable_rel(Relation relation,
const ItemPointerData *otid,
HeapTuple newtup)
@@ -4470,7 +4313,7 @@ heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
* listed as interesting) of the old tuple is a member of external_cols and is
* stored externally.
*/
-static Bitmapset *
+Bitmapset *
HeapDetermineColumnsInfo(Relation relation,
Bitmapset *interesting_cols,
Bitmapset *external_cols,
@@ -4553,25 +4396,175 @@ HeapDetermineColumnsInfo(Relation relation,
}
/*
- * simple_heap_update - replace a tuple
- *
- * This routine may be used to update a tuple when concurrent updates of
- * the target tuple are not expected (for example, because we have a lock
- * on the relation associated with the tuple). Any failure is reported
- * via ereport().
+ * This routine may be used to update a tuple when concurrent updates of the
+ * target tuple are not expected (for example, because we have a lock on the
+ * relation associated with the tuple). Any failure is reported via ereport().
*/
void
-simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup,
+simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *mix_attrs,
+ *idx_attrs;
+ ItemId lp;
+ HeapTupleData oldtup;
+ bool rep_id_key_required = false;
+
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ /*
+ * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
+ * we see LP_NORMAL here. When the otid origin is a syscache, we may have
+ * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
+ * of which indicates concurrent pruning.
+ *
+ * Failing with TM_Updated would be most accurate. However, unlike other
+ * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
+ * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
+ * does matter to SQL statements UPDATE and MERGE, those SQL statements
+ * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
+ * TM_Updated and TM_Deleted affects only the wording of error messages.
+ * Settle on TM_Deleted, for two reasons. First, it avoids complicating
+ * the specification of when tmfd->ctid is valid. Second, it creates
+ * error log evidence that we took this branch.
+ *
+ * Since it's possible to see LP_UNUSED at otid, it's also possible to see
+ * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
+ * unrelated row, we'll fail with "duplicate key value violates unique".
+ * XXX if otid is the live, newer version of the newtup row, we'll discard
+ * changes originating in versions of this catalog row after the version
+ * the caller got from syscache. See syscache-update-pruned.spec.
+ */
+ if (!ItemIdIsNormal(lp))
+ {
+ Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
+
+ UnlockReleaseBuffer(buffer);
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ *update_indexes = TU_None;
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(idx_attrs);
+ /* mix_attrs not yet initialized */
+
+ elog(ERROR, "tuple concurrently deleted");
+
+ return;
+ }
+
+ /*
+ * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
+ * then pass that on to heap_update.
+ */
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
+ &oldtup, tuple, &rep_id_key_required);
+
+ /*
+ * We'll need to WAL log the replica identity attributes if they overlap
+ * with the modified indexed attributes or if, as HeapDetermineColumnsInfo
+ * just reported, an unmodified replica identity attribute of the old tuple
+ * is stored externally.
+ */
+ rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+
+ result = heap_update(relation, &oldtup, tuple, GetCurrentCommandId(true),
+ InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required,
+ update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(mix_attrs);
+ bms_free(idx_attrs);
- result = heap_update(relation, otid, tup,
- GetCurrentCommandId(true), InvalidSnapshot,
- true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
switch (result)
{
case TM_SelfModified:
@@ -9227,12 +9220,11 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
* the same tuple that was passed in.
*/
static HeapTuple
-ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
- bool *copy)
+ExtractReplicaIdentity(Relation relation, HeapTuple tp, Bitmapset *rid_attrs,
+ bool key_required, bool *copy)
{
TupleDesc desc = RelationGetDescr(relation);
char replident = relation->rd_rel->relreplident;
- Bitmapset *idattrs;
HeapTuple key_tuple;
bool nulls[MaxHeapAttributeNumber];
Datum values[MaxHeapAttributeNumber];
@@ -9263,17 +9255,13 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
if (!key_required)
return NULL;
- /* find out the replica identity columns */
- idattrs = RelationGetIndexAttrBitmap(relation,
- INDEX_ATTR_BITMAP_IDENTITY_KEY);
-
/*
* If there's no defined replica identity columns, treat as !key_required.
* (This case should not be reachable from heap_update, since that should
* calculate key_required accurately. But heap_delete just passes
* constant true for key_required, so we can hit this case in deletes.)
*/
- if (bms_is_empty(idattrs))
+ if (bms_is_empty(rid_attrs))
return NULL;
/*
@@ -9286,7 +9274,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
for (int i = 0; i < desc->natts; i++)
{
if (bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
- idattrs))
+ rid_attrs))
Assert(!nulls[i]);
else
nulls[i] = true;
@@ -9295,8 +9283,6 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
key_tuple = heap_form_tuple(desc, values, nulls);
*copy = true;
- bms_free(idattrs);
-
/*
* If the tuple, which by here only contains indexed columns, still has
* toasted columns, force them to be inlined. This is somewhat unlikely
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index cbef73e5d4b..54c021ff209 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -44,6 +44,7 @@
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/rel.h"
static void reform_and_rewrite_tuple(HeapTuple tuple,
@@ -312,23 +313,133 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}
-
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd,
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
{
+ bool rep_id_key_required = false;
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+ HeapTupleData oldtup;
+ Buffer buffer;
+ Buffer vmbuffer = InvalidBuffer;
+ Page page;
+ BlockNumber block;
+ ItemId lp;
+ Bitmapset *hot_attrs,
+ *sum_attrs,
+ *pk_attrs,
+ *rid_attrs,
+ *mix_attrs,
+ *idx_attrs;
TM_Result result;
+ Assert(ItemPointerIsValid(otid));
+
+ /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
+ Assert(HeapTupleHeaderGetNatts(tuple->t_data) <=
+ RelationGetNumberOfAttributes(relation));
+
+ /*
+ * Forbid this during a parallel operation, lest it allocate a combo CID.
+ * Other workers might need that combo CID for visibility checks, and we
+ * have no provision for broadcasting it to them.
+ */
+ if (IsInParallelMode())
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
+ errmsg("cannot update tuples during a parallel operation")));
+
+#ifdef USE_ASSERT_CHECKING
+ check_lock_if_inplace_updateable_rel(relation, otid, tuple);
+#endif
+
+ /*
+ * Fetch the list of attributes to be checked for various operations.
+ *
+ * For HOT considerations, this is wasted effort if we fail to update or
+ * have to put the new tuple on a different page. But we must compute the
+ * list before obtaining buffer lock --- in the worst case, if we are
+ * doing an update on one of the relevant system catalogs, we could
+ * deadlock if we try to fetch the list later. In any case, the relcache
+ * caches the data so this is usually pretty cheap.
+ *
+ * We also need columns used by the replica identity and columns that are
+ * considered the "key" of rows in the table.
+ *
+ * Note that we get copies of each bitmap, so we need not worry about
+ * relcache flush happening midway through.
+ */
+ hot_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_HOT_BLOCKING);
+ sum_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_SUMMARIZED);
+ pk_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
+ rid_attrs = RelationGetIndexAttrBitmap(relation,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ idx_attrs = bms_copy(hot_attrs);
+ idx_attrs = bms_add_members(idx_attrs, sum_attrs);
+ idx_attrs = bms_add_members(idx_attrs, pk_attrs);
+ idx_attrs = bms_add_members(idx_attrs, rid_attrs);
+
+ block = ItemPointerGetBlockNumber(otid);
+ INJECTION_POINT("heap_update-before-pin", NULL);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ /*
+ * Before locking the buffer, pin the visibility map page if it appears to
+ * be necessary. Since we haven't got the lock yet, someone else might be
+ * in the middle of changing this, so we'll need to recheck after we have
+ * the lock.
+ */
+ if (PageIsAllVisible(page))
+ visibilitymap_pin(relation, block, &vmbuffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
+
+ Assert(ItemIdIsNormal(lp));
+
+ /*
+ * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
+ * then pass that on to heap_update.
+ */
+ oldtup.t_tableOid = RelationGetRelid(relation);
+ oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ oldtup.t_len = ItemIdGetLength(lp);
+ oldtup.t_self = *otid;
+
+ mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
+ &oldtup, tuple, &rep_id_key_required);
+
+ /*
+ * We'll need to WAL log the replica identity attributes if they overlap
+ * with the modified indexed attributes or if, as HeapDetermineColumnsInfo
+ * just reported, an unmodified replica identity attribute of the old tuple
+ * is stored externally.
+ */
+ rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
- result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
- tmfd, lockmode, update_indexes);
+ result = heap_update(relation, &oldtup, tuple, cid, crosscheck, wait, tmfd, lockmode,
+ buffer, page, block, lp, hot_attrs, sum_attrs, pk_attrs,
+ rid_attrs, mix_attrs, &vmbuffer, rep_id_key_required, update_indexes);
+
+ bms_free(hot_attrs);
+ bms_free(sum_attrs);
+ bms_free(pk_attrs);
+ bms_free(rid_attrs);
+ bms_free(mix_attrs);
+ bms_free(idx_attrs);
+
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 3c0961ab36b..38f944771db 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -364,11 +364,13 @@ extern TM_Result heap_delete(Relation relation, const ItemPointerData *tid,
TM_FailureData *tmfd, bool changingPart);
extern void heap_finish_speculative(Relation relation, const ItemPointerData *tid);
extern void heap_abort_speculative(Relation relation, const ItemPointerData *tid);
-extern TM_Result heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple newtup,
- CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes);
+extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup,
+ HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
+ Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
+ Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
+ Bitmapset *mix_attrs, Buffer *vmbuffer,
+ bool rep_id_key_required, TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
@@ -430,6 +432,18 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
OffsetNumber *dead, int ndead,
OffsetNumber *unused, int nunused);
+/* in heap/heapam.c */
+extern Bitmapset *HeapDetermineColumnsInfo(Relation relation,
+ Bitmapset *interesting_cols,
+ Bitmapset *external_cols,
+ HeapTuple oldtup, HeapTuple newtup,
+ bool *has_external);
+#ifdef USE_ASSERT_CHECKING
+extern void check_lock_if_inplace_updateable_rel(Relation relation,
+ const ItemPointerData *otid,
+ HeapTuple newtup);
+#endif
+
/* in heap/vacuumlazy.c */
extern void heap_vacuum_rel(Relation rel,
const VacuumParams params, BufferAccessStrategy bstrategy);
--
2.51.2
[application/octet-stream] v28-0004-Identify-if-partial-indexes-are-impacted-by-an-u.patch (3.8K, 3-v28-0004-Identify-if-partial-indexes-are-impacted-by-an-u.patch)
download | inline diff:
From b8e1dc6e625102c9f25c2019e4fc900d1125540f Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Fri, 5 Dec 2025 13:42:13 -0500
Subject: [PATCH v28 4/4] Identify if partial indexes are impacted by an
update.
The executor now determines which indexed attributes, if any, are both
modified and force new index tuples to be inserted, and it does so ahead
of calling into the table AM update function. Prior to this commit the
test for partial indexes happened after the table update; this commit
moves it before the update so that, when the old and new tuples both lie
outside the predicate, the predicate's attributes are not included in
the "modified indexed attributes" bitmapset.
---
src/backend/executor/nodeModifyTable.c | 53 ++++++++++++++++++++++++--
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index ab0a343bcf6..fee3d75fa36 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -227,9 +227,11 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
Bitmapset *m_attrs = NULL; /* (possibly) modified indexed attrs */
Bitmapset *p_attrs = NULL; /* (possibly) modified predicate attrs */
Bitmapset *u_attrs = NULL; /* unmodified indexed attrs */
+ Bitmapset *pre_attrs = indexInfo->ii_PredicateAttrs;
bool has_am_compare = (amroutine->amcomparedatums != NULL);
bool supports_ios = (amroutine->amcanreturn != NULL);
bool is_partial = (indexInfo->ii_Predicate != NIL);
+ TupleTableSlot *save_scantuple;
ExprContext *econtext = GetPerTupleExprContext(estate);
int num_datums = supports_ios ?
indexInfo->ii_NumIndexAttrs : indexInfo->ii_NumIndexKeyAttrs;
@@ -238,9 +240,51 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs))
continue;
- /* Add partial index attributes */
- if (is_partial)
- p_attrs = bms_add_members(p_attrs, indexInfo->ii_PredicateAttrs);
+ /* Checking partial at this point isn't viable when we're serializable */
+ if (is_partial && IsolationIsSerializable())
+ {
+ p_attrs = bms_add_members(p_attrs, pre_attrs);
+ }
+ /* Check partial index predicate */
+ else if (is_partial)
+ {
+ ExprState *pstate;
+ bool old_qualifies,
+ new_qualifies;
+
+
+ if (!indexInfo->ii_CheckedPredicate)
+ pstate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+ else
+ pstate = indexInfo->ii_PredicateState;
+
+ save_scantuple = econtext->ecxt_scantuple;
+
+ econtext->ecxt_scantuple = old_tts;
+ old_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = new_tts;
+ new_qualifies = ExecQual(pstate, econtext);
+
+ econtext->ecxt_scantuple = save_scantuple;
+
+ indexInfo->ii_CheckedPredicate = true;
+ indexInfo->ii_PredicateState = pstate;
+ indexInfo->ii_PredicateSatisfied = new_qualifies;
+
+ /* Both outside predicate, index doesn't need update */
+ if (!old_qualifies && !new_qualifies)
+ continue;
+
+ /* A transition means we need to update the index */
+ if (old_qualifies != new_qualifies)
+ p_attrs = bms_copy(pre_attrs);
+
+ /*
+ * When both are within the predicate we must update this index,
+ * but only if one of the index key attributes changed.
+ */
+ }
/* Compare the index datums for equality */
for (int j = 0; j < num_datums; j++)
@@ -276,11 +320,12 @@ ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
*/
else if (rel_attrnum == 0)
{
- TupleTableSlot *save_scantuple = econtext->ecxt_scantuple;
Oid expr_type_oid;
Expr *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
ExprState *state;
+ save_scantuple = econtext->ecxt_scantuple;
+
if (indexInfo->ii_ExpressionsState == NIL)
{
/* First time through, set up expression evaluation state */
--
2.51.2
[application/octet-stream] v28-0003-Replace-index_unchanged_by_update-with-ri_Change.patch (8.4K, 4-v28-0003-Replace-index_unchanged_by_update-with-ri_Change.patch)
download | inline diff:
From 327015f9f51ae0e329fc943a58e596b3ab7bea24 Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Fri, 31 Oct 2025 14:55:25 -0400
Subject: [PATCH v28 3/4] Replace index_unchanged_by_update() with
ri_ChangedIndexedCols
In execIndexing on updates we'd like to pass a hint to the indexing code
when the indexed attributes are unchanged. This commit replaces the now
redundant code in index_unchanged_by_update() with the same information
found earlier in ExecWhichIndexesRequireUpdates() and stashed in
ri_ChangedIndexedCols.
---
src/backend/catalog/toasting.c | 2 -
src/backend/executor/execIndexing.c | 156 +---------------------------
src/backend/nodes/makefuncs.c | 2 -
src/include/nodes/execnodes.h | 4 -
4 files changed, 1 insertion(+), 163 deletions(-)
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index ff8da5be5f8..5675c6f8ea9 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -304,8 +304,6 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
indexInfo->ii_Unique = true;
indexInfo->ii_NullsNotDistinct = false;
indexInfo->ii_ReadyForInserts = true;
- indexInfo->ii_CheckedUnchanged = false;
- indexInfo->ii_IndexUnchanged = false;
indexInfo->ii_Concurrent = false;
indexInfo->ii_BrokenHotChain = false;
indexInfo->ii_ParallelWorkers = 0;
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 1275feffae9..b75e76401d2 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -143,11 +143,6 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
static bool index_recheck_constraint(Relation index, const Oid *constr_procs,
const Datum *existing_values, const bool *existing_isnull,
const Datum *new_values);
-static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
- EState *estate, IndexInfo *indexInfo,
- Relation indexRelation);
-static bool index_expression_changed_walker(Node *node,
- Bitmapset *allUpdatedCols);
static void ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval,
char typtype, Oid atttypid);
@@ -451,10 +446,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* index. If we're being called as part of an UPDATE statement,
* consider if the 'indexUnchanged' = true hint should be passed.
*/
- indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
- estate,
- indexInfo,
- indexRelation);
+ indexUnchanged = update && bms_is_empty(resultRelInfo->ri_ChangedIndexedCols);
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
@@ -1014,152 +1006,6 @@ index_recheck_constraint(Relation index, const Oid *constr_procs,
return true;
}
-/*
- * Check if ExecInsertIndexTuples() should pass indexUnchanged hint.
- *
- * When the executor performs an UPDATE that requires a new round of index
- * tuples, determine if we should pass 'indexUnchanged' = true hint for one
- * single index.
- */
-static bool
-index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
- IndexInfo *indexInfo, Relation indexRelation)
-{
- Bitmapset *updatedCols;
- Bitmapset *extraUpdatedCols;
- Bitmapset *allUpdatedCols;
- bool hasexpression = false;
- List *idxExprs;
-
- /*
- * Check cache first
- */
- if (indexInfo->ii_CheckedUnchanged)
- return indexInfo->ii_IndexUnchanged;
- indexInfo->ii_CheckedUnchanged = true;
-
- /*
- * Check for indexed attribute overlap with updated columns.
- *
- * Only do this for key columns. A change to a non-key column within an
- * INCLUDE index should not be counted here. Non-key column values are
- * opaque payload state to the index AM, a little like an extra table TID.
- *
- * Note that row-level BEFORE triggers won't affect our behavior, since
- * they don't affect the updatedCols bitmaps generally. It doesn't seem
- * worth the trouble of checking which attributes were changed directly.
- */
- updatedCols = ExecGetUpdatedCols(resultRelInfo, estate);
- extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate);
- for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++)
- {
- int keycol = indexInfo->ii_IndexAttrNumbers[attr];
-
- if (keycol <= 0)
- {
- /*
- * Skip expressions for now, but remember to deal with them later
- * on
- */
- hasexpression = true;
- continue;
- }
-
- if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- updatedCols) ||
- bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
- extraUpdatedCols))
- {
- /* Changed key column -- don't hint for this index */
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
- }
-
- /*
- * When we get this far and index has no expressions, return true so that
- * index_insert() call will go on to pass 'indexUnchanged' = true hint.
- *
- * The _absence_ of an indexed key attribute that overlaps with updated
- * attributes (in addition to the total absence of indexed expressions)
- * shows that the index as a whole is logically unchanged by UPDATE.
- */
- if (!hasexpression)
- {
- indexInfo->ii_IndexUnchanged = true;
- return true;
- }
-
- /*
- * Need to pass only one bms to expression_tree_walker helper function.
- * Avoid allocating memory in common case where there are no extra cols.
- */
- if (!extraUpdatedCols)
- allUpdatedCols = updatedCols;
- else
- allUpdatedCols = bms_union(updatedCols, extraUpdatedCols);
-
- /*
- * We have to work slightly harder in the event of indexed expressions,
- * but the principle is the same as before: try to find columns (Vars,
- * actually) that overlap with known-updated columns.
- *
- * If we find any matching Vars, don't pass hint for index. Otherwise
- * pass hint.
- */
- idxExprs = RelationGetIndexExpressions(indexRelation);
- hasexpression = index_expression_changed_walker((Node *) idxExprs,
- allUpdatedCols);
- list_free(idxExprs);
- if (extraUpdatedCols)
- bms_free(allUpdatedCols);
-
- if (hasexpression)
- {
- indexInfo->ii_IndexUnchanged = false;
- return false;
- }
-
- /*
- * Deliberately don't consider index predicates. We should even give the
- * hint when result rel's "updated tuple" has no corresponding index
- * tuple, which is possible with a partial index (provided the usual
- * conditions are met).
- */
- indexInfo->ii_IndexUnchanged = true;
- return true;
-}
-
-/*
- * Indexed expression helper for index_unchanged_by_update().
- *
- * Returns true when Var that appears within allUpdatedCols located.
- */
-static bool
-index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
-{
- if (node == NULL)
- return false;
-
- if (IsA(node, Var))
- {
- Var *var = (Var *) node;
-
- if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
- allUpdatedCols))
- {
- /* Var was updated -- indicates that we should not hint */
- return true;
- }
-
- /* Still haven't found a reason to not pass the hint */
- return false;
- }
-
- return expression_tree_walker(node, index_expression_changed_walker,
- allUpdatedCols);
-}
-
/*
* ExecWithoutOverlapsNotEmpty - raise an error if the tuple has an empty
* range or multirange in the given attribute.
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index dd092bacad9..9d3a5b79d27 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -845,8 +845,6 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
n->ii_Unique = unique;
n->ii_NullsNotDistinct = nulls_not_distinct;
n->ii_ReadyForInserts = isready;
- n->ii_CheckedUnchanged = false;
- n->ii_IndexUnchanged = false;
n->ii_Concurrent = concurrent;
n->ii_Summarizing = summarizing;
n->ii_WithoutOverlaps = withoutoverlaps;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 8a90afe315d..b158e496b63 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -218,10 +218,6 @@ typedef struct IndexInfo
bool ii_NullsNotDistinct;
/* is it valid for inserts? */
bool ii_ReadyForInserts;
- /* IndexUnchanged status determined yet? */
- bool ii_CheckedUnchanged;
- /* aminsert hint, cached for retail inserts */
- bool ii_IndexUnchanged;
/* are we doing a concurrent index build? */
bool ii_Concurrent;
/* did we detect any broken HOT chains? */
--
2.51.2
[application/octet-stream] v28-0002-Track-changed-indexed-columns-in-the-executor-du.patch (113.5K, 5-v28-0002-Track-changed-indexed-columns-in-the-executor-du.patch)
download | inline diff:
From 985ca49d3b28c62b0d9964e2a2f568e47c259edf Mon Sep 17 00:00:00 2001
From: Greg Burd <[email protected]>
Date: Sun, 26 Oct 2025 10:49:25 -0400
Subject: [PATCH v28 2/4] Track changed indexed columns in the executor during
UPDATEs
Refactor executor update logic to determine which indexed columns have
actually changed during an UPDATE operation rather than leaving this up
to HeapDetermineColumnsInfo() in heap_update().
ExecWhichIndexesRequireUpdates() replaces HeapDetermineColumnsInfo()
when invoked from the table AM API via heapam_tuple_update(). The
test for equality remains datumIsEqual() as before.
This change necessitated some logic changes in execReplication(): because
it performs updates, it now must provide the set of attributes that are
both changed and referenced by indexes. Luckily, this is available within
calls to slot_modify_data() where LogicalRepTupleData is processed and
has a record of updated attributes. In this case rather than using
ExecWhichIndexesRequireUpdates() we can preserve what slot_modify_data()
identifies as the modified set and then intersect that with the set of
indexes on the relation and get the correct set of modified indexed
attributes required on heap_update().
This commit also extends the role index AMs play in determining whether
they require an update. A new optional index AM API, amcomparedatums(), is
added to allow index access methods to provide custom logic for
comparing datums. Hash and GIN indexes now implement this function. When
not implemented the executor will compare TupleTableSlot datums for
equality using datumIsEqual() as before.
Because heap_update() now requires the caller to provide the modified
indexed columns simple_heap_update() has become a tad more complex. It
is only called from CatalogTupleUpdate() which either updates heap
tuples via their Form_XXX or by calling heap_modify_tuple(). In both
cases the caller does know the modified set of attributes, but sadly
those attributes are lost before being provided to simple_heap_update().
Due to that the "simple" path has to (for now) retain the
HeapDetermineColumnsInfo() logic in order for catalog updates to
potentially take the HOT path.
---
src/backend/access/brin/brin.c | 1 +
src/backend/access/gin/ginutil.c | 90 ++-
src/backend/access/hash/hash.c | 44 ++
src/backend/access/heap/heapam.c | 20 +-
src/backend/access/heap/heapam_handler.c | 76 +-
src/backend/access/nbtree/nbtree.c | 1 +
src/backend/access/table/tableam.c | 5 +-
src/backend/bootstrap/bootstrap.c | 8 +
src/backend/catalog/index.c | 57 ++
src/backend/catalog/indexing.c | 16 +-
src/backend/catalog/toasting.c | 4 +
src/backend/executor/execIndexing.c | 41 +-
src/backend/executor/execMain.c | 1 +
src/backend/executor/execReplication.c | 7 +
src/backend/executor/nodeModifyTable.c | 288 +++++++-
src/backend/nodes/bitmapset.c | 4 +
src/backend/nodes/makefuncs.c | 4 +
src/backend/replication/logical/worker.c | 70 +-
src/backend/utils/cache/relcache.c | 15 +
src/include/access/amapi.h | 28 +
src/include/access/gin.h | 3 +
src/include/access/heapam.h | 6 +-
src/include/access/nbtree.h | 4 +
src/include/access/tableam.h | 8 +-
src/include/catalog/index.h | 1 +
src/include/executor/executor.h | 9 +
src/include/nodes/execnodes.h | 20 +
src/include/utils/rel.h | 1 +
src/include/utils/relcache.h | 1 +
.../expected/insert-conflict-specconflict.out | 20 +
.../regress/expected/heap_hot_updates.out | 650 ++++++++++++++++++
src/test/regress/parallel_schedule | 6 +
src/test/regress/sql/heap_hot_updates.sql | 513 ++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
34 files changed, 1949 insertions(+), 74 deletions(-)
create mode 100644 src/test/regress/expected/heap_hot_updates.out
create mode 100644 src/test/regress/sql/heap_hot_updates.sql
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 6887e421442..aa9fd110802 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -290,6 +290,7 @@ brinhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = NULL,
.amvalidate = brinvalidate,
+ .amcomparedatums = NULL,
.amadjustmembers = NULL,
.ambeginscan = brinbeginscan,
.amrescan = brinrescan,
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d205093e21d..7645b4ee0f2 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -26,6 +26,7 @@
#include "storage/indexfsm.h"
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
+#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/typcache.h"
@@ -78,6 +79,7 @@ ginhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = ginbuildphasename,
.amvalidate = ginvalidate,
+ .amcomparedatums = gincomparedatums,
.amadjustmembers = ginadjustmembers,
.ambeginscan = ginbeginscan,
.amrescan = ginrescan,
@@ -440,13 +442,6 @@ cmpEntries(const void *a, const void *b, void *arg)
return res;
}
-
-/*
- * Extract the index key values from an indexable item
- *
- * The resulting key values are sorted, and any duplicates are removed.
- * This avoids generating redundant index entries.
- */
Datum *
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
Datum value, bool isNull,
@@ -692,3 +687,84 @@ ginbuildphasename(int64 phasenum)
return NULL;
}
}
+
+/*
+ * gincomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * This function extracts keys from both old_datum and new_datum using the
+ * opclass's extractValue function, then compares the extracted key arrays.
+ * Returns true if the key sets are identical (same keys, same counts).
+ *
+ * This enables HOT updates for GIN indexes when the indexed portions of a
+ * value haven't changed, even if the value itself has changed.
+ *
+ * Example: JSONB column with GIN index. If an update changes a non-indexed
+ * key in the JSONB document, the extracted keys are identical and we can
+ * do a HOT update.
+ */
+bool
+gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ GinState ginstate;
+ Datum *old_keys;
+ Datum *new_keys;
+ GinNullCategory *old_categories;
+ GinNullCategory *new_categories;
+ int32 old_nkeys;
+ int32 new_nkeys;
+ MemoryContext tmpcontext;
+ MemoryContext oldcontext;
+ bool result = true;
+
+ /* Handle NULL cases */
+ if (old_isnull != new_isnull)
+ return false;
+ if (old_isnull)
+ return true;
+
+ /* Create temporary context for extraction work */
+ tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+ "GIN datum comparison",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+ initGinState(&ginstate, index);
+
+ /* Extract keys from both datums using existing GIN infrastructure */
+ old_keys = ginExtractEntries(&ginstate, attnum, old_datum, old_isnull,
+ &old_nkeys, &old_categories);
+ new_keys = ginExtractEntries(&ginstate, attnum, new_datum, new_isnull,
+ &new_nkeys, &new_categories);
+
+ /* Different number of keys, definitely different */
+ if (old_nkeys != new_nkeys)
+ {
+ result = false;
+ goto cleanup;
+ }
+
+ /*
+ * Compare the sorted key arrays element-by-element. Since both arrays are
+ * already sorted by ginExtractEntries, we can do a simple O(n)
+ * comparison.
+ */
+ for (int i = 0; i < old_nkeys; i++)
+ {
+ if (ginCompareEntries(&ginstate, attnum,
+ old_keys[i], old_categories[i],
+ new_keys[i], new_categories[i]) != 0)
+ {
+ result = false;
+ break;
+ }
+ }
+
+cleanup:
+ /* Clean up */
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(tmpcontext);
+
+ return result;
+}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e88ddb32a05..49a99998083 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -50,6 +50,10 @@ static void hashbuildCallback(Relation index,
void *state);
+static bool hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Hash handler function: return IndexAmRoutine with access method parameters
* and callbacks.
@@ -98,6 +102,7 @@ hashhandler(PG_FUNCTION_ARGS)
.amproperty = NULL,
.ambuildphasename = NULL,
.amvalidate = hashvalidate,
+ .amcomparedatums = hashcomparedatums,
.amadjustmembers = hashadjustmembers,
.ambeginscan = hashbeginscan,
.amrescan = hashrescan,
@@ -944,3 +949,42 @@ hashtranslatecmptype(CompareType cmptype, Oid opfamily)
return HTEqualStrategyNumber;
return InvalidStrategy;
}
+
+/*
+ * hashcomparedatums - Compare datums to determine if they produce identical keys
+ *
+ * Returns true if the hash values are identical (index doesn't need update).
+ */
+bool
+hashcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull)
+{
+ uint32 old_hashkey;
+ uint32 new_hashkey;
+
+ /* If both are NULL, they're equal */
+ if (old_isnull && new_isnull)
+ return true;
+
+ /* If NULL status differs, they're not equal */
+ if (old_isnull != new_isnull)
+ return false;
+
+ /*
+ * _hash_datum2hashkey() is used because we know this can't be a cross
+ * type comparison.
+ */
+ old_hashkey = _hash_datum2hashkey(index, old_datum);
+ new_hashkey = _hash_datum2hashkey(index, new_datum);
+
+ /*
+ * If hash keys are identical, the index entry would be the same. Return
+ * true to indicate no index update needed.
+ *
+ * Note: Hash collisions are rare but possible. If hash(x) == hash(y) but
+ * x != y, the hash index still treats them identically, so we correctly
+ * return true.
+ */
+ return (old_hashkey == new_hashkey);
+}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 8563dadc8da..992795d73de 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3325,12 +3325,12 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
* generated by another transaction).
*/
TM_Result
-heap_update(Relation relation, HeapTupleData *oldtup,
- HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait,
- TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
- Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
- Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
- Bitmapset *mix_attrs, Buffer *vmbuffer,
+heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup,
+ CommandId cid, Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd, LockTupleMode *lockmode,
+ Buffer buffer, Page page, BlockNumber block, ItemId lp,
+ Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs,
+ Bitmapset *rid_attrs, const Bitmapset *mix_attrs, Buffer *vmbuffer,
bool rep_id_key_required, TU_UpdateIndexes *update_indexes)
{
TM_Result result;
@@ -4399,8 +4399,9 @@ HeapDetermineColumnsInfo(Relation relation,
* This routine may be used to update a tuple when concurrent updates of the
* target tuple are not expected (for example, because we have a lock on the
* relation associated with the tuple). Any failure is reported via ereport().
+ * Returns the set of modified indexed attributes.
*/
-void
+Bitmapset *
simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple,
TU_UpdateIndexes *update_indexes)
{
@@ -4529,7 +4530,7 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
elog(ERROR, "tuple concurrently deleted");
- return;
+ return NULL;
}
/*
@@ -4562,7 +4563,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
bms_free(sum_attrs);
bms_free(pk_attrs);
bms_free(rid_attrs);
- bms_free(mix_attrs);
bms_free(idx_attrs);
switch (result)
@@ -4588,6 +4588,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
elog(ERROR, "unrecognized heap_update status: %u", result);
break;
}
+
+ return mix_attrs;
}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 54c021ff209..ff0319c4850 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -315,9 +315,12 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
- CommandId cid, Snapshot snapshot, Snapshot crosscheck,
- bool wait, TM_FailureData *tmfd,
- LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+ CommandId cid, Snapshot snapshot,
+ Snapshot crosscheck, bool wait,
+ TM_FailureData *tmfd,
+ LockTupleMode *lockmode,
+ const Bitmapset *mix_attrs,
+ TU_UpdateIndexes *update_indexes)
{
bool rep_id_key_required = false;
bool shouldFree = true;
@@ -332,7 +335,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
*sum_attrs,
*pk_attrs,
*rid_attrs,
- *mix_attrs,
*idx_attrs;
TM_Result result;
@@ -405,25 +407,66 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
Assert(ItemIdIsNormal(lp));
- /*
- * Partially construct the oldtup for HeapDetermineColumnsInfo to work and
- * then pass that on to heap_update.
- */
oldtup.t_tableOid = RelationGetRelid(relation);
oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
- mix_attrs = HeapDetermineColumnsInfo(relation, idx_attrs, rid_attrs,
- &oldtup, tuple, &rep_id_key_required);
-
/*
- * We'll need to WAL log the replica identity attributes if either they
- * overlap with the modified indexed attributes or, as we've checked for
- * just now in HeapDetermineColumnsInfo, they were unmodified external
- * indexed attributes.
+ * We'll need to include the replica identity key when either the identity
+ * key attributes overlap with the modified index attributes or when the
+ * replica identity attributes are stored externally. This is required
+ * because for such attributes the flattened value won't be WAL logged as
+ * part of the new tuple so we must determine if we need to extract and
+ * include them as part of the old_key_tuple (see ExtractReplicaIdentity).
*/
- rep_id_key_required = rep_id_key_required || bms_overlap(mix_attrs, rid_attrs);
+ rep_id_key_required = bms_overlap(mix_attrs, rid_attrs);
+ if (!rep_id_key_required)
+ {
+ Bitmapset *attrs;
+ TupleDesc tupdesc = RelationGetDescr(relation);
+ int attidx = -1;
+
+ /*
+ * We don't own idx_attrs so we'll copy it and remove the modified set
+ * to reduce the attributes we need to test in the while loop and
+ * avoid two branches in the loop.
+ */
+ attrs = bms_difference(idx_attrs, mix_attrs);
+ attrs = bms_int_members(attrs, rid_attrs);
+
+ while ((attidx = bms_next_member(attrs, attidx)) >= 0)
+ {
+ /*
+ * attidx is zero-based, attrnum is the normal attribute number
+ */
+ AttrNumber attrnum = attidx + FirstLowInvalidHeapAttributeNumber;
+ Datum value;
+ bool isnull;
+
+ /*
+ * System attributes are not added into interesting_attrs in
+ * relcache
+ */
+ Assert(attrnum > 0);
+
+ value = heap_getattr(&oldtup, attrnum, tupdesc, &isnull);
+
+ /* No need to check attributes that can't be stored externally */
+ if (isnull ||
+ TupleDescCompactAttr(tupdesc, attrnum - 1)->attlen != -1)
+ continue;
+
+ /* Check if the old tuple's attribute is stored externally */
+ if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value)))
+ {
+ rep_id_key_required = true;
+ break;
+ }
+ }
+
+ bms_free(attrs);
+ }
/* Update the tuple with table oid */
slot->tts_tableOid = RelationGetRelid(relation);
@@ -437,7 +480,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
bms_free(sum_attrs);
bms_free(pk_attrs);
bms_free(rid_attrs);
- bms_free(mix_attrs);
bms_free(idx_attrs);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3dec1ee657d..b975612bbdd 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -156,6 +156,7 @@ bthandler(PG_FUNCTION_ARGS)
.amproperty = btproperty,
.ambuildphasename = btbuildphasename,
.amvalidate = btvalidate,
+ .amcomparedatums = NULL,
.amadjustmembers = btadjustmembers,
.ambeginscan = btbeginscan,
.amrescan = btrescan,
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 87491796523..458d48ca79e 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -367,6 +367,7 @@ void
simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot,
Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes)
{
TM_Result result;
@@ -377,7 +378,9 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
- &tmfd, &lockmode, update_indexes);
+ &tmfd, &lockmode,
+ mix_attrs,
+ update_indexes);
switch (result)
{
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index dd57624b4f9..81347c7b47e 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -963,10 +963,18 @@ index_register(Oid heap,
newind->il_info->ii_Expressions =
copyObject(indexInfo->ii_Expressions);
newind->il_info->ii_ExpressionsState = NIL;
+ /* expression attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_ExpressionsAttrs =
+ copyObject(indexInfo->ii_ExpressionsAttrs);
/* predicate will likely be null, but may as well copy it */
newind->il_info->ii_Predicate =
copyObject(indexInfo->ii_Predicate);
newind->il_info->ii_PredicateState = NULL;
+ /* predicate attrs will likely be null, but may as well copy it */
+ newind->il_info->ii_PredicateAttrs =
+ copyObject(indexInfo->ii_PredicateAttrs);
+ newind->il_info->ii_CheckedPredicate = false;
+ newind->il_info->ii_PredicateSatisfied = false;
/* no exclusion constraints at bootstrap time, so no need to copy */
Assert(indexInfo->ii_ExclusionOps == NULL);
Assert(indexInfo->ii_ExclusionProcs == NULL);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 43de42ce39e..fe536c9740f 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -27,6 +27,7 @@
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relscan.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
#include "access/toast_compression.h"
#include "access/transam.h"
@@ -58,6 +59,7 @@
#include "commands/trigger.h"
#include "executor/executor.h"
#include "miscadmin.h"
+#include "nodes/execnodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
@@ -2412,6 +2414,61 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
* ----------------------------------------------------------------
*/
+/* ----------------
+ *		BuildUpdateIndexInfo
+ *
+ * For expression indexes, an update may leave the indexed value unchanged,
+ * which allows for a HOT update.  Add information to every IndexInfo of the
+ * given result relation so that callers can check whether the indexed value
+ * has changed:
+ *
+ *	ii_ExpressionsAttrs	attributes referenced by the index expressions
+ *	ii_PredicateAttrs	attributes referenced by the partial index predicate
+ *	ii_IndexedAttrs		key, including, and expression attributes (the
+ *						predicate attributes are deliberately left out)
+ *
+ * Plain key attribute numbers are stored offset by
+ * FirstLowInvalidHeapAttributeNumber.
+ *
+ * Do this processing here rather than in BuildIndexInfo() to not incur the
+ * overhead in the common non-expression cases.
+ * ----------------
+ */
+void
+BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo)
+{
+	for (int j = 0; j < resultRelInfo->ri_NumIndices; j++)
+	{
+		IndexInfo  *ii = resultRelInfo->ri_IndexRelationInfo[j];
+		int			indnatts = ii->ii_NumIndexAttrs;
+		Bitmapset  *attrs = NULL;
+
+		/*
+		 * Collect the plain columns used by the index, key and including.
+		 * An attribute number of 0 marks an expression column; those are
+		 * handled through ii_Expressions below.
+		 */
+		for (int i = 0; i < indnatts; i++)
+		{
+			AttrNumber	attnum = ii->ii_IndexAttrNumbers[i];
+
+			if (attnum != 0)
+				attrs = bms_add_member(attrs,
+									   attnum - FirstLowInvalidHeapAttributeNumber);
+		}
+
+		/* Collect attributes used in the expression */
+		if (ii->ii_Expressions)
+			pull_varattnos((Node *) ii->ii_Expressions,
+						   resultRelInfo->ri_RangeTableIndex,
+						   &ii->ii_ExpressionsAttrs);
+
+		/* Collect attributes used in the predicate */
+		if (ii->ii_Predicate)
+			pull_varattnos((Node *) ii->ii_Predicate,
+						   resultRelInfo->ri_RangeTableIndex,
+						   &ii->ii_PredicateAttrs);
+
+		/*
+		 * Combine key, including, and expression, but not partial index
+		 * predicate attributes.
+		 */
+		ii->ii_IndexedAttrs = bms_union(attrs, ii->ii_ExpressionsAttrs);
+
+		/* bms_union() built a new set, so the scratch set is ours to free */
+		bms_free(attrs);
+
+		/* All indexes should index *something*! */
+		Assert(!bms_is_empty(ii->ii_IndexedAttrs));
+	}
+}
+
/* ----------------
* BuildIndexInfo
* Construct an IndexInfo record for an open index
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index 0a1a68e0644..690a2511023 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -102,7 +102,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
* Get information from the state structure. Fall out if nothing to do.
*/
numIndexes = indstate->ri_NumIndices;
- if (numIndexes == 0)
+ if (numIndexes == 0 || updateIndexes == TU_None)
return;
relationDescs = indstate->ri_IndexRelationDescs;
indexInfoArray = indstate->ri_IndexRelationInfo;
@@ -314,15 +314,18 @@ CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
{
CatalogIndexState indstate;
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+
CatalogCloseIndexes(indstate);
+ bms_free(updatedAttrs);
}
/*
@@ -338,12 +341,15 @@ CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTu
CatalogIndexState indstate)
{
TU_UpdateIndexes updateIndexes = TU_All;
+ Bitmapset *updatedAttrs;
CatalogTupleCheckConstraints(heapRel, tup);
- simple_heap_update(heapRel, otid, tup, &updateIndexes);
-
+ updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs;
CatalogIndexInsert(indstate, tup, updateIndexes);
+ ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = NULL;
+ bms_free(updatedAttrs);
}
/*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index c78dcea98c1..ff8da5be5f8 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -292,8 +292,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
indexInfo->ii_IndexAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_ExpressionsAttrs = NULL;
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NULL;
+ indexInfo->ii_PredicateAttrs = NULL;
+ indexInfo->ii_CheckedPredicate = false;
+ indexInfo->ii_PredicateSatisfied = false;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 6ae0f959592..1275feffae9 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -109,11 +109,15 @@
#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
+#include "nodes/bitmapset.h"
+#include "nodes/execnodes.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
+#include "utils/datum.h"
#include "utils/injection_point.h"
#include "utils/multirangetypes.h"
#include "utils/rangetypes.h"
@@ -324,8 +328,8 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
- Datum values[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
+ Datum loc_values[INDEX_MAX_KEYS];
+ bool loc_isnull[INDEX_MAX_KEYS];
Assert(ItemPointerIsValid(tupleid));
@@ -349,13 +353,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
- /*
- * for each index, form and insert the index tuple
- */
+ /* Insert into each index that needs updating */
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
+ Datum *values;
+ bool *isnull;
bool applyNoDupErr;
IndexUniqueCheck checkUnique;
bool indexUnchanged;
@@ -372,7 +376,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
/*
* Skip processing of non-summarizing indexes if we only update
- * summarizing indexes
+ * summarizing indexes or if this index is unchanged.
*/
if (onlySummarizing && !indexInfo->ii_Summarizing)
continue;
@@ -393,8 +397,15 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
indexInfo->ii_PredicateState = predicate;
}
+ /* Check the index predicate if we haven't done so earlier on */
+ if (!indexInfo->ii_CheckedPredicate)
+ {
+ indexInfo->ii_PredicateSatisfied = ExecQual(predicate, econtext);
+ indexInfo->ii_CheckedPredicate = true;
+ }
+
/* Skip this index-update if the predicate isn't satisfied */
- if (!ExecQual(predicate, econtext))
+ if (!indexInfo->ii_PredicateSatisfied)
continue;
}
@@ -402,11 +413,10 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
- FormIndexDatum(indexInfo,
- slot,
- estate,
- values,
- isnull);
+ FormIndexDatum(indexInfo, slot, estate, loc_values, loc_isnull);
+
+ values = loc_values;
+ isnull = loc_isnull;
/* Check whether to apply noDupErr to this index */
applyNoDupErr = noDupErr &&
@@ -613,7 +623,12 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
checkedIndex = true;
/* Check for partial index */
- if (indexInfo->ii_Predicate != NIL)
+ if (indexInfo->ii_CheckedPredicate && !indexInfo->ii_PredicateSatisfied)
+ {
+ /* We've already checked and the predicate wasn't satisfied. */
+ continue;
+ }
+ else if (indexInfo->ii_Predicate != NIL)
{
ExprState *predicate;
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index ca14cdabdd0..fc6f7aa8fad 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1282,6 +1282,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
/* The following fields are set later if needed */
resultRelInfo->ri_RowIdAttNo = 0;
resultRelInfo->ri_extraUpdatedCols = NULL;
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
resultRelInfo->ri_projectNew = NULL;
resultRelInfo->ri_newTupleSlot = NULL;
resultRelInfo->ri_oldTupleSlot = NULL;
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 72f2bff7708..c8dbbc2a78b 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -33,6 +33,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -937,7 +938,13 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
+ /*
+ * We're not going to call ExecCheckIndexedAttrsForChanges here
+ * because we've already identified the changes earlier on thanks to
+ * slot_modify_data.
+ */
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
+ resultRelInfo->ri_ChangedIndexedCols,
&update_indexes);
conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 46ff6da8289..ab0a343bcf6 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -17,6 +17,7 @@
* ExecModifyTable - retrieve the next tuple from the node
* ExecEndModifyTable - shut down the ModifyTable node
* ExecReScanModifyTable - rescan the ModifyTable node
+ * ExecCheckIndexedAttrsForChanges - find set of updated indexed columns
*
* NOTES
* The ModifyTable node receives input from its outerPlan, which is
@@ -52,13 +53,20 @@
#include "postgres.h"
+#include "access/amapi.h"
#include "access/htup_details.h"
+#include "access/attnum.h"
+#include "access/sysattr.h"
#include "access/tableam.h"
+#include "access/tupconvert.h"
+#include "access/tupdesc.h"
#include "access/xact.h"
+#include "catalog/index.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
+#include "executor/tuptable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
@@ -68,8 +76,11 @@
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/float.h"
#include "utils/injection_point.h"
+#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/snapmgr.h"
@@ -176,6 +187,224 @@ static TupleTableSlot *ExecMergeNotMatched(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
bool canSetTag);
+/*
+ * ExecCheckIndexedAttrsForChanges
+ *
+ * Determine which indexes need updating by finding the set of modified indexed
+ * attributes.
+ *
+ * Every index column is compared between the old and the new tuple.  For
+ * indexes whose AM implements the amcomparedatums() index AM API the AM is
+ * asked whether the two datums are equal; for all other indexes the datums are
+ * tested for binary equality.  Expression columns are evaluated against both
+ * slots and their results are compared the same way.
+ *
+ * The goal is for the executor to know, ahead of calling into the table AM to
+ * process the update and before calling into the index AM for inserting new
+ * index tuples, which attributes in the new TupleTableSlot, if any, truly
+ * necessitate a new index tuple.
+ *
+ *	relinfo		result relation; its index arrays and IndexInfos are consulted
+ *	estate		executor state supplying the per-tuple expression context
+ *	old_tts		slot holding the tuple as it was before the update
+ *	new_tts		slot holding the tuple as it will be after the update
+ *
+ * Returns a Bitmapset of attributes that intersects with indexes which require
+ * a new index tuple, or NULL when there are no indexes or nothing changed.
+ * Plain column members are offset by FirstLowInvalidHeapAttributeNumber.
+ */
+Bitmapset *
+ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+								EState *estate,
+								TupleTableSlot *old_tts,
+								TupleTableSlot *new_tts)
+{
+	Relation	relation = relinfo->ri_RelationDesc;
+	TupleDesc	tupdesc = RelationGetDescr(relation);
+	Bitmapset  *mix_attrs = NULL;	/* modified indexed attributes */
+
+	/* If no indexes, we're done */
+	if (relinfo->ri_NumIndices == 0)
+		return NULL;
+
+	/* Examine each index of the relation in turn */
+	for (int i = 0; i < relinfo->ri_NumIndices; i++)
+	{
+		Relation	index = relinfo->ri_IndexRelationDescs[i];
+		const IndexAmRoutine *amroutine = index->rd_indam;
+		IndexInfo  *indexInfo = relinfo->ri_IndexRelationInfo[i];
+		Bitmapset  *m_attrs = NULL; /* (possibly) modified indexed attrs */
+		Bitmapset  *p_attrs = NULL; /* (possibly) modified predicate attrs */
+		Bitmapset  *u_attrs = NULL; /* unmodified indexed attrs */
+		bool		has_am_compare = (amroutine->amcomparedatums != NULL);
+		bool		supports_ios = (amroutine->amcanreturn != NULL);
+		bool		is_partial = (indexInfo->ii_Predicate != NIL);
+		ExprContext *econtext = GetPerTupleExprContext(estate);
+
+		/*
+		 * Non-key (including) columns are only compared when the AM provides
+		 * amcanreturn; otherwise only the key columns are looked at.
+		 */
+		int			num_datums = supports_ios ?
+			indexInfo->ii_NumIndexAttrs : indexInfo->ii_NumIndexKeyAttrs;
+
+		/*
+		 * Position of the next expression column within ii_Expressions.  This
+		 * has to live outside the column loop: it counts the expression
+		 * columns seen so far for this index.
+		 */
+		int			nth_expr = 0;
+
+		/* If we've reviewed all the attributes on this index, move on */
+		if (bms_is_subset(indexInfo->ii_IndexedAttrs, mix_attrs))
+			continue;
+
+		/* Add partial index attributes */
+		if (is_partial)
+			p_attrs = bms_add_members(p_attrs, indexInfo->ii_PredicateAttrs);
+
+		/* Compare the index datums for equality */
+		for (int j = 0; j < num_datums; j++)
+		{
+			AttrNumber	rel_attrnum = indexInfo->ii_IndexAttrNumbers[j];
+			int			rel_attridx = rel_attrnum - FirstLowInvalidHeapAttributeNumber;
+			int16		typlen;
+			bool		typbyval;
+			Datum		old_value;
+			Datum		new_value;
+			bool		old_null;
+			bool		new_null;
+			bool		values_equal = false;
+
+			/* System attributes */
+			if (rel_attrnum < 0)
+			{
+				/* Extract system values from both slots for this attribute */
+				old_value = slot_getsysattr(old_tts, rel_attrnum, &old_null);
+				new_value = slot_getsysattr(new_tts, rel_attrnum, &new_null);
+
+				/*
+				 * The values are compared as OIDs and the NULL flags are not
+				 * consulted.  NOTE(review): this assumes every indexable
+				 * system column is OID-typed and never NULL -- confirm.
+				 */
+				values_equal = (DatumGetObjectId(old_value) == DatumGetObjectId(new_value));
+				goto equality_determined;
+			}
+
+			/*
+			 * This is an expression attribute, but in an effort to avoid the
+			 * expense of FormIndexDatum we're now faced with testing for
+			 * equality so we'll have to exec the expressions and test for
+			 * binary equality of the results.
+			 */
+			else if (rel_attrnum == 0)
+			{
+				TupleTableSlot *save_scantuple = econtext->ecxt_scantuple;
+				Oid			expr_type_oid;
+				Expr	   *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
+				ExprState  *state;
+
+				if (indexInfo->ii_ExpressionsState == NIL)
+				{
+					/* First time through, set up expression evaluation state */
+					indexInfo->ii_ExpressionsState =
+						ExecPrepareExprList(indexInfo->ii_Expressions, estate);
+				}
+
+				state = (ExprState *) list_nth(indexInfo->ii_ExpressionsState, nth_expr);
+
+				/* Evaluate the expression over the old, then the new tuple */
+				econtext->ecxt_scantuple = old_tts;
+				old_value = ExecEvalExprSwitchContext(state, econtext, &old_null);
+
+				econtext->ecxt_scantuple = new_tts;
+				new_value = ExecEvalExprSwitchContext(state, econtext, &new_null);
+
+				/* Put back whatever scan tuple the caller had installed */
+				econtext->ecxt_scantuple = save_scantuple;
+
+				/*
+				 * NOTE: test for NULL cases here to potentially avoid looking
+				 * up the type information.  It's a tad redundant, but worth
+				 * it.
+				 */
+
+				/* A change to/from NULL, so not equal */
+				if (old_null != new_null)
+				{
+					values_equal = false;
+					goto equality_determined;
+				}
+
+				/* Both NULL: no change, record as unmodified */
+				if (old_null)
+				{
+					values_equal = true;
+					goto equality_determined;
+				}
+
+				/* Get type OID from the expression */
+				expr_type_oid = exprType((Node *) expr);
+
+				/* Get type information from the OID */
+				get_typlenbyval(expr_type_oid, &typlen, &typbyval);
+			}
+			/* Not a system or expression attribute */
+			else
+			{
+				CompactAttribute *att = TupleDescCompactAttr(tupdesc, rel_attrnum - 1);
+
+				/* Extract values from both slots for this attribute */
+				old_value = slot_getattr(old_tts, rel_attrnum, &old_null);
+				new_value = slot_getattr(new_tts, rel_attrnum, &new_null);
+
+				typlen = att->attlen;
+				typbyval = att->attbyval;
+			}
+
+			/* A change to/from NULL, so not equal */
+			if (old_null != new_null)
+			{
+				values_equal = false;
+				goto equality_determined;
+			}
+
+			/* Both NULL: no change, record as unmodified */
+			if (old_null)
+			{
+				values_equal = true;
+				goto equality_determined;
+			}
+
+			if (has_am_compare)
+			{
+				/*
+				 * NOTE: For AM comparison, pass the 1-based index attribute
+				 * number.  The AM's compare function expects the same
+				 * numbering as used internally by the AM.
+				 */
+				values_equal = amroutine->amcomparedatums(index, j + 1,
+														  old_value, old_null,
+														  new_value, new_null);
+			}
+			else
+			{
+				values_equal = datumIsEqual(old_value, new_value, typbyval, typlen);
+			}
+
+	equality_determined:;
+			if (rel_attrnum == 0)
+			{
+				/*
+				 * A changed expression marks every column it references as
+				 * modified.  An unchanged expression records nothing: it has
+				 * no attribute number of its own, and adding the attnum-0
+				 * placeholder to u_attrs could wrongly cancel a whole-row
+				 * reference collected from another expression or predicate.
+				 */
+				if (!values_equal)
+				{
+					Expr	   *expr = (Expr *) list_nth(indexInfo->ii_Expressions, nth_expr);
+
+					pull_varattnos((Node *) expr, relinfo->ri_RangeTableIndex, &m_attrs);
+				}
+
+				/* Advance to the next entry of ii_Expressions */
+				nth_expr++;
+			}
+			else if (!values_equal)
+			{
+				m_attrs = bms_add_member(m_attrs, rel_attridx);
+			}
+			else
+			{
+				u_attrs = bms_add_member(u_attrs, rel_attridx);
+			}
+		}
+
+		/*
+		 * Here we know all the attributes that might be modified and all
+		 * those we know haven't been across all indexes.  Take the difference
+		 * and add it to the modified indexed attributes set.
+		 */
+		m_attrs = bms_del_members(m_attrs, u_attrs);
+		p_attrs = bms_del_members(p_attrs, u_attrs);
+		mix_attrs = bms_add_members(mix_attrs, m_attrs);
+		mix_attrs = bms_add_members(mix_attrs, p_attrs);
+
+		bms_free(m_attrs);
+		bms_free(u_attrs);
+		bms_free(p_attrs);
+	}
+
+	return mix_attrs;
+}
/*
* Verify that the tuples to be produced by INSERT match the
@@ -2168,14 +2397,17 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
- ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
- bool canSetTag, UpdateContext *updateCxt)
+ ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *oldSlot,
+ TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
bool partition_constraint_failed;
TM_Result result;
+ /* The set of modified indexed attributes that trigger new index entries */
+ Bitmapset *mix_attrs = NULL;
+
updateCxt->crossPartUpdate = false;
/*
@@ -2292,9 +2524,38 @@ lreplace:
ExecConstraints(resultRelInfo, slot, estate);
/*
- * replace the heap tuple
+ * Identify which, if any, indexed attributes were modified here so that
+ * we might reuse it in a few places.
+ */
+ bms_free(resultRelInfo->ri_ChangedIndexedCols);
+ resultRelInfo->ri_ChangedIndexedCols = NULL;
+
+ /*
+ * During updates we'll need a bit more information in IndexInfo but we've
+ * delayed adding it until here. We check to ensure that there are
+ * indexes, that something has changed that is indexed, and that the first
+ * index doesn't yet have ii_IndexedAttrs set as a way to ensure we only
+ * build this when needed and only once. We don't build this in
+ * ExecOpenIndices() as it is unnecessary overhead when not performing an
+ * update.
+ */
+ if (resultRelInfo->ri_NumIndices > 0 &&
+ bms_is_empty(resultRelInfo->ri_IndexRelationInfo[0]->ii_IndexedAttrs))
+ BuildUpdateIndexInfo(resultRelInfo);
+
+ /*
+ * Next up we need to find out the set of indexed attributes that have
+ * changed in value and should trigger a new index tuple. We could start
+ * with the set of updated columns via ExecGetUpdatedCols(), but if we do
+ * we will overlook attributes directly modified by heap_modify_tuple()
+ * which are not known to ExecGetUpdatedCols().
+ */
+ mix_attrs = ExecCheckIndexedAttrsForChanges(resultRelInfo, estate, oldSlot, slot);
+
+ /*
+ * Call into the table AM to update the heap tuple.
*
- * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
+ * NOTE: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
* the row to be updated is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior needed
* for referential integrity updates in transaction-snapshot mode
@@ -2306,8 +2567,12 @@ lreplace:
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
+ mix_attrs,
&updateCxt->updateIndexes);
+ Assert(bms_is_empty(resultRelInfo->ri_ChangedIndexedCols));
+ resultRelInfo->ri_ChangedIndexedCols = mix_attrs;
+
return result;
}
@@ -2325,7 +2590,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
- /* insert index entries for tuple if necessary */
+ /* Insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, context->estate,
@@ -2524,8 +2789,9 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
lockedtid = *tupleid;
- result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
- canSetTag, &updateCxt);
+
+ result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, oldSlot,
+ slot, canSetTag, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@@ -3222,8 +3488,8 @@ lmerge_matched:
Assert(oldtuple == NULL);
result = ExecUpdateAct(context, resultRelInfo, tupleid,
- NULL, newslot, canSetTag,
- &updateCxt);
+ NULL, resultRelInfo->ri_oldTupleSlot,
+ newslot, canSetTag, &updateCxt);
/*
* As in ExecUpdate(), if ExecUpdateAct() reports that a
@@ -3248,6 +3514,7 @@ lmerge_matched:
tupleid, NULL, newslot);
mtstate->mt_merge_updated += 1;
}
+
break;
case CMD_DELETE:
@@ -4354,7 +4621,7 @@ ExecModifyTable(PlanState *pstate)
* For UPDATE/DELETE/MERGE, fetch the row identity info for the tuple
* to be updated/deleted/merged. For a heap relation, that's a TID;
* otherwise we may have a wholerow junk attr that carries the old
- * tuple in toto. Keep this in step with the part of
+ * tuple in toto. Keep this in step with the part of
* ExecInitModifyTable that sets up ri_RowIdAttNo.
*/
if (operation == CMD_UPDATE || operation == CMD_DELETE ||
@@ -4530,6 +4797,7 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
oldSlot, slot, node->canSetTag);
+
if (tuplock)
UnlockTuple(resultRelInfo->ri_RelationDesc, tupleid,
InplaceUpdateTupleLock);
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index a4765876c31..17f5f66b25f 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -238,6 +238,10 @@ bms_make_singleton(int x)
void
bms_free(Bitmapset *a)
{
+	/*
+	 * Sanity-check the set before releasing it.  USE_ASSERT_CHECKING is
+	 * either defined or not, so test for its presence with #ifdef rather
+	 * than evaluating it as an expression.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	Assert(bms_is_valid_set(a));
+#endif
+
if (a)
pfree(a);
}
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index 2caec621d73..dd092bacad9 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -857,10 +857,14 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
/* expressions */
n->ii_Expressions = expressions;
n->ii_ExpressionsState = NIL;
+ n->ii_ExpressionsAttrs = NULL;
/* predicates */
n->ii_Predicate = predicates;
n->ii_PredicateState = NULL;
+ n->ii_PredicateAttrs = NULL;
+ n->ii_CheckedPredicate = false;
+ n->ii_PredicateSatisfied = false;
/* exclusion constraints */
n->ii_ExclusionOps = NULL;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index ad281e7069b..90b0c2c40e9 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -275,7 +275,6 @@
#include "replication/logicalrelation.h"
#include "replication/logicalworker.h"
#include "replication/origin.h"
-#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "rewrite/rewriteHandler.h"
@@ -285,12 +284,14 @@
#include "storage/procarray.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
+#include "utils/datum.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/rel.h"
+#include "utils/relcache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -1110,15 +1111,18 @@ slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
* "slot" is filled with a copy of the tuple in "srcslot", replacing
* columns provided in "tupleData" and leaving others as-is.
*
+ * Returns a bitmap of the modified columns.
+ *
* Caution: unreplaced pass-by-ref columns in "slot" will point into the
* storage for "srcslot". This is OK for current usage, but someday we may
* need to materialize "slot" at the end to make it independent of "srcslot".
*/
-static void
+static Bitmapset *
slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
LogicalRepRelMapEntry *rel,
LogicalRepTupleData *tupleData)
{
+ Bitmapset *modified = NULL;
int natts = slot->tts_tupleDescriptor->natts;
int i;
@@ -1195,6 +1199,27 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
slot->tts_isnull[i] = true;
}
+ /*
+ * Determine if the replicated value changed the local value by
+ * comparing slots. This is a subset of
+ * ExecCheckIndexedAttrsForChanges.
+ */
+ if (srcslot->tts_isnull[i] != slot->tts_isnull[i])
+ {
+ /* One is NULL, the other is not so the value changed */
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+ else if (!srcslot->tts_isnull[i])
+ {
+ /* Both are not NULL, compare their values */
+
+ if (!datumIsEqual(srcslot->tts_values[i],
+ slot->tts_values[i],
+ att->attbyval,
+ att->attlen))
+ modified = bms_add_member(modified, i + 1 - FirstLowInvalidHeapAttributeNumber);
+ }
+
/* Reset attnum for error callback */
apply_error_callback_arg.remote_attnum = -1;
}
@@ -1202,6 +1227,8 @@ slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
/* And finally, declare that "slot" contains a valid virtual tuple */
ExecStoreVirtualTuple(slot);
+
+ return modified;
}
/*
@@ -2918,6 +2945,7 @@ apply_handle_update_internal(ApplyExecutionData *edata,
ConflictTupleInfo conflicttuple = {0};
bool found;
MemoryContext oldctx;
+ Bitmapset *indexed = NULL;
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
ExecOpenIndices(relinfo, false);
@@ -2934,6 +2962,8 @@ apply_handle_update_internal(ApplyExecutionData *edata,
*/
if (found)
{
+ Bitmapset *modified = NULL;
+
/*
* Report the conflict if the tuple was modified by a different
* origin.
@@ -2957,15 +2987,29 @@ apply_handle_update_internal(ApplyExecutionData *edata,
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot, localslot, relmapentry, newtup);
+ modified = slot_modify_data(remoteslot, localslot, relmapentry, newtup);
MemoryContextSwitchTo(oldctx);
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but here we have
+ * the record of changed columns in the replication state, so let's
+ * use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+
+ bms_free(relinfo->ri_ChangedIndexedCols);
+ relinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
EvalPlanQualSetSlot(&epqstate, remoteslot);
InitConflictIndexes(relinfo);
- /* Do the actual update. */
+ /* First check privileges */
TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_UPDATE);
+
+ /* Then do the actual update. */
ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
remoteslot);
}
@@ -3455,6 +3499,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
bool found;
EPQState epqstate;
ConflictTupleInfo conflicttuple = {0};
+ Bitmapset *modified = NULL;
+ Bitmapset *indexed;
/* Get the matching local tuple from the partition. */
found = FindReplTupleInLocalRel(edata, partrel,
@@ -3523,8 +3569,8 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
* remoteslot_part.
*/
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
- slot_modify_data(remoteslot_part, localslot, part_entry,
- newtup);
+ modified = slot_modify_data(remoteslot_part, localslot, part_entry,
+ newtup);
MemoryContextSwitchTo(oldctx);
EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1, NIL);
@@ -3549,6 +3595,18 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
EvalPlanQualSetSlot(&epqstate, remoteslot_part);
TargetPrivilegesCheck(partrelinfo->ri_RelationDesc,
ACL_UPDATE);
+
+ /*
+ * Normally we'd call ExecCheckIndexedAttrsForChanges but
+ * here we have the record of changed columns in the
+ * replication state, so let's use that instead.
+ */
+ indexed = RelationGetIndexAttrBitmap(partrelinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_INDEXED);
+ bms_free(partrelinfo->ri_ChangedIndexedCols);
+ partrelinfo->ri_ChangedIndexedCols = bms_int_members(modified, indexed);
+ bms_free(indexed);
+
ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
localslot, remoteslot_part);
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b634c9fff1..8cc97e4fbca 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2477,6 +2477,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
bms_free(relation->rd_idattr);
bms_free(relation->rd_hotblockingattr);
bms_free(relation->rd_summarizedattr);
+ bms_free(relation->rd_indexedattr);
if (relation->rd_pubdesc)
pfree(relation->rd_pubdesc);
if (relation->rd_options)
@@ -5278,6 +5279,7 @@ RelationGetIndexPredicate(Relation relation)
* index (empty if FULL)
* INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT
* INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes
+ * INDEX_ATTR_BITMAP_INDEXED Columns referenced by indexes
*
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
* we can include system attributes (e.g., OID) in the bitmap representation.
@@ -5302,6 +5304,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Bitmapset *idindexattrs; /* columns in the replica identity */
Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */
Bitmapset *summarizedattrs; /* columns with summarizing indexes */
+ Bitmapset *indexedattrs; /* columns referenced by indexes */
List *indexoidlist;
List *newindexoidlist;
Oid relpkindex;
@@ -5324,6 +5327,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return bms_copy(relation->rd_hotblockingattr);
case INDEX_ATTR_BITMAP_SUMMARIZED:
return bms_copy(relation->rd_summarizedattr);
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return bms_copy(relation->rd_indexedattr);
default:
elog(ERROR, "unknown attrKind %u", attrKind);
}
@@ -5368,6 +5373,7 @@ restart:
idindexattrs = NULL;
hotblockingattrs = NULL;
summarizedattrs = NULL;
+ indexedattrs = NULL;
foreach(l, indexoidlist)
{
Oid indexOid = lfirst_oid(l);
@@ -5500,10 +5506,14 @@ restart:
bms_free(idindexattrs);
bms_free(hotblockingattrs);
bms_free(summarizedattrs);
+ bms_free(indexedattrs);
goto restart;
}
+ /* Combine all index attributes */
+ indexedattrs = bms_union(hotblockingattrs, summarizedattrs);
+
/* Don't leak the old values of these bitmaps, if any */
relation->rd_attrsvalid = false;
bms_free(relation->rd_keyattr);
@@ -5516,6 +5526,8 @@ restart:
relation->rd_hotblockingattr = NULL;
bms_free(relation->rd_summarizedattr);
relation->rd_summarizedattr = NULL;
+ bms_free(relation->rd_indexedattr);
+ relation->rd_indexedattr = NULL;
/*
* Now save copies of the bitmaps in the relcache entry. We intentionally
@@ -5530,6 +5542,7 @@ restart:
relation->rd_idattr = bms_copy(idindexattrs);
relation->rd_hotblockingattr = bms_copy(hotblockingattrs);
relation->rd_summarizedattr = bms_copy(summarizedattrs);
+ relation->rd_indexedattr = bms_copy(indexedattrs);
relation->rd_attrsvalid = true;
MemoryContextSwitchTo(oldcxt);
@@ -5546,6 +5559,8 @@ restart:
return hotblockingattrs;
case INDEX_ATTR_BITMAP_SUMMARIZED:
return summarizedattrs;
+ case INDEX_ATTR_BITMAP_INDEXED:
+ return indexedattrs;
default:
elog(ERROR, "unknown attrKind %u", attrKind);
return NULL;
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index ecfbd017d66..2a36b7e4a18 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -211,6 +211,33 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan);
/* restore marked scan position */
typedef void (*amrestrpos_function) (IndexScanDesc scan);
+/*
+ * amcomparedatums - Compare datums to determine if index update is needed
+ *
+ * This function compares old_datum and new_datum to determine if they would
+ * produce different index entries. For extraction-based indexes (GIN, RUM),
+ * this should:
+ * 1. Extract keys from old_datum using the opclass's extractValue function
+ * 2. Extract keys from new_datum using the opclass's extractValue function
+ * 3. Compare the two sets of keys using appropriate equality operators
+ * 4. Return true if the sets are equal (no index update needed)
+ *
+ * The comparison should account for:
+ * - Different numbers of extracted keys
+ * - NULL values
+ * - Type-specific equality (not just binary equality)
+ * - Opclass parameters (e.g., path in bson_rum_single_path_ops)
+ *
+ * For the DocumentDB example with path='a', this would extract values at
+ * path 'a' from both old and new BSON documents and compare them using
+ * BSON's equality operator.
+ */
+/* report whether old and new datums would produce the same index entries */
+typedef bool (*amcomparedatums_function) (Relation indexRelation,
+ int attno,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* Callback function signatures - for parallel index scans.
*/
@@ -313,6 +340,7 @@ typedef struct IndexAmRoutine
amendscan_function amendscan;
ammarkpos_function ammarkpos; /* can be NULL */
amrestrpos_function amrestrpos; /* can be NULL */
+ amcomparedatums_function amcomparedatums; /* can be NULL */
/* interface functions to support parallel index scans */
amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index fa1a3b20e09..69771fe947b 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -100,6 +100,9 @@ extern PGDLLIMPORT int gin_pending_list_limit;
extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
bool is_build);
+extern bool gincomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 38f944771db..4888918e479 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -369,7 +369,7 @@ extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup,
TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer,
Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs,
Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs,
- Bitmapset *mix_attrs, Buffer *vmbuffer,
+ const Bitmapset *mix_attrs, Buffer *vmbuffer,
bool rep_id_key_required, TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
@@ -404,8 +404,8 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, const ItemPointerData *tid);
-extern void simple_heap_update(Relation relation, const ItemPointerData *otid,
- HeapTuple tup, TU_UpdateIndexes *update_indexes);
+extern Bitmapset *simple_heap_update(Relation relation, const ItemPointerData *otid,
+ HeapTuple tup, TU_UpdateIndexes *update_indexes);
extern TransactionId heap_index_delete_tuples(Relation rel,
TM_IndexDeleteOp *delstate);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 77224859685..532656a487f 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1179,6 +1179,10 @@ extern int btgettreeheight(Relation rel);
extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily);
extern StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily);
+extern bool btcomparedatums(Relation index, int attnum,
+ Datum old_datum, bool old_isnull,
+ Datum new_datum, bool new_isnull);
+
/*
* prototypes for internal functions in nbtree.c
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e2ec5289d4d..4bed0f8e56e 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -549,6 +549,7 @@ typedef struct TableAmRoutine
bool wait,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
+ const Bitmapset *updated_cols,
TU_UpdateIndexes *update_indexes);
/* see table_tuple_lock() for reference about parameters */
@@ -1512,12 +1513,12 @@ static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
- TU_UpdateIndexes *update_indexes)
+ const Bitmapset *mix_cols, TU_UpdateIndexes *update_indexes)
{
return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck,
- wait, tmfd,
- lockmode, update_indexes);
+ wait, tmfd, lockmode,
+ mix_cols, update_indexes);
}
/*
@@ -2020,6 +2021,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot,
+ const Bitmapset *mix_attrs,
TU_UpdateIndexes *update_indexes);
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index b259c4141ed..14a39beab6e 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -132,6 +132,7 @@ extern bool CompareIndexInfo(const IndexInfo *info1, const IndexInfo *info2,
const AttrMap *attmap);
extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii);
+extern void BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo);
extern void FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 5929aabc353..b4c757af618 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -739,6 +739,11 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate);
*/
extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
+extern Bitmapset *ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo,
+ Bitmapset *mix_attrs,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate,
bool update,
@@ -800,5 +805,9 @@ extern ResultRelInfo *ExecLookupResultRelByOid(ModifyTableState *node,
Oid resultoid,
bool missing_ok,
bool update_cache);
+extern Bitmapset *ExecCheckIndexedAttrsForChanges(ResultRelInfo *relinfo,
+ EState *estate,
+ TupleTableSlot *old_tts,
+ TupleTableSlot *new_tts);
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index f8053d9e572..8a90afe315d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -176,15 +176,29 @@ typedef struct IndexInfo
*/
AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS];
+ /*
+ * All key, expression, summarizing, and partition attributes referenced by
+ * this index
+ */
+ Bitmapset *ii_IndexedAttrs;
+
/* expr trees for expression entries, or NIL if none */
List *ii_Expressions; /* list of Expr */
/* exec state for expressions, or NIL if none */
List *ii_ExpressionsState; /* list of ExprState */
+ /* attributes exclusively referenced by expression indexes */
+ Bitmapset *ii_ExpressionsAttrs;
/* partial-index predicate, or NIL if none */
List *ii_Predicate; /* list of Expr */
/* exec state for expressions, or NIL if none */
ExprState *ii_PredicateState;
+ /* attributes referenced by the predicate */
+ Bitmapset *ii_PredicateAttrs;
+ /* partial index predicate determined yet? */
+ bool ii_CheckedPredicate;
+ /* amupdate hint used to avoid rechecking predicate */
+ bool ii_PredicateSatisfied;
/* Per-column exclusion operators, or NULL if none */
Oid *ii_ExclusionOps; /* array with one entry per column */
@@ -501,6 +515,12 @@ typedef struct ResultRelInfo
/* true if the above has been computed */
bool ri_extraUpdatedCols_valid;
+ /*
+ * For UPDATE, a Bitmapset of the attributes that are both indexed and have
+ * changed in value.
+ */
+ Bitmapset *ri_ChangedIndexedCols;
+
/* Projection to generate new tuple in an INSERT/UPDATE */
ProjectionInfo *ri_projectNew;
/* Slot to hold that tuple */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index d03ab247788..95b38abfd89 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -164,6 +164,7 @@ typedef struct RelationData
Bitmapset *rd_idattr; /* included in replica identity index */
Bitmapset *rd_hotblockingattr; /* cols blocking HOT update */
Bitmapset *rd_summarizedattr; /* cols indexed by summarizing indexes */
+ Bitmapset *rd_indexedattr; /* all cols referenced by indexes */
PublicationDesc *rd_pubdesc; /* publication descriptor, or NULL */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 2700224939a..5834ab7b903 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -71,6 +71,7 @@ typedef enum IndexAttrBitmapKind
INDEX_ATTR_BITMAP_IDENTITY_KEY,
INDEX_ATTR_BITMAP_HOT_BLOCKING,
INDEX_ATTR_BITMAP_SUMMARIZED,
+ INDEX_ATTR_BITMAP_INDEXED,
} IndexAttrBitmapKind;
extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,
diff --git a/src/test/isolation/expected/insert-conflict-specconflict.out b/src/test/isolation/expected/insert-conflict-specconflict.out
index e34a821c403..54b3981918c 100644
--- a/src/test/isolation/expected/insert-conflict-specconflict.out
+++ b/src/test/isolation/expected/insert-conflict-specconflict.out
@@ -80,6 +80,10 @@ pg_advisory_unlock
t
(1 row)
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
@@ -172,6 +176,10 @@ pg_advisory_unlock
t
(1 row)
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
@@ -369,6 +377,10 @@ key|data
step s1_commit: COMMIT;
s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
+s2: NOTICE: blurt_and_lock_123() called for k1 in session 2
+s2: NOTICE: acquiring advisory lock on 2
step s2_upsert: <... completed>
step controller_show: SELECT * FROM upserttest;
key|data
@@ -530,6 +542,14 @@ isolation/insert-conflict-specconflict/s2|transactionid|ExclusiveLock|t
step s2_commit: COMMIT;
s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_123() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 2
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
+s1: NOTICE: blurt_and_lock_4() called for k1 in session 1
+s1: NOTICE: acquiring advisory lock on 4
step s1_upsert: <... completed>
step s1_noop:
step controller_show: SELECT * FROM upserttest;
diff --git a/src/test/regress/expected/heap_hot_updates.out b/src/test/regress/expected/heap_hot_updates.out
new file mode 100644
index 00000000000..14276e3cbca
--- /dev/null
+++ b/src/test/regress/expected/heap_hot_updates.out
@@ -0,0 +1,650 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+-- Setup: Create function to measure HOT updates
+CREATE OR REPLACE FUNCTION check_hot_updates(
+ expected INT,
+ p_table_name TEXT DEFAULT 't',
+ p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+ table_name TEXT,
+ total_updates BIGINT,
+ hot_updates BIGINT,
+ hot_update_percentage NUMERIC,
+ matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+ v_relid oid;
+ v_qualified_name TEXT;
+ v_hot_updates BIGINT;
+ v_updates BIGINT;
+ v_xact_hot_updates BIGINT;
+ v_xact_updates BIGINT;
+BEGIN
+ -- Force statistics update
+ PERFORM pg_stat_force_next_flush();
+
+ -- Get table OID
+ v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+ v_relid := v_qualified_name::regclass;
+
+ IF v_relid IS NULL THEN
+ RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+ END IF;
+
+ -- Get cumulative + transaction stats
+ v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+ v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+ v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+ v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+ v_hot_updates := v_hot_updates + v_xact_hot_updates;
+ v_updates := v_updates + v_xact_updates;
+
+ RETURN QUERY
+ SELECT
+ p_table_name::TEXT,
+ v_updates::BIGINT,
+ v_hot_updates::BIGINT,
+ CASE WHEN v_updates > 0
+ THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+ ELSE 0
+ END,
+ (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+CREATE COLLATION case_insensitive (
+ provider = libc,
+ locale = 'C'
+);
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: Same elements extracted (might allow HOT if not careful)
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update: Add/remove element
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+DROP TABLE t;
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+CREATE INDEX t_gin ON t USING gin(search_vec);
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (index keys changed)
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+ count
+-------
+ 1
+(1 row)
+
+-- Expected: 1 row
+DROP TABLE t CASCADE;
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (keys actually changed)
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Expected: 1 HOT (GIN keys semantically identical)
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: Still 1 HOT (not this one)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+INSERT INTO t VALUES (1, 50, 'below range');
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 150
+(1 row)
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 3 | 100.00 | t
+(1 row)
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+ cnt | max_val
+-----+---------
+ 1 | 160
+(1 row)
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 4 | 100.00 | t
+(1 row)
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+ cnt
+-----
+ 0
+(1 row)
+
+SELECT id, value, description FROM t;
+ id | value | description
+----+-------+---------------
+ 1 | 50 | updated again
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, '[email protected]', '{"status": "new"}');
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 1 | 50.00 | t
+(1 row)
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 1 | 25.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_brin | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT (BRIN allows it for single row)
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t_hash | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT (HASH blocks it)
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 0 | 0.00 | t
+(1 row)
+
+-- Expected: 0 HOT
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 1 | 33.33 | t
+(1 row)
+
+-- Expected: 1 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT (BRIN permits single-row updates)
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 1 | 100.00 | t
+(1 row)
+
+-- Expected: 1 HOT
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 2 | 2 | 100.00 | t
+(1 row)
+
+-- Expected: 2 HOT
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 3 | 2 | 66.67 | t
+(1 row)
+
+-- Expected: 2 HOT (HASH blocks it)
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 4 | 3 | 75.00 | t
+(1 row)
+
+-- Expected: 3 HOT
+DROP TABLE t CASCADE;
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+-- Update impacts index on whole document attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+ table_name | total_updates | hot_updates | hot_update_percentage | matches_expected
+------------+---------------+-------------+-----------------------+------------------
+ t | 1 | 0 | 0.00 | t
+(1 row)
+
+DROP TABLE t CASCADE;
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 021d57f66bb..2d6641992e9 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -125,6 +125,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate
+
+# ----------
+# Another group of parallel tests, these focused on heap HOT updates
+# ----------
+test: heap_hot_updates
+
# event_trigger depends on create_am and cannot run concurrently with
# any test that runs DDL
# oidjoins is read-only, though, and should run late for best coverage
diff --git a/src/test/regress/sql/heap_hot_updates.sql b/src/test/regress/sql/heap_hot_updates.sql
new file mode 100644
index 00000000000..e047bcddf5c
--- /dev/null
+++ b/src/test/regress/sql/heap_hot_updates.sql
@@ -0,0 +1,513 @@
+-- ================================================================
+-- Test Suite for Heap-only (HOT) Updates
+-- ================================================================
+
+-- Setup: helper that reports a table's update counters for the tests below
+CREATE OR REPLACE FUNCTION check_hot_updates(
+    expected INT,                                -- HOT update count the caller expects
+    p_table_name TEXT DEFAULT 't',               -- table to inspect
+    p_schema_name TEXT DEFAULT current_schema()  -- schema that contains the table
+)
+RETURNS TABLE (
+    table_name TEXT,                -- p_table_name, echoed back
+    total_updates BIGINT,           -- cumulative + transaction-level updates
+    hot_updates BIGINT,             -- cumulative + transaction-level HOT updates
+    hot_update_percentage NUMERIC,  -- hot_updates / total_updates * 100, rounded to 2 places; 0 if no updates
+    matches_expected BOOLEAN        -- true when hot_updates = expected
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+    v_relid oid;
+    v_qualified_name TEXT;
+    v_hot_updates BIGINT;
+    v_updates BIGINT;
+    v_xact_hot_updates BIGINT;
+    v_xact_updates BIGINT;
+BEGIN
+    -- Force statistics update
+    PERFORM pg_stat_force_next_flush();
+
+    -- Get table OID; unlike a ::regclass cast, to_regclass() returns NULL if it is missing
+    v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+    v_relid := to_regclass(v_qualified_name);
+
+    IF v_relid IS NULL THEN
+        RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+    END IF;
+
+    -- Get cumulative + transaction stats
+    v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+    v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+    v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+    v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+    v_hot_updates := v_hot_updates + v_xact_hot_updates;
+    v_updates := v_updates + v_xact_updates;
+
+    RETURN QUERY
+    SELECT
+        p_table_name::TEXT,
+        v_updates::BIGINT,
+        v_hot_updates::BIGINT,
+        CASE WHEN v_updates > 0
+            THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+            ELSE 0
+        END,
+        (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+
+CREATE COLLATION case_insensitive (  -- NOTE(review): no statement below references this collation before it is dropped
+ provider = libc,
+ locale = 'C'  -- NOTE(review): 'C' is presumably case-sensitive despite the collation's name; confirm intent
+);
+
+
+-- ================================================================
+-- GIN Index on JSONB
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data);
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}');
+
+-- Change tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Change tags again - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN Index with Unchanged Keys
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- Create GIN index on specific path
+CREATE INDEX t_gin_idx ON t USING gin((data->'tags'));
+INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}');
+
+-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT
+UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Change indexed tags - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN with jsonb_path_ops
+-- ================================================================
+CREATE TABLE t(id INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops);
+INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}');
+
+-- Change value at different path - keys changed, NOT HOT
+UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- Mixed Index Types (BRIN + Expression)
+-- ================================================================
+CREATE TABLE t(id INT, value INT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_idx ON t USING brin(value);
+CREATE INDEX t_expr_idx ON t((data->'status'));
+INSERT INTO t VALUES (1, 100, '{"status": "active"}');
+
+-- Update only BRIN column - should be HOT
+UPDATE t SET value = 200 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update only expression column - should NOT be HOT
+UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - should NOT be HOT
+UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- GIN Array Index - Order Insensitive Extraction
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ data JSONB
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- GIN index on JSONB array (extracts all elements)
+CREATE INDEX t_items_gin ON t USING GIN ((data->'items'));
+
+INSERT INTO t VALUES (1, '{"items": [1, 2, 3], "status": "active"}');
+
+-- Update: Reorder array elements
+-- JSONB equality: NOT equal (different arrays)
+-- GIN extraction: same set of keys extracted, so this update is expected to be HOT
+UPDATE t SET data = '{"items": [3, 2, 1], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update: Add an element - GIN keys changed, should NOT be HOT
+UPDATE t SET data = '{"items": [1, 2, 3, 4], "status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t;
+
+
+-- ================================================================
+-- TEST: GIN with TOASTed TEXT (tsvector)
+-- ================================================================
+CREATE TABLE t(id INT, content TEXT, search_vec tsvector)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+-- Create trigger to maintain tsvector
+CREATE TRIGGER tsvectorupdate_toast
+ BEFORE INSERT OR UPDATE ON t
+ FOR EACH ROW EXECUTE FUNCTION
+ tsvector_update_trigger(search_vec, 'pg_catalog.english', content);
+
+CREATE INDEX t_gin ON t USING gin(search_vec);
+
+-- Insert with large content (will be TOASTed)
+INSERT INTO t (id, content) VALUES
+ (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000));
+
+-- Verify initial state
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important');
+-- Expected: 1 row
+
+-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time
+-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed.
+-- This means the comparison sees old tsvector vs. trigger-modified tsvector,
+-- not the natural progression. HOT won't happen because the trigger changed
+-- the indexed column.
+
+-- Update: Even though content keywords unchanged, trigger still fires
+UPDATE t
+SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT)
+-- This is actually correct behavior - the trigger updated an indexed column
+
+-- Update: Change indexed keywords
+UPDATE t
+SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000)
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (index keys changed)
+
+-- Verify query correctness
+SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical');
+-- Expected: 1 row
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- TEST: GIN with Array of Large Strings
+-- ================================================================
+CREATE TABLE t(id INT, tags TEXT[])
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_gin ON t USING gin(tags);
+
+-- Insert with large array elements (might be TOASTed)
+INSERT INTO t (id, tags) VALUES
+ (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]);
+
+-- Update: Change to different large values - NOT HOT
+UPDATE t
+SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT (keys actually changed)
+
+-- Update: Keep same tag values, just reorder - SHOULD BE HOT
+-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3]
+-- extract to the same sorted key set ['tag3','tag4'])
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT (GIN keys semantically identical)
+
+-- Update: Remove an element - NOT HOT (keys changed)
+UPDATE t
+SET tags = ARRAY[repeat('tag4', 1000)]
+WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: Still 1 HOT (not this one)
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN Index with Partial Predicate
+-- ================================================================
+CREATE TABLE t(
+ id INT PRIMARY KEY,
+ value INT,
+ description TEXT
+) WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_partial_idx ON t USING brin(value) WHERE value > 100;
+
+INSERT INTO t VALUES (1, 50, 'below range');
+
+-- Test 1: Outside predicate
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Validate: Predicate query returns 0 rows
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+-- Test 2: Transition into predicate
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Validate: Predicate query returns 1 row with correct value
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 3: Inside predicate, value changes
+UPDATE t SET value = 160, description = 'updated again' WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+
+-- Validate: Updated value (160) is returned
+SELECT COUNT(*) as cnt, MAX(value) as max_val FROM t WHERE value > 100;
+
+-- Test 4: Transition out of predicate
+UPDATE t SET value = 50 WHERE id = 1;
+SELECT * FROM check_hot_updates(4);
+
+SELECT COUNT(*) as cnt FROM t WHERE value > 100;
+
+SELECT id, value, description FROM t;
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- HASH Index (Simple Column)
+-- ================================================================
+CREATE TABLE t(id INT, code VARCHAR(20), description TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_idx ON t USING hash(code);
+INSERT INTO t VALUES (1, 'CODE001', 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET description = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update indexed column - HASH index requires update, NOT HOT
+UPDATE t SET code = 'CODE002' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both - NOT HOT
+UPDATE t SET code = 'CODE003', description = 'changed' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Back to original code - NOT HOT (different hash bucket location)
+UPDATE t SET code = 'CODE001' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- HASH Index on Expression
+-- ================================================================
+CREATE TABLE t(id INT, email TEXT, data JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_lower_email_idx ON t USING HASH(lower(email));
+INSERT INTO t VALUES (1, '[email protected]', '{"status": "new"}');
+
+-- Update non-indexed field - should be HOT
+UPDATE t SET data = '{"status": "active"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update email with case change only (same lowercase) - should be HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+-- Update email to different lowercase - NOT HOT
+UPDATE t SET email = '[email protected]' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Multiple HASH Indexes
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, status VARCHAR, value INT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+CREATE INDEX t_hash_status_idx ON t USING hash(status);
+INSERT INTO t VALUES (1, 'electronics', 'active', 100);
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update one indexed column - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update other indexed column - NOT HOT
+UPDATE t SET status = 'inactive' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+-- Update both indexed columns - NOT HOT
+UPDATE t SET category = 'videos', status = 'pending' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN vs HASH Comparison
+-- ================================================================
+CREATE TABLE t_brin(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE TABLE t_hash(id INT, value INT, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+
+CREATE INDEX t_brin_value_idx ON t_brin USING brin(value);
+CREATE INDEX t_hash_value_idx ON t_hash USING hash(value);
+
+INSERT INTO t_brin VALUES (1, 100, 'initial');
+INSERT INTO t_hash VALUES (1, 100, 'initial');
+
+-- Same update on both - different HOT behavior expected
+-- BRIN: might allow HOT (range summary unchanged)
+-- HASH: blocks HOT (hash bucket changed)
+UPDATE t_brin SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(1, 't_brin');
+-- Expected: 1 HOT (BRIN allows it for single row)
+
+UPDATE t_hash SET value = 150 WHERE id = 1;
+SELECT * FROM check_hot_updates(0, 't_hash');
+-- Expected: 0 HOT (HASH blocks it)
+
+DROP TABLE t_brin CASCADE;
+DROP TABLE t_hash CASCADE;
+
+
+-- ================================================================
+-- HASH Index with NULL Values
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'electronics', 'initial');
+
+-- Update indexed column to NULL - NOT HOT (hash value changed)
+UPDATE t SET category = NULL WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update indexed column from NULL to value - NOT HOT
+UPDATE t SET category = 'books' WHERE id = 1;
+SELECT * FROM check_hot_updates(0);
+-- Expected: 0 HOT
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- BRIN on JSONB Field
+-- ================================================================
+CREATE TABLE t(id INT, metrics JSONB)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+-- BRIN doesn't directly support JSONB, but we can test on expression
+CREATE INDEX t_brin_count_idx ON t USING brin(
+ CAST(metrics->>'count' AS INTEGER)
+);
+INSERT INTO t VALUES (1, '{"count": "100", "timestamp": "2024-01-01"}');
+
+-- Update non-indexed JSONB field - should be HOT
+UPDATE t SET metrics = '{"count": "100", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update indexed field - BRIN allows HOT for single row
+UPDATE t SET metrics = '{"count": "150", "timestamp": "2024-01-02"}' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (BRIN permits single-row updates)
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Mixed BRIN + HASH on Same Table
+-- ================================================================
+CREATE TABLE t(id INT, category VARCHAR, timestamp TIMESTAMP, price NUMERIC, data TEXT)
+ WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_brin_timestamp_idx ON t USING brin(timestamp);
+CREATE INDEX t_hash_category_idx ON t USING hash(category);
+INSERT INTO t VALUES (1, 'books', '2024-01-01 10:00:00', 29.99, 'initial');
+
+-- Update non-indexed column - should be HOT
+UPDATE t SET data = 'updated' WHERE id = 1;
+SELECT * FROM check_hot_updates(1);
+-- Expected: 1 HOT
+
+-- Update BRIN indexed column - allows HOT
+UPDATE t SET timestamp = '2024-01-02 10:00:00' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT
+
+-- Update HASH indexed column - blocks HOT
+UPDATE t SET category = 'videos' WHERE id = 1;
+SELECT * FROM check_hot_updates(2);
+-- Expected: 2 HOT (HASH blocks it)
+
+-- Update price (non-indexed) - should be HOT
+UPDATE t SET price = 39.99 WHERE id = 1;
+SELECT * FROM check_hot_updates(3);
+-- Expected: 3 HOT
+
+DROP TABLE t CASCADE;
+
+
+-- ================================================================
+-- Index both on a field in a JSONB document, and the document
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) WITH (autovacuum_enabled = off, fillfactor = 70);
+CREATE INDEX t_docs_idx ON t((docs->'name'));
+CREATE INDEX t_docs_col_idx ON t(docs);
+INSERT INTO t VALUES (1, '{"name": "john", "data": "some data"}');
+
+-- Update impacts index on whole document attribute, can't go HOT
+UPDATE t SET docs='{"name": "john", "data": "some other data"}' WHERE id=1;
+SELECT * FROM check_hot_updates(0);
+
+DROP TABLE t CASCADE;
+
+
+-- Cleanup
+DROP FUNCTION check_hot_updates(int, text, text);
+DROP COLLATION case_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 14dec2d49c1..c38e7b1e9ad 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -400,6 +400,7 @@ CachedFunctionCompileCallback
CachedFunctionDeleteCallback
CachedFunctionHashEntry
CachedFunctionHashKey
+CachedIndexDatum
CachedPlan
CachedPlanSource
CallContext
--
2.51.2
^ permalink raw reply [nested|flat] 2+ messages in thread
end of thread, other threads:[~2026-01-13 14:54 UTC | newest]
Thread overview: 2+ messages (download: mbox mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-01-08 20:25 Re: Expanding HOT updates for expression and partial indexes Greg Burd <[email protected]>
2026-01-13 14:54 ` Greg Burd <[email protected]>
This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox