From 103989dcbe91603da753b7e9647ad12df888cfb4 Mon Sep 17 00:00:00 2001 From: nkey Date: Tue, 24 Dec 2024 19:17:25 +0100 Subject: [PATCH v9 8/9] Concurrently built index validation uses fresh snapshots This commit modifies the validation process for concurrently built indexes to use fresh snapshots instead of a single reference snapshot. The previous approach of using a single reference snapshot could lead to issues with xmin propagation. Specifically, if the index build took a long time, the reference snapshot's xmin could become outdated, causing the index to miss tuples that were deleted by transactions that committed after the reference snapshot was taken. To address this, the validation process now periodically replaces the snapshot with a newer one. This ensures that the index's xmin is kept up-to-date and that all relevant tuples are included in the index. The interval for replacing the snapshot is controlled by the `VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL` constant, which is currently set to 1000 milliseconds. --- src/backend/access/heap/README.HOT | 15 +++++--- src/backend/access/heap/heapam_handler.c | 45 ++++++++++++++++++------ src/backend/access/nbtree/nbtsort.c | 2 +- src/backend/catalog/index.c | 7 ++-- src/backend/commands/indexcmds.c | 2 +- src/include/access/transam.h | 15 ++++++++ 6 files changed, 66 insertions(+), 20 deletions(-) diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 829dad1194e..d41609c97cd 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -375,6 +375,11 @@ constraint on which updates can be HOT. Other transactions must include such an index when determining HOT-safety of updates, even though they must ignore it for both insertion and searching purposes. +Also, special auxiliary index is created the same way. It marked as +"ready for inserts" without any actual table scan. Its purpose is collect +new tuples inserted into table while our target index is still "not ready +for inserts" + We must do this to avoid making incorrect index entries. For example, suppose we are building an index on column X and we make an index entry for a non-HOT tuple with X=1. Then some other backend, unaware that X is an @@ -394,14 +399,14 @@ As above, we point the index entry at the root of the HOT-update chain but we use the key value from the live tuple. We mark the index open for inserts (but still not ready for reads) then -we again wait for transactions which have the table open. Then we take -a second reference snapshot and validate the index. This searches for -tuples missing from the index, and inserts any missing ones. Again, -the index entries have to have TIDs equal to HOT-chain root TIDs, but +we again wait for transactions which have the table open. Then validate +the index. This searches for tuples missing from the index in auxiliary +index, and inserts any missing ones if them visible to fresh snapshot. +Again, the index entries have to have TIDs equal to HOT-chain root TIDs, but the value to be inserted is the one from the live tuple. Then we wait until every transaction that could have a snapshot older than -the second reference snapshot is finished. This ensures that nobody is +the latest used snapshot is finished. This ensures that nobody is alive any longer who could need to see any tuples that might be missing from the index, as well as ensuring that no one can see any inconsistent rows in a broken HOT chain (the first condition is stronger than the diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ecec3c1c080..1a041c5a77b 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -1806,27 +1806,35 @@ heapam_index_validate_scan(Relation heapRelation, fetched; bool tuplesort_empty = false, auxtuplesort_empty = false; + instr_time snapshotTime, + currentTime; Assert(!HaveRegisteredOrActiveSnapshot()); Assert(!TransactionIdIsValid(MyProc->xmin)); +#define VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL 1000 /* - * Now take the "reference snapshot" that will be used by to filter candidate - * tuples. Beware! There might still be snapshots in - * use that treat some transaction as in-progress that our reference - * snapshot treats as committed. If such a recently-committed transaction - * deleted tuples in the table, we will not include them in the index; yet - * those transactions which see the deleting one as still-in-progress will - * expect such tuples to be there once we mark the index as valid. + * Now take the first snapshot that will be used by to filter candidate + * tuples. We are going to replace it by newer snapshot every so often + * to propagate horizon. + * + * Beware! There might still be snapshots in use that treat some transaction + * as in-progress that our temporary snapshot treats as committed. + * + * If such a recently-committed transaction deleted tuples in the table, + * we will not include them in the index; yet those transactions which + * see the deleting one as still-in-progress will expect such tuples to + * be there once we mark the index as valid. * * We solve this by waiting for all endangered transactions to exit before - * we mark the index as valid. + * we mark the index as valid, for that reason limitX is supported. * * We also set ActiveSnapshot to this snap, since functions in indexes may * need a snapshot. */ - snapshot = RegisterSnapshot(GetTransactionSnapshot()); + snapshot = RegisterSnapshot(GetLatestSnapshot()); PushActiveSnapshot(snapshot); + INSTR_TIME_SET_CURRENT(snapshotTime); limitXmin = snapshot->xmin; /* @@ -1868,6 +1876,23 @@ heapam_index_validate_scan(Relation heapRelation, bool ts_isnull; CHECK_FOR_INTERRUPTS(); + INSTR_TIME_SET_CURRENT(currentTime); + INSTR_TIME_SUBTRACT(currentTime, snapshotTime); + if (INSTR_TIME_GET_MILLISEC(currentTime) >= VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL) + { + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + /* to make sure we propagate xmin */ + InvalidateCatalogSnapshot(); + Assert(!TransactionIdIsValid(MyProc->xmin)); + + snapshot = RegisterSnapshot(GetLatestSnapshot()); + PushActiveSnapshot(snapshot); + /* xmin should not go backwards, but just for the case*/ + limitXmin = TransactionIdNewer(limitXmin, snapshot->xmin); + INSTR_TIME_SET_CURRENT(snapshotTime); + } + /* * Attempt to fetch the next TID from the auxiliary sort. If it's * empty, we set auxindexcursor to NULL. @@ -2020,7 +2045,7 @@ heapam_index_validate_scan(Relation heapRelation, heapam_index_fetch_end(fetch); /* - * Drop the reference snapshot. We must do this before waiting out other + * Drop the latest snapshot. We must do this before waiting out other * snapshot holders, else we will deadlock against other processes also * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one * they must wait for. diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 38355601421..60551f82bfa 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -442,7 +442,7 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, * dead tuples) won't get very full, so we give it only work_mem. * * In case of concurrent build dead tuples are not need to be put into index - * since we wait for all snapshots older than reference snapshot during the + * since we wait for all snapshots older than latest snapshot during the * validation phase. */ if (indexInfo->ii_Unique && !indexInfo->ii_Concurrent) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8b14f66affc..b4df2b1eee6 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3472,8 +3472,9 @@ IndexCheckExclusion(Relation heapRelation, * insert their new tuples into it. At that moment we clear "indisready" for * auxiliary index, since it is no more required/ * - * We then take a new reference snapshot, any tuples that are valid according - * to this snap, but are not in the index, must be added to the index. + * We then take a new snapshot, any tuples that are valid according + * to this snap, but are not in the index, must be added to the index. In + * order to propagate xmin we reset that snapshot every few so often. * (Any tuples committed live after the snap will be inserted into the * index by their originating transaction. Any tuples committed dead before * the snap need not be indexed, because we will wait out all transactions @@ -3486,7 +3487,7 @@ IndexCheckExclusion(Relation heapRelation, * TIDs of both auxiliary and target indexes, and doing a "merge join" against * the TID lists to see which tuples from auxiliary index are missing from the * target index. Thus we will ensure that all tuples valid according to the - * reference snapshot are in the index. Notice we need to do bulkdelete in the + * latest snapshot are in the index. Notice we need to do bulkdelete in the * particular order: auxiliary first, target last. * * Building a unique index this way is tricky: we might try to insert a diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 02b636a0050..71baeced508 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -4328,7 +4328,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein /* * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted - * just before the reference snap was taken, we have to wait out any + * just before the latest snap was taken, we have to wait out any * transactions that might have older snapshots. * * Because we don't take a snapshot or Xid in this transaction, diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 28a2d287fd5..90d358804e4 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -355,6 +355,21 @@ NormalTransactionIdOlder(TransactionId a, TransactionId b) return b; } +/* return the newer of the two IDs */ +static inline TransactionId +TransactionIdNewer(TransactionId a, TransactionId b) +{ + if (!TransactionIdIsValid(a)) + return b; + + if (!TransactionIdIsValid(b)) + return a; + + if (TransactionIdFollows(a, b)) + return a; + return b; +} + /* return the newer of the two IDs */ static inline FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) -- 2.43.0