From 003233318e7e92cfd029e1ae2d7ab2959a251cb3 Mon Sep 17 00:00:00 2001 From: nkey Date: Tue, 31 Dec 2024 14:14:38 +0100 Subject: [PATCH v11 08/11] Concurrently built index validation uses fresh snapshots This commit modifies the validation process for concurrently built indexes to use fresh snapshots instead of a single reference snapshot. The previous approach of using a single reference snapshot could lead to issues with xmin propagation. Specifically, if the index build took a long time, the reference snapshot's xmin could become outdated, causing the index to miss tuples that were deleted by transactions that committed after the reference snapshot was taken. To address this, the validation process now periodically replaces the snapshot with a newer one. This ensures that the index's xmin is kept up-to-date and that all relevant tuples are included in the index. The interval for replacing the snapshot is controlled by the `VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL` constant, which is currently set to 1000 milliseconds. --- doc/src/sgml/ref/create_index.sgml | 11 ++++-- doc/src/sgml/ref/reindex.sgml | 11 +++--- src/backend/access/heap/README.HOT | 15 +++++--- src/backend/access/heap/heapam_handler.c | 45 ++++++++++++++++++------ src/backend/access/nbtree/nbtsort.c | 2 +- src/backend/access/spgist/spgvacuum.c | 12 +++++-- src/backend/catalog/index.c | 19 +++++++--- src/backend/commands/indexcmds.c | 2 +- src/include/access/transam.h | 15 ++++++++ 9 files changed, 100 insertions(+), 32 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index e33345f6a34..54566223cb0 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -868,9 +868,14 @@ Indexes: - Like any long-running transaction, CREATE INDEX on a - table can affect which tuples can be removed by concurrent - VACUUM on any other table. + Due to the improved implementation using periodically refreshed snapshots and + auxiliary indexes, concurrent index builds have minimal impact on concurrent + VACUUM operations. The system automatically advances its + internal transaction horizon during the build process, allowing + VACUUM to remove dead tuples on other tables without + having to wait for the entire index build to complete. Only during very brief + periods when snapshots are being refreshed might there be any temporary effect + on concurrent VACUUM operations. diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index 6a05620bd67..64c633e0398 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -495,10 +495,13 @@ Indexes: - Like any long-running transaction, REINDEX on a table - can affect which tuples can be removed by concurrent - VACUUM on any other table. - + REINDEX CONCURRENTLY has minimal + impact on which tuples can be removed by concurrent VACUUM + operations on other tables. This is achieved through periodic snapshot + refreshes and the use of auxiliary indexes during the rebuild process, + allowing the system to advance its transaction horizon regularly rather than + maintaining a single long-running snapshot. + REINDEX SYSTEM does not support diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 829dad1194e..d41609c97cd 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -375,6 +375,11 @@ constraint on which updates can be HOT. Other transactions must include such an index when determining HOT-safety of updates, even though they must ignore it for both insertion and searching purposes. +Also, special auxiliary index is created the same way. It marked as +"ready for inserts" without any actual table scan. Its purpose is collect +new tuples inserted into table while our target index is still "not ready +for inserts" + We must do this to avoid making incorrect index entries. For example, suppose we are building an index on column X and we make an index entry for a non-HOT tuple with X=1. Then some other backend, unaware that X is an @@ -394,14 +399,14 @@ As above, we point the index entry at the root of the HOT-update chain but we use the key value from the live tuple. We mark the index open for inserts (but still not ready for reads) then -we again wait for transactions which have the table open. Then we take -a second reference snapshot and validate the index. This searches for -tuples missing from the index, and inserts any missing ones. Again, -the index entries have to have TIDs equal to HOT-chain root TIDs, but +we again wait for transactions which have the table open. Then validate +the index. This searches for tuples missing from the index in auxiliary +index, and inserts any missing ones if them visible to fresh snapshot. +Again, the index entries have to have TIDs equal to HOT-chain root TIDs, but the value to be inserted is the one from the live tuple. Then we wait until every transaction that could have a snapshot older than -the second reference snapshot is finished. This ensures that nobody is +the latest used snapshot is finished. This ensures that nobody is alive any longer who could need to see any tuples that might be missing from the index, as well as ensuring that no one can see any inconsistent rows in a broken HOT chain (the first condition is stronger than the diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index d2fa463298b..e974f979b55 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -1804,27 +1804,35 @@ heapam_index_validate_scan(Relation heapRelation, fetched; bool tuplesort_empty = false, auxtuplesort_empty = false; + instr_time snapshotTime, + currentTime; Assert(!HaveRegisteredOrActiveSnapshot()); Assert(!TransactionIdIsValid(MyProc->xmin)); +#define VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL 1000 /* - * Now take the "reference snapshot" that will be used by to filter candidate - * tuples. Beware! There might still be snapshots in - * use that treat some transaction as in-progress that our reference - * snapshot treats as committed. If such a recently-committed transaction - * deleted tuples in the table, we will not include them in the index; yet - * those transactions which see the deleting one as still-in-progress will - * expect such tuples to be there once we mark the index as valid. + * Now take the first snapshot that will be used by to filter candidate + * tuples. We are going to replace it by newer snapshot every so often + * to propagate horizon. + * + * Beware! There might still be snapshots in use that treat some transaction + * as in-progress that our temporary snapshot treats as committed. + * + * If such a recently-committed transaction deleted tuples in the table, + * we will not include them in the index; yet those transactions which + * see the deleting one as still-in-progress will expect such tuples to + * be there once we mark the index as valid. * * We solve this by waiting for all endangered transactions to exit before - * we mark the index as valid. + * we mark the index as valid, for that reason limitX is supported. * * We also set ActiveSnapshot to this snap, since functions in indexes may * need a snapshot. */ - snapshot = RegisterSnapshot(GetTransactionSnapshot()); + snapshot = RegisterSnapshot(GetLatestSnapshot()); PushActiveSnapshot(snapshot); + INSTR_TIME_SET_CURRENT(snapshotTime); limitXmin = snapshot->xmin; /* @@ -1865,6 +1873,23 @@ heapam_index_validate_scan(Relation heapRelation, bool ts_isnull; CHECK_FOR_INTERRUPTS(); + INSTR_TIME_SET_CURRENT(currentTime); + INSTR_TIME_SUBTRACT(currentTime, snapshotTime); + if (INSTR_TIME_GET_MILLISEC(currentTime) >= VALIDATE_INDEX_SNAPSHOT_RESET_INTERVAL) + { + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + /* to make sure we propagate xmin */ + InvalidateCatalogSnapshot(); + Assert(!TransactionIdIsValid(MyProc->xmin)); + + snapshot = RegisterSnapshot(GetLatestSnapshot()); + PushActiveSnapshot(snapshot); + /* xmin should not go backwards, but just for the case*/ + limitXmin = TransactionIdNewer(limitXmin, snapshot->xmin); + INSTR_TIME_SET_CURRENT(snapshotTime); + } + /* * Attempt to fetch the next TID from the auxiliary sort. If it's * empty, we set auxindexcursor to NULL. @@ -2007,7 +2032,7 @@ heapam_index_validate_scan(Relation heapRelation, heapam_index_fetch_end(fetch); /* - * Drop the reference snapshot. We must do this before waiting out other + * Drop the latest snapshot. We must do this before waiting out other * snapshot holders, else we will deadlock against other processes also * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one * they must wait for. diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 8b236c8ccd6..62e975016ad 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -444,7 +444,7 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, * dead tuples) won't get very full, so we give it only work_mem. * * In case of concurrent build dead tuples are not need to be put into index - * since we wait for all snapshots older than reference snapshot during the + * since we wait for all snapshots older than latest snapshot during the * validation phase. */ if (indexInfo->ii_Unique && !indexInfo->ii_Concurrent) diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 894aefa19e1..6a6b1f8797b 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -190,14 +190,16 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, * Add target TID to pending list if the redirection could have * happened since VACUUM started. (If xid is invalid, assume it * must have happened before VACUUM started, since REINDEX - * CONCURRENTLY locks out VACUUM.) + * CONCURRENTLY locks out VACUUM, if myXmin is invalid it is + * validation scan.) * * Note: we could make a tighter test by seeing if the xid is * "running" according to the active snapshot; but snapmgr.c * doesn't currently export a suitable API, and it's not entirely * clear that a tighter test is worth the cycles anyway. */ - if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin)) + if (!TransactionIdIsValid(bds->myXmin) || + TransactionIdFollowsOrEquals(dt->xid, bds->myXmin)) spgAddPendingTID(bds, &dt->pointer); } else @@ -811,7 +813,6 @@ spgvacuumscan(spgBulkDeleteState *bds) /* Finish setting up spgBulkDeleteState */ initSpGistState(&bds->spgstate, index); bds->pendingList = NULL; - bds->myXmin = GetActiveSnapshot()->xmin; bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO; /* @@ -925,6 +926,10 @@ spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, bds.stats = stats; bds.callback = callback; bds.callback_state = callback_state; + if (info->validate_index) + bds.myXmin = InvalidTransactionId; + else + bds.myXmin = GetActiveSnapshot()->xmin; spgvacuumscan(&bds); @@ -965,6 +970,7 @@ spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) bds.stats = stats; bds.callback = dummy_callback; bds.callback_state = NULL; + bds.myXmin = GetActiveSnapshot()->xmin; spgvacuumscan(&bds); } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0e06334f447..8aa6b0a2830 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3477,8 +3477,9 @@ IndexCheckExclusion(Relation heapRelation, * insert their new tuples into it. At the same moment we clear "indisready" for * auxiliary index, since it is no more required. * - * We then take a new reference snapshot, any tuples that are valid according - * to this snap, but are not in the index, must be added to the index. + * We then take a new snapshot, any tuples that are valid according + * to this snap, but are not in the index, must be added to the index. In + * order to propagate xmin we reset that snapshot every few so often. * (Any tuples committed live after the snap will be inserted into the * index by their originating transaction. Any tuples committed dead before * the snap need not be indexed, because we will wait out all transactions @@ -3491,7 +3492,7 @@ IndexCheckExclusion(Relation heapRelation, * TIDs of both auxiliary and target indexes, and doing a "merge join" against * the TID lists to see which tuples from auxiliary index are missing from the * target index. Thus we will ensure that all tuples valid according to the - * reference snapshot are in the index. Notice we need to do bulkdelete in the + * latest snapshot are in the index. Notice we need to do bulkdelete in the * particular order: auxiliary first, target last. * * Building a unique index this way is tricky: we might try to insert a @@ -3607,19 +3608,29 @@ validate_index(Oid heapId, Oid indexId, Oid auxIndexId) NULL, TUPLESORT_NONE); auxState.htups = auxState.itups = auxState.tups_inserted = 0; + /* tuplesort_begin_datum may require catalog snapshot */ + InvalidateCatalogSnapshot(); + Assert(!TransactionIdIsValid(MyProc->xmin)); + (void) index_bulk_delete(&auxivinfo, NULL, validate_index_callback, &auxState); + Assert(!TransactionIdIsValid(MyProc->xmin)); + state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator, InvalidOid, false, (int) main_work_mem_part, NULL, TUPLESORT_NONE); state.htups = state.itups = state.tups_inserted = 0; + Assert(!TransactionIdIsValid(MyProc->xmin)); + /* ambulkdelete updates progress metrics */ (void) index_bulk_delete(&ivinfo, NULL, validate_index_callback, &state); + Assert(!TransactionIdIsValid(MyProc->xmin)); + /* Execute the sort */ { const int progress_index[] = { @@ -3636,8 +3647,6 @@ validate_index(Oid heapId, Oid indexId, Oid auxIndexId) } tuplesort_performsort(state.tuplesort); tuplesort_performsort(auxState.tuplesort); - - InvalidateCatalogSnapshot(); Assert(!TransactionIdIsValid(MyProc->xmin)); /* diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index cd0d63ded82..e10f6098f58 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -4354,7 +4354,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein /* * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted - * just before the reference snap was taken, we have to wait out any + * just before the latest snap was taken, we have to wait out any * transactions that might have older snapshots. * * Because we don't take a snapshot or Xid in this transaction, diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 0cab8653f1b..3d8db998c0b 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -355,6 +355,21 @@ NormalTransactionIdOlder(TransactionId a, TransactionId b) return b; } +/* return the newer of the two IDs */ +static inline TransactionId +TransactionIdNewer(TransactionId a, TransactionId b) +{ + if (!TransactionIdIsValid(a)) + return b; + + if (!TransactionIdIsValid(b)) + return a; + + if (TransactionIdFollows(a, b)) + return a; + return b; +} + /* return the newer of the two IDs */ static inline FullTransactionId FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) -- 2.43.0