From bf901dbe761f5a3fb2b10c480a66780f59af1913 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Wed, 4 Dec 2024 16:16:56 +0900 Subject: [PATCH v58 4/4] Defer locking of runtime-prunable relations in cached plans AcquireExecutorLocks() in plancache.c locks all relations in a plan's range table to ensure the plan is safe for execution. However, this approach also locks runtime-prunable relations that will later be pruned during "initial" runtime pruning, introducing unnecessary overhead. This commit defers locking for such relations and ensures that any invalidation caused by this deferral is handled by replanning when necessary. * Locking changes: The planner now tracks "unprunable" relations using the new PlannedStmt.unprunableRelids field, which is computed during set_plan_refs() by subtracting runtime-prunable relation RT indexes (identified from PartitionPruneInfos) from all RT indexes. AcquireExecutorLocks() locks only these unprunable relations. During executor startup, ExecDoInitialPruning() identifies unpruned partitions and acquires locks on them. A new es_unpruned_relids field is added to EState to ensure that subsequent initialization steps process only locked relations. It is initially populated with PlannedStmt.unprunableRelids and updated by ExecDoInitialPruning() with the RT indexes of the unpruned partitions. To populate es_unpruned_relids, PartitionedRelPruneInfo and PartitionedRelPruningData now include a leafpart_rti_map[] to map partition indexes (as determined by get_matching_partitions()) to their corresponding RT indexes. Executor code that works with child result relations and child RowMarks requires adjustments because pruned relations are no longer locked; without such adjustments, the executor could attempt to process result relations or RowMarks for pruned partitions. 
Specifically, ExecInitModifyTable() trims result relation-related lists resultRelations, withCheckOptionLists, returningLists, and updateColnosLists to include only unpruned partitions by checking es_unpruned_relids. It also creates ResultRelInfo structs only for these unpruned partitions. Similarly, child RowMarks whose owning relations are pruned are now ignored, again by checking es_unpruned_relids, ensuring only those associated with unpruned relations are processed. Finally, ExecCheckPermissions() now includes an Assert to verify that all relations undergoing permission checks have been properly locked. This safeguard helps catch any cases where relations that should have been added to the unprunableRelids set were missed during planning. * Changes related to handling plan invalidation: Deferring locks introduces a window where prunable relations may be altered by concurrent DDL, invalidating the plan. To ensure correctness, a new ExecutorStartCachedPlan() function that wraps ExecutorStart() is added to detect and handle invalid plans caused by deferred locking. When invalidation occurs, ExecutorStartCachedPlan() updates all plans in the CachedPlan using the new UpdateCachedPlan() function and retries execution with the refreshed plan. UpdateCachedPlan() replaces stale plans in CachedPlan.stmt_list. To enable this, a new CachedPlan.stmt_context is introduced as a child context of CachedPlan.context. This separates PlannedStmts from the parent context, allowing UpdateCachedPlan() to free old PlannedStmts when replacing them with new plans, while preserving the CachedPlan structure, including the List containing the statements. * Testing: Tests using the delay_execution module verify scenarios where a cached plan becomes invalid due to changes in prunable relations after deferred locks are taken. 
* Note to extension authors: ExecutorStart_hook implementations should verify plan validity after calling standard_ExecutorStart() to ensure they are not working with an invalid plan. The following check can be used: /* The plan may have become invalid during ExecutorStart() */ if (!ExecPlanStillValid(queryDesc->estate)) return; Additionally, any RT index inspected by an extension should be checked against EState.es_unpruned_relids before processing the relation, particularly if the relation could be a child relation subject to initial partition pruning. This is necessary because extensions can no longer assume that all range table relations are locked; only those in es_unpruned_relids are. For reference, see how InitPlan() processes entries from PlannedStmt.rowMarks. Reviewed-by: Robert Haas Reviewed-by: Tomas Vondra Discussion: https://postgr.es/m/CA+HiwqFGkMSge6TgC9KQzde0ohpAycLQuV7ooitEEpbKB0O_mg@mail.gmail.com --- contrib/auto_explain/auto_explain.c | 4 + .../pg_stat_statements/pg_stat_statements.c | 4 + src/backend/commands/copyto.c | 2 +- src/backend/commands/createas.c | 2 +- src/backend/commands/explain.c | 16 +- src/backend/commands/extension.c | 1 + src/backend/commands/matview.c | 2 +- src/backend/commands/portalcmds.c | 1 + src/backend/commands/prepare.c | 9 +- src/backend/commands/trigger.c | 14 + src/backend/executor/README | 35 ++- src/backend/executor/execMain.c | 124 +++++++- src/backend/executor/execParallel.c | 9 +- src/backend/executor/execPartition.c | 88 +++++- src/backend/executor/execUtils.c | 1 + src/backend/executor/functions.c | 1 + src/backend/executor/nodeAppend.c | 8 +- src/backend/executor/nodeLockRows.c | 9 +- src/backend/executor/nodeMergeAppend.c | 2 +- src/backend/executor/nodeModifyTable.c | 70 ++++- src/backend/executor/spi.c | 23 +- src/backend/optimizer/plan/planner.c | 2 + src/backend/optimizer/plan/setrefs.c | 29 +- src/backend/partitioning/partprune.c | 22 ++ src/backend/tcop/postgres.c | 4 +- src/backend/tcop/pquery.c | 
39 ++- src/backend/utils/cache/plancache.c | 204 +++++++++++-- src/backend/utils/mmgr/portalmem.c | 4 +- src/include/commands/explain.h | 6 +- src/include/commands/trigger.h | 1 + src/include/executor/execPartition.h | 6 +- src/include/executor/execdesc.h | 2 + src/include/executor/executor.h | 28 ++ src/include/nodes/execnodes.h | 13 + src/include/nodes/pathnodes.h | 8 + src/include/nodes/plannodes.h | 7 + src/include/utils/plancache.h | 50 +++- src/include/utils/portal.h | 4 +- src/test/modules/delay_execution/Makefile | 3 +- .../modules/delay_execution/delay_execution.c | 63 +++- .../expected/cached-plan-inval.out | 282 ++++++++++++++++++ src/test/modules/delay_execution/meson.build | 1 + .../specs/cached-plan-inval.spec | 80 +++++ src/test/regress/expected/partition_prune.out | 44 +++ src/test/regress/sql/partition_prune.sql | 18 ++ 45 files changed, 1237 insertions(+), 108 deletions(-) create mode 100644 src/test/modules/delay_execution/expected/cached-plan-inval.out create mode 100644 src/test/modules/delay_execution/specs/cached-plan-inval.spec diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c index 623a674f99..8b5eaf3ef3 100644 --- a/contrib/auto_explain/auto_explain.c +++ b/contrib/auto_explain/auto_explain.c @@ -298,6 +298,10 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags) else standard_ExecutorStart(queryDesc, eflags); + /* The plan may have become invalid during standard_ExecutorStart() */ + if (!ExecPlanStillValid(queryDesc->estate)) + return; + if (auto_explain_enabled()) { /* diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 49c657b3e0..b11691ae26 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -994,6 +994,10 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) else standard_ExecutorStart(queryDesc, eflags); + /* The plan may have become invalid during 
standard_ExecutorStart() */ + if (!ExecPlanStillValid(queryDesc->estate)) + return; + /* * If query has queryId zero, don't track it. This prevents double * counting of optimizable statements that are directly contained in diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index f55e6d9675..27b6f6f069 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -556,7 +556,7 @@ BeginCopyTo(ParseState *pstate, ((DR_copy *) dest)->cstate = cstate; /* Create a QueryDesc requesting no output */ - cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 5c92e48a56..0cc74dd45a 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -332,7 +332,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a3f1d53d7a..b5c734e75c 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -512,7 +512,8 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, } /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + ExplainOnePlan(plan, NULL, NULL, -1, into, es, queryString, params, + queryEnv, &planduration, (es->buffers ? &bufusage : NULL), es->memory ? &mem_counters : NULL); } @@ -634,7 +635,9 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, * to call it. 
*/ void -ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, + CachedPlanSource *plansource, int query_index, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage, @@ -690,7 +693,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, dest = None_Receiver; /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, + queryDesc = CreateQueryDesc(plannedstmt, cplan, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, instrument_option); @@ -704,8 +707,11 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, if (into) eflags |= GetIntoRelEFlags(into); - /* call ExecutorStart to prepare the plan for execution */ - ExecutorStart(queryDesc, eflags); + /* Prepare the plan for execution. 
*/ + if (queryDesc->cplan) + ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index); + else + ExecutorStart(queryDesc, eflags); /* Execute the plan for statistics if asked for */ if (es->analyze) diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index af6bd8ff42..7d4a3c5b8d 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -907,6 +907,7 @@ execute_sql_string(const char *sql, const char *filename) QueryDesc *qdesc; qdesc = CreateQueryDesc(stmt, + NULL, sql, GetActiveSnapshot(), NULL, dest, NULL, NULL, 0); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 010097873d..69be74b4bd 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -438,7 +438,7 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, queryString, + queryDesc = CreateQueryDesc(plan, NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index ac52ca25e9..48cf0b84e5 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -117,6 +117,7 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa queryString, CMDTAG_SELECT, /* cursor's query is always a SELECT */ list_make1(plan), + NULL, NULL); /*---------- diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index a93f970a29..45fd63d2b1 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -202,7 +202,8 @@ ExecuteQuery(ParseState *pstate, query_string, entry->plansource->commandTag, plan_list, - cplan); + cplan, + entry->plansource); /* * For CREATE TABLE ... 
AS EXECUTE, we must verify that the prepared @@ -582,6 +583,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, MemoryContextCounters mem_counters; MemoryContext planner_ctx = NULL; MemoryContext saved_ctx = NULL; + int query_index = 0; if (es->memory) { @@ -654,7 +656,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); if (pstmt->commandType != CMD_UTILITY) - ExplainOnePlan(pstmt, into, es, query_string, paramLI, pstate->p_queryEnv, + ExplainOnePlan(pstmt, cplan, entry->plansource, query_index, + into, es, query_string, paramLI, pstate->p_queryEnv, &planduration, (es->buffers ? &bufusage : NULL), es->memory ? &mem_counters : NULL); else @@ -665,6 +668,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, /* Separate plans with an appropriate separator */ if (lnext(plan_list, p) != NULL) ExplainSeparatePlans(es); + + query_index++; } if (estate) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 09356e46d1..79572ec8f1 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -5123,6 +5123,20 @@ AfterTriggerEndQuery(EState *estate) afterTriggers.query_depth--; } +/* ---------- + * AfterTriggerAbortQuery() + * + * Called by ExecutorEnd() if the query execution was aborted due to the + * plan becoming invalid during initialization. + * ---------- + */ +void +AfterTriggerAbortQuery(void) +{ + /* Revert the actions of AfterTriggerBeginQuery(). */ + afterTriggers.query_depth--; +} + /* * AfterTriggerFreeQuery diff --git a/src/backend/executor/README b/src/backend/executor/README index 642d63be61..449c6068ae 100644 --- a/src/backend/executor/README +++ b/src/backend/executor/README @@ -280,6 +280,28 @@ are typically reset to empty once per tuple. 
Per-tuple contexts are usually associated with ExprContexts, and commonly each PlanState node has its own ExprContext to evaluate its qual and targetlist expressions in. +Relation Locking +---------------- + +Typically, when the executor initializes a plan tree for execution, it doesn't +lock non-index relations if the plan tree is freshly generated and not derived +from a CachedPlan. This is because such locks have already been established +during the query's parsing, rewriting, and planning phases. However, with a +cached plan tree, some relations may remain unlocked. The function +AcquireExecutorLocks() only locks unprunable relations in the plan, deferring +the locking of prunable ones to executor initialization. This avoids +unnecessary locking of relations that will be pruned during "initial" runtime +pruning in ExecDoInitialPruning(). + +This approach creates a window where a cached plan tree with child tables +could become outdated if another backend modifies these tables before +ExecDoInitialPruning() locks them. As a result, the executor has the added duty +to verify the plan tree's validity whenever it locks a child table after +doing initial pruning. This validation is done by checking the CachedPlan.is_valid +flag. If the plan tree is outdated (is_valid = false), the executor stops +further initialization, cleans up anything in EState that would have been +allocated up to that point, and retries execution after recreating the +invalid plan in the CachedPlan. 
Query Processing Control Flow ----------------------------- @@ -288,11 +310,13 @@ This is a sketch of control flow for full query processing: CreateQueryDesc - ExecutorStart + ExecutorStart or ExecutorStartCachedPlan CreateExecutorState creates per-query context - switch to per-query context to run ExecInitNode + switch to per-query context to run ExecDoInitialPruning and ExecInitNode AfterTriggerBeginQuery + ExecDoInitialPruning + does initial pruning and locks surviving partitions if needed ExecInitNode --- recursively scans plan tree ExecInitNode recurse into subsidiary nodes @@ -316,7 +340,12 @@ This is a sketch of control flow for full query processing: FreeQueryDesc -Per above comments, it's not really critical for ExecEndNode to free any +As mentioned in the "Relation Locking" section, if the plan tree is found to +be stale after locking partitions in ExecDoInitialPruning(), the control is +immediately returned to ExecutorStartCachedPlan(), which will create a new plan +tree and perform the steps starting from CreateExecutorState() again. + +Per above comments, it's not really critical for ExecEndPlan to free any memory; it'll all go away in FreeExecutorState anyway. However, we do need to be careful to close relations, drop buffer pins, etc, so we do need to scan the plan state tree to find these sorts of resources. 
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 5dc46f2e95..9543d9490c 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -55,11 +55,13 @@ #include "parser/parse_relation.h" #include "pgstat.h" #include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" #include "tcop/utility.h" #include "utils/acl.h" #include "utils/backend_status.h" #include "utils/lsyscache.h" #include "utils/partcache.h" +#include "utils/plancache.h" #include "utils/rls.h" #include "utils/snapmgr.h" @@ -137,6 +139,62 @@ ExecutorStart(QueryDesc *queryDesc, int eflags) standard_ExecutorStart(queryDesc, eflags); } +/* + * ExecutorStartCachedPlan + * Start execution for a given query in the CachedPlanSource, replanning + * if the plan is invalidated due to deferred locks taken during the + * plan's initialization + * + * This function handles cases where the CachedPlan given in queryDesc->cplan + * might become invalid during the initialization of the plan given in + * queryDesc->plannedstmt, particularly when prunable relations in it are + * locked after performing initial pruning. If the locks invalidate the plan, + * the function calls UpdateCachedPlan() to replan all queries in the + * CachedPlan, and then retries initialization. + * + * The function repeats the process until ExecutorStart() successfully + * initializes the plan, that is without the CachedPlan becoming invalid. + */ +void +ExecutorStartCachedPlan(QueryDesc *queryDesc, int eflags, + CachedPlanSource *plansource, + int query_index) +{ + if (unlikely(queryDesc->cplan == NULL)) + elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlan"); + if (unlikely(plansource == NULL)) + elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlanSource"); + + /* + * Loop and retry with an updated plan until no further invalidation + * occurs. 
+ */ + while (1) + { + ExecutorStart(queryDesc, eflags); + if (!CachedPlanValid(queryDesc->cplan)) + { + /* + * Clean up the current execution state before creating the new + * plan to retry ExecutorStart(). Mark execution as aborted to + * ensure that AFTER trigger state is properly reset. + */ + queryDesc->estate->es_aborted = true; + ExecutorEnd(queryDesc); + + /* Retry ExecutorStart() with an updated plan tree. */ + queryDesc->plannedstmt = UpdateCachedPlan(plansource, query_index, + queryDesc->queryEnv); + } + else + /* + * Exit the loop if the plan is initialized successfully and no + * sinval messages were received that invalidated the CachedPlan. + */ + break; + } +} + void standard_ExecutorStart(QueryDesc *queryDesc, int eflags) { @@ -320,6 +378,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, estate = queryDesc->estate; Assert(estate != NULL); + Assert(!estate->es_aborted); Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); /* caller must ensure the query's snapshot is active */ @@ -426,8 +485,11 @@ standard_ExecutorFinish(QueryDesc *queryDesc) Assert(estate != NULL); Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); - /* This should be run once and only once per Executor instance */ - Assert(!estate->es_finished); + /* + * This should be run once and only once per Executor instance and never + * if the execution was aborted. + */ + Assert(!estate->es_finished && !estate->es_aborted); /* Switch into per-query memory context */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); @@ -490,11 +552,10 @@ standard_ExecutorEnd(QueryDesc *queryDesc) (PgStat_Counter) estate->es_parallel_workers_launched); /* - * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This - * Assert is needed because ExecutorFinish is new as of 9.1, and callers - * might forget to call it. + * Check that ExecutorFinish was called, unless in EXPLAIN-only mode or if + * execution was aborted. 
*/ - Assert(estate->es_finished || + Assert(estate->es_finished || estate->es_aborted || (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); /* @@ -508,6 +569,14 @@ standard_ExecutorEnd(QueryDesc *queryDesc) UnregisterSnapshot(estate->es_snapshot); UnregisterSnapshot(estate->es_crosscheck_snapshot); + /* + * Reset AFTER trigger module if the query execution was aborted. + */ + if (estate->es_aborted && + !(estate->es_top_eflags & + (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY))) + AfterTriggerAbortQuery(); + /* * Must switch out of context before destroying it */ @@ -606,6 +675,21 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos, (rte->rtekind == RTE_SUBQUERY && rte->relkind == RELKIND_VIEW)); + /* + * Ensure that we have at least an AccessShareLock on relations + * whose permissions need to be checked. + * + * Skip this check in a parallel worker because locks won't be + * taken until ExecInitNode() performs plan initialization. + * + * XXX: ExecCheckPermissions() in a parallel worker may be + * redundant with the checks done in the leader process, so this + * should be reviewed to ensure it’s necessary. 
+ */ + Assert(IsParallelWorker() || + CheckRelationOidLockedByMe(rte->relid, AccessShareLock, + true)); + (void) getRTEPermissionInfo(rteperminfos, rte); /* Many-to-one mapping not allowed */ Assert(!bms_is_member(rte->perminfoindex, indexset)); @@ -838,6 +922,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; + CachedPlan *cachedplan = queryDesc->cplan; Plan *plan = plannedstmt->planTree; List *rangeTable = plannedstmt->rtable; EState *estate = queryDesc->estate; @@ -857,7 +942,9 @@ InitPlan(QueryDesc *queryDesc, int eflags) ExecInitRangeTable(estate, rangeTable, plannedstmt->permInfos); estate->es_plannedstmt = plannedstmt; + estate->es_cachedplan = cachedplan; estate->es_part_prune_infos = plannedstmt->partPruneInfos; + estate->es_unpruned_relids = bms_copy(plannedstmt->unprunableRelids); /* * Perform runtime "initial" pruning to identify which child subplans, @@ -867,9 +954,15 @@ InitPlan(QueryDesc *queryDesc, int eflags) * executed, are saved in es_part_prune_results. These results correspond * to each PartitionPruneInfo entry, and the es_part_prune_results list is * parallel to es_part_prune_infos. + * + * This will also add the RT indexes of surviving leaf partitions to + * es_unpruned_relids. */ ExecDoInitialPruning(estate); + if (!ExecPlanStillValid(estate)) + return; + /* * Next, build the ExecRowMark array from the PlanRowMark(s), if any. */ @@ -884,8 +977,13 @@ InitPlan(QueryDesc *queryDesc, int eflags) Relation relation; ExecRowMark *erm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). 
+ */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unpruned_relids)) continue; /* get relation's OID (will produce InvalidOid if subquery) */ @@ -2857,6 +2955,9 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree) * the snapshot, rangetable, and external Param info. They need their own * copies of local state, including a tuple table, es_param_exec_vals, * result-rel info, etc. + * + * es_cachedplan is not copied because EPQ plan execution does not acquire + * any new locks that could invalidate the CachedPlan. */ rcestate->es_direction = ForwardScanDirection; rcestate->es_snapshot = parentestate->es_snapshot; @@ -2928,6 +3029,13 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree) } } + /* + * Copy es_unpruned_relids so that RowMarks of pruned relations are + * ignored in ExecInitLockRows() and ExecInitModifyTable() when + * initializing the plan trees below. + */ + rcestate->es_unpruned_relids = parentestate->es_unpruned_relids; + /* * Initialize private state information for each SubPlan. We must do this * before running ExecInitNode on the main query tree, since diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index b01a2fdfdd..0c2da25fab 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1257,8 +1257,15 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false); paramLI = RestoreParamList(¶mspace); - /* Create a QueryDesc for the query. */ + /* + * Create a QueryDesc for the query. We pass NULL for cachedplan, because + * we don't have a pointer to the CachedPlan in the leader's process. It's + * fine because the only reason the executor needs to see it is to decide + * if it should take locks on certain relations, but parallel workers + * always take locks anyway. 
+ */ return CreateQueryDesc(pstmt, + NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, receiver, paramLI, NULL, instrument_options); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 46dd1c77a3..93cdae6f89 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -26,6 +26,7 @@ #include "partitioning/partdesc.h" #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" #include "utils/acl.h" #include "utils/lsyscache.h" #include "utils/partcache.h" @@ -194,7 +195,8 @@ static void find_matching_subplans_recurse(PlanState *parent_plan, PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans); + Bitmapset **validsubplans, + Bitmapset **validsubplan_rtis); /* @@ -1764,7 +1766,8 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * ExecDoInitialPruning: * Perform runtime "initial" pruning, if necessary, to determine the set * of child subnodes that need to be initialized during ExecInitNode() for - * all plan nodes that contain a PartitionPruneInfo. + * all plan nodes that contain a PartitionPruneInfo. This also locks the + * leaf partitions whose subnodes will be initialized if needed. * * ExecInitPartitionExecPruning: * Updates the PartitionPruneState found at given part_prune_index in @@ -1785,11 +1788,13 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) *------------------------------------------------------------------------- */ + /* * ExecDoInitialPruning * Perform runtime "initial" pruning, if necessary, to determine the set * of child subnodes that need to be initialized during ExecInitNode() for - * plan nodes that support partition pruning. + * plan nodes that support partition pruning. This also locks the leaf + * partitions whose subnodes will be initialized if needed. 
* * This function iterates over each PartitionPruneInfo entry in * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState @@ -1810,6 +1815,7 @@ void ExecDoInitialPruning(EState *estate) { ListCell *lc; + List *locked_relids = NIL; foreach(lc, estate->es_part_prune_infos) { @@ -1827,10 +1833,48 @@ ExecDoInitialPruning(EState *estate) * bitmapset or NULL as described in the header comment. */ if (prunestate->do_initial_prune) - validsubplans = ExecFindMatchingSubPlans(prunestate, true); + { + Bitmapset *validsubplan_rtis = NULL; + + validsubplans = ExecFindMatchingSubPlans(prunestate, true, + &validsubplan_rtis); + if (ExecShouldLockRelations(estate)) + { + int rtindex = -1; + + rtindex = -1; + while ((rtindex = bms_next_member(validsubplan_rtis, + rtindex)) >= 0) + { + RangeTblEntry *rte = exec_rt_fetch(rtindex, estate); + + Assert(rte->rtekind == RTE_RELATION && + rte->rellockmode != NoLock); + LockRelationOid(rte->relid, rte->rellockmode); + locked_relids = lappend_int(locked_relids, rtindex); + } + } + estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids, + validsubplan_rtis); + } + estate->es_part_prune_results = lappend(estate->es_part_prune_results, validsubplans); } + + /* + * Release the useless locks if the plan won't be executed. This is the + * same as what CheckCachedPlan() in plancache.c does. + */ + if (!ExecPlanStillValid(estate)) + { + foreach(lc, locked_relids) + { + RangeTblEntry *rte = exec_rt_fetch(lfirst_int(lc), estate); + + UnlockRelationOid(rte->relid, rte->rellockmode); + } + } } /* @@ -2042,8 +2086,8 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) * The set of partitions that exist now might not be the same that * existed when the plan was made. The normal case is that it is; * optimize for that case with a quick comparison, and just copy - * the subplan_map and make subpart_map point to the one in - * PruneInfo. 
+ * the subplan_map and make subpart_map, leafpart_rti_map point to + * the ones in PruneInfo. * * For the case where they aren't identical, we could have more * partitions on either side; or even exactly the same number of @@ -2062,6 +2106,7 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) sizeof(int) * partdesc->nparts) == 0) { pprune->subpart_map = pinfo->subpart_map; + pprune->leafpart_rti_map = pinfo->leafpart_rti_map; memcpy(pprune->subplan_map, pinfo->subplan_map, sizeof(int) * pinfo->nparts); } @@ -2082,6 +2127,7 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) * mismatches. */ pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts); + pprune->leafpart_rti_map = palloc(sizeof(int) * partdesc->nparts); for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++) { @@ -2099,6 +2145,8 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) pinfo->subplan_map[pd_idx]; pprune->subpart_map[pp_idx] = pinfo->subpart_map[pd_idx]; + pprune->leafpart_rti_map[pp_idx] = + pinfo->leafpart_rti_map[pd_idx]; pd_idx++; continue; } @@ -2136,6 +2184,7 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) pprune->subpart_map[pp_idx] = -1; pprune->subplan_map[pp_idx] = -1; + pprune->leafpart_rti_map[pp_idx] = 0; } } @@ -2414,10 +2463,15 @@ PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This * differentiates the initial executor-time pruning step from later * runtime pruning. + * + * The caller must pass a non-NULL validsubplan_rtis during initial pruning + * to collect the RT indexes of leaf partitions whose subnodes will be + * executed. These RT indexes are later added to EState.es_unpruned_relids. 
*/ Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune) + bool initial_prune, + Bitmapset **validsubplan_rtis) { Bitmapset *result = NULL; MemoryContext oldcontext; @@ -2429,6 +2483,7 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * evaluated *and* there are steps in which to do so. */ Assert(initial_prune || prunestate->do_exec_prune); + Assert(validsubplan_rtis != NULL || !initial_prune); /* * Switch to a temp context to avoid leaking memory in the executor's @@ -2453,7 +2508,7 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, pprune = &prunedata->partrelprunedata[0]; find_matching_subplans_recurse(prunestate->parent_plan, prunedata, pprune, initial_prune, - &result); + &result, validsubplan_rtis); /* Expression eval may have used space in ExprContext too */ if (pprune->exec_context.initialized) @@ -2470,6 +2525,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + if (validsubplan_rtis) + *validsubplan_rtis = bms_copy(*validsubplan_rtis); MemoryContextReset(prunestate->prune_context); @@ -2480,14 +2537,17 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * find_matching_subplans_recurse * Recursive worker function for ExecFindMatchingSubPlans * - * Adds valid (non-prunable) subplan IDs to *validsubplans + * Adds valid (non-prunable) subplan IDs to *validsubplans and the RT indexes + * of their corresponding leaf partitions to *validsubplan_rtis if + * it's non-NULL. 
*/ static void find_matching_subplans_recurse(PlanState *parent_plan, PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans) + Bitmapset **validsubplans, + Bitmapset **validsubplan_rtis) { Bitmapset *partset; int i; @@ -2530,8 +2590,13 @@ find_matching_subplans_recurse(PlanState *parent_plan, while ((i = bms_next_member(partset, i)) >= 0) { if (pprune->subplan_map[i] >= 0) + { *validsubplans = bms_add_member(*validsubplans, pprune->subplan_map[i]); + if (validsubplan_rtis) + *validsubplan_rtis = bms_add_member(*validsubplan_rtis, + pprune->leafpart_rti_map[i]); + } else { int partidx = pprune->subpart_map[i]; @@ -2540,7 +2605,8 @@ find_matching_subplans_recurse(PlanState *parent_plan, find_matching_subplans_recurse(parent_plan, prunedata, &prunedata->partrelprunedata[partidx], - initial_prune, validsubplans); + initial_prune, validsubplans, + validsubplan_rtis); else { /* diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index bc905a0cdc..b7c914d66c 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -147,6 +147,7 @@ CreateExecutorState(void) estate->es_top_eflags = 0; estate->es_instrument = 0; estate->es_finished = false; + estate->es_aborted = false; estate->es_exprcontexts = NIL; diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 8d1fda2ddc..058c10b4d4 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -840,6 +840,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache) dest = None_Receiver; es->qd = CreateQueryDesc(es->stmt, + NULL, fcache->src, GetActiveSnapshot(), InvalidSnapshot, diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index b77ff84840..89e05b19d0 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -581,7 +581,7 @@ choose_next_subplan_locally(AppendState *node) else 
if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; } @@ -648,7 +648,7 @@ choose_next_subplan_for_leader(AppendState *node) if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; /* @@ -724,7 +724,7 @@ choose_next_subplan_for_worker(AppendState *node) else if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; mark_invalid_subplans_as_finished(node); @@ -877,7 +877,7 @@ ExecAppendAsyncBegin(AppendState *node) if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; classify_matching_subplans(node); diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 41754ddfea..cfead7ded2 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -347,8 +347,13 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags) ExecRowMark *erm; ExecAuxRowMark *aerm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). 
+ */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unpruned_relids)) continue; /* find ExecRowMark and build ExecAuxRowMark */ diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index e2032afcb7..0696dfe7eb 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -219,7 +219,7 @@ ExecMergeAppend(PlanState *pstate) */ if (node->ms_valid_subplans == NULL) node->ms_valid_subplans = - ExecFindMatchingSubPlans(node->ms_prune_state, false); + ExecFindMatchingSubPlans(node->ms_prune_state, false, NULL); /* * First time through: pull the first tuple from each valid subplan, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 1161520f76..7413a29eda 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -636,7 +636,7 @@ ExecInitUpdateProjection(ModifyTableState *mtstate, Assert(whichrel >= 0 && whichrel < mtstate->mt_nrels); } - updateColnos = (List *) list_nth(node->updateColnosLists, whichrel); + updateColnos = (List *) list_nth(mtstate->mt_updateColnosLists, whichrel); /* * For UPDATE, we use the old tuple to fill up missing values in the tuple @@ -4282,7 +4282,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ModifyTableState *mtstate; Plan *subplan = outerPlan(node); CmdType operation = node->operation; - int nrels = list_length(node->resultRelations); + int nrels; + List *resultRelations = NIL; + List *withCheckOptionLists = NIL; + List *returningLists = NIL; + List *updateColnosLists = NIL; ResultRelInfo *resultRelInfo; List *arowmarks; ListCell *l; @@ -4292,6 +4296,45 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + /* + * Only consider unpruned relations for initializing their ResultRelInfo + * struct and other fields such as withCheckOptions, 
etc. + */ + i = 0; + foreach(l, node->resultRelations) + { + Index rti = lfirst_int(l); + + if (bms_is_member(rti, estate->es_unpruned_relids)) + { + resultRelations = lappend_int(resultRelations, rti); + if (node->withCheckOptionLists) + { + List *withCheckOptions = list_nth_node(List, + node->withCheckOptionLists, + i); + + withCheckOptionLists = lappend(withCheckOptionLists, withCheckOptions); + } + if (node->returningLists) + { + List *returningList = list_nth_node(List, + node->returningLists, + i); + + returningLists = lappend(returningLists, returningList); + } + if (node->updateColnosLists) + { + List *updateColnosList = list_nth(node->updateColnosLists, i); + + updateColnosLists = lappend(updateColnosLists, updateColnosList); + } + } + i++; + } + nrels = list_length(resultRelations); + /* * create state structure */ @@ -4312,6 +4355,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->mt_merge_inserted = 0; mtstate->mt_merge_updated = 0; mtstate->mt_merge_deleted = 0; + mtstate->mt_updateColnosLists = updateColnosLists; /*---------- * Resolve the target relation. 
This is the same as: @@ -4329,6 +4373,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (node->rootRelation > 0) { + Assert(bms_is_member(node->rootRelation, estate->es_unpruned_relids)); mtstate->rootResultRelInfo = makeNode(ResultRelInfo); ExecInitResultRelation(estate, mtstate->rootResultRelInfo, node->rootRelation); @@ -4343,7 +4388,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* set up epqstate with dummy subplan data for the moment */ EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, - node->epqParam, node->resultRelations); + node->epqParam, resultRelations); mtstate->fireBSTriggers = true; /* @@ -4361,7 +4406,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ resultRelInfo = mtstate->resultRelInfo; i = 0; - foreach(l, node->resultRelations) + foreach(l, resultRelations) { Index resultRelation = lfirst_int(l); List *mergeActions = NIL; @@ -4505,7 +4550,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) * Initialize any WITH CHECK OPTION constraints if needed. */ resultRelInfo = mtstate->resultRelInfo; - foreach(l, node->withCheckOptionLists) + foreach(l, withCheckOptionLists) { List *wcoList = (List *) lfirst(l); List *wcoExprs = NIL; @@ -4528,7 +4573,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Initialize RETURNING projections if needed. */ - if (node->returningLists) + if (returningLists) { TupleTableSlot *slot; ExprContext *econtext; @@ -4537,7 +4582,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) * Initialize result tuple slot and assign its rowtype using the first * RETURNING list. We assume the rest will look the same. 
*/ - mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists); + mtstate->ps.plan->targetlist = (List *) linitial(returningLists); /* Set up a slot for the output of the RETURNING projection(s) */ ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual); @@ -4552,7 +4597,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) * Build a projection for each result rel. */ resultRelInfo = mtstate->resultRelInfo; - foreach(l, node->returningLists) + foreach(l, returningLists) { List *rlist = (List *) lfirst(l); @@ -4653,8 +4698,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ExecRowMark *erm; ExecAuxRowMark *aerm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). + */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unpruned_relids)) continue; /* Find ExecRowMark and build ExecAuxRowMark */ diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 2fb2e73604..a7f9824e4d 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -70,7 +70,8 @@ static int _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes, Datum *Values, const char *Nulls); -static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount); +static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount, + CachedPlanSource *plansource, int query_index); static void _SPI_error_callback(void *arg); @@ -1685,7 +1686,8 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, query_string, plansource->commandTag, stmt_list, - cplan); + cplan, + plansource); /* * Set up options for portal. 
Default SCROLL type is chosen the same way @@ -2500,6 +2502,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); List *stmt_list; ListCell *lc2; + int query_index = 0; spicallbackarg.query = plansource->query_string; @@ -2690,14 +2693,16 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, snap = InvalidSnapshot; qdesc = CreateQueryDesc(stmt, + cplan, plansource->query_string, snap, crosscheck_snapshot, dest, options->params, _SPI_current->queryEnv, 0); - res = _SPI_pquery(qdesc, fire_triggers, - canSetTag ? options->tcount : 0); + + res = _SPI_pquery(qdesc, fire_triggers, canSetTag ? options->tcount : 0, + plansource, query_index); FreeQueryDesc(qdesc); } else @@ -2794,6 +2799,8 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, my_res = res; goto fail; } + + query_index++; } /* Done with this plan, so release refcount */ @@ -2871,7 +2878,8 @@ _SPI_convert_params(int nargs, Oid *argtypes, } static int -_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount) +_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount, + CachedPlanSource *plansource, int query_index) { int operation = queryDesc->operation; int eflags; @@ -2927,7 +2935,10 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount) else eflags = EXEC_FLAG_SKIP_TRIGGERS; - ExecutorStart(queryDesc, eflags); + if (queryDesc->cplan) + ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index); + else + ExecutorStart(queryDesc, eflags); ExecutorRun(queryDesc, ForwardScanDirection, tcount, true); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9c253e864a..5fe2eeb65c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -559,6 +559,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->planTree = top_plan; 
result->partPruneInfos = glob->partPruneInfos; result->rtable = glob->finalrtable; + result->unprunableRelids = bms_difference(glob->allRelids, + glob->prunableRelids); result->permInfos = glob->finalrteperminfos; result->resultRelations = glob->resultRelations; result->appendRelations = glob->appendRelations; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 9f13243d54..053d2687f2 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -564,7 +564,8 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, List *rteperminfos, /* * If it's a plain relation RTE (or a subquery that was once a view - * reference), add the relation OID to relationOids. + * reference), add the relation OID to relationOids. Also add its new RT + * index to the set of relations that need to be locked for execution. * * We do this even though the RTE might be unreferenced in the plan tree; * this would correspond to cases such as views that were expanded, child @@ -576,7 +577,11 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, List *rteperminfos, */ if (newrte->rtekind == RTE_RELATION || (newrte->rtekind == RTE_SUBQUERY && OidIsValid(newrte->relid))) + { glob->relationOids = lappend_oid(glob->relationOids, newrte->relid); + glob->allRelids = bms_add_member(glob->allRelids, + list_length(glob->finalrtable)); + } /* * Add a copy of the RTEPermissionInfo, if any, corresponding to this RTE @@ -1740,6 +1745,11 @@ set_customscan_references(PlannerInfo *root, * * Also update the RT indexes present in PartitionedRelPruneInfos to add the * offset. + * + * Finally, if there are initial pruning steps, add the RT indexes of the + * leaf partitions to the set of relations that are prunable at execution + * startup time. This set indicates which relations should not be locked + * before executor startup, as they may be pruned during initial pruning. 
*/ static int register_partpruneinfo(PlannerInfo *root, int part_prune_index, int rtoffset) @@ -1762,8 +1772,25 @@ register_partpruneinfo(PlannerInfo *root, int part_prune_index, int rtoffset) foreach(l2, prune_infos) { PartitionedRelPruneInfo *prelinfo = lfirst(l2); + int i; prelinfo->rtindex += rtoffset; + + for (i = 0; i < prelinfo->nparts; i++) + { + /* + * Non-leaf partitions and partitions that do not have a + * subplan are not included in this map as mentioned in + * make_partitionedrel_pruneinfo(). + */ + if (prelinfo->leafpart_rti_map[i]) + { + prelinfo->leafpart_rti_map[i] += rtoffset; + if (prelinfo->initial_pruning_steps) + glob->prunableRelids = bms_add_member(glob->prunableRelids, + prelinfo->leafpart_rti_map[i]); + } + } } } diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index ae1d69f96c..03e596c405 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -645,6 +645,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *subplan_map; int *subpart_map; Oid *relid_map; + int *leafpart_rti_map; /* * Construct the subplan and subpart maps for this partitioning level. 
@@ -657,6 +658,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subpart_map = (int *) palloc(nparts * sizeof(int)); memset(subpart_map, -1, nparts * sizeof(int)); relid_map = (Oid *) palloc0(nparts * sizeof(Oid)); + leafpart_rti_map = (int *) palloc0(nparts * sizeof(int)); present_parts = NULL; i = -1; @@ -671,9 +673,28 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subplan_map[i] = subplanidx = relid_subplan_map[partrel->relid] - 1; subpart_map[i] = subpartidx = relid_subpart_map[partrel->relid] - 1; relid_map[i] = planner_rt_fetch(partrel->relid, root)->relid; + + /* + * Track the RT indexes of "leaf" partitions so they can be + * included in the PlannerGlobal.prunableRelids set, indicating + * relations whose locking is deferred until executor startup. + * + * We don't defer locking of sub-partitioned partitions because + * setting up PartitionedRelPruningData currently occurs before + * initial pruning, so the relation must be locked at that stage, + * even if it may be pruned. + * + * Only leaf partitions with a valid subplan that are prunable + * using initial pruning are added to prunableRelids. So + * partitions without a subplan due to constraint exclusion will + * remain in PlannedStmt.unprunableRelids and thus their locking + * will not be deferred even if they may ultimately be pruned due + * to initial pruning. 
+ */ if (subplanidx >= 0) { present_parts = bms_add_member(present_parts, i); + leafpart_rti_map[i] = (int) partrel->relid; /* Record finding this subplan */ subplansfound = bms_add_member(subplansfound, subplanidx); @@ -695,6 +716,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->subplan_map = subplan_map; pinfo->subpart_map = subpart_map; pinfo->relid_map = relid_map; + pinfo->leafpart_rti_map = leafpart_rti_map; } pfree(relid_subpart_map); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 4b985bd056..48b0675070 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1236,6 +1236,7 @@ exec_simple_query(const char *query_string) query_string, commandTag, plantree_list, + NULL, NULL); /* @@ -2038,7 +2039,8 @@ exec_bind_message(StringInfo input_message) query_string, psrc->commandTag, cplan->stmt_list, - cplan); + cplan, + psrc); /* Done with the snapshot used for parameter I/O and parsing/planning */ if (snapshot_set) diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 0c45fcf318..fe52db1369 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -19,6 +19,7 @@ #include "access/xact.h" #include "commands/prepare.h" +#include "executor/execdesc.h" #include "executor/tstoreReceiver.h" #include "miscadmin.h" #include "pg_trace.h" @@ -36,6 +37,9 @@ Portal ActivePortal = NULL; static void ProcessQuery(PlannedStmt *plan, + CachedPlan *cplan, + CachedPlanSource *plansource, + int query_index, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -65,6 +69,7 @@ static void DoPortalRewind(Portal portal); */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, + CachedPlan *cplan, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, @@ -77,6 +82,7 @@ CreateQueryDesc(PlannedStmt *plannedstmt, qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ + qd->cplan = cplan; 
/* CachedPlan supplying the plannedstmt */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check snapshot */ @@ -122,6 +128,9 @@ FreeQueryDesc(QueryDesc *qdesc) * PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal * * plan: the plan tree for the query + * cplan: CachedPlan supplying the plan + * plansource: CachedPlanSource supplying the cplan + * query_index: index of the query in plansource->query_list * sourceText: the source text of the query * params: any parameters needed * dest: where to send results @@ -134,6 +143,9 @@ FreeQueryDesc(QueryDesc *qdesc) */ static void ProcessQuery(PlannedStmt *plan, + CachedPlan *cplan, + CachedPlanSource *plansource, + int query_index, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -145,14 +157,17 @@ ProcessQuery(PlannedStmt *plan, /* * Create the QueryDesc object */ - queryDesc = CreateQueryDesc(plan, sourceText, + queryDesc = CreateQueryDesc(plan, cplan, sourceText, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); /* - * Call ExecutorStart to prepare the plan for execution + * Prepare the plan for execution */ - ExecutorStart(queryDesc, 0); + if (queryDesc->cplan) + ExecutorStartCachedPlan(queryDesc, 0, plansource, query_index); + else + ExecutorStart(queryDesc, 0); /* * Run the plan to completion. @@ -493,6 +508,7 @@ PortalStart(Portal portal, ParamListInfo params, * the destination to DestNone. */ queryDesc = CreateQueryDesc(linitial_node(PlannedStmt, portal->stmts), + portal->cplan, portal->sourceText, GetActiveSnapshot(), InvalidSnapshot, @@ -512,9 +528,13 @@ PortalStart(Portal portal, ParamListInfo params, myeflags = eflags; /* - * Call ExecutorStart to prepare the plan for execution + * Prepare the plan for execution. 
*/ - ExecutorStart(queryDesc, myeflags); + if (portal->cplan) + ExecutorStartCachedPlan(queryDesc, myeflags, + portal->plansource, 0); + else + ExecutorStart(queryDesc, myeflags); /* * This tells PortalCleanup to shut down the executor @@ -1194,6 +1214,7 @@ PortalRunMulti(Portal portal, { bool active_snapshot_set = false; ListCell *stmtlist_item; + int query_index = 0; /* * If the destination is DestRemoteExecute, change to DestNone. The @@ -1275,6 +1296,9 @@ PortalRunMulti(Portal portal, { /* statement can set tag string */ ProcessQuery(pstmt, + portal->cplan, + portal->plansource, + query_index, portal->sourceText, portal->portalParams, portal->queryEnv, @@ -1284,6 +1308,9 @@ PortalRunMulti(Portal portal, { /* stmt added by rewrite cannot set tag */ ProcessQuery(pstmt, + portal->cplan, + portal->plansource, + query_index, portal->sourceText, portal->portalParams, portal->queryEnv, @@ -1348,6 +1375,8 @@ PortalRunMulti(Portal portal, */ if (lnext(portal->stmts, stmtlist_item) != NULL) CommandCounterIncrement(); + + query_index++; } /* Pop the snapshot if we pushed one. */ diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index c66a088f40..8908a0cdc2 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -101,7 +101,8 @@ static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_l static void ReleaseGenericPlan(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, - QueryEnvironment *queryEnv); + QueryEnvironment *queryEnv, + bool release_generic); static bool CheckCachedPlan(CachedPlanSource *plansource); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ParamListInfo boundParams, QueryEnvironment *queryEnv); @@ -578,10 +579,17 @@ ReleaseGenericPlan(CachedPlanSource *plansource) * The result value is the transient analyzed-and-rewritten query tree if we * had to do re-analysis, and NIL otherwise. 
(This is returned just to save * a tree copying step in a subsequent BuildCachedPlan call.) + * + * This also releases and drops the generic plan (plansource->gplan), if any, + * as most callers will typically build a new CachedPlan for the plansource + * right after this. However, when called from UpdateCachedPlan(), the + * function does not release the generic plan, as UpdateCachedPlan() updates + * an existing CachedPlan in place. */ static List * RevalidateCachedQuery(CachedPlanSource *plansource, - QueryEnvironment *queryEnv) + QueryEnvironment *queryEnv, + bool release_generic) { bool snapshot_set; RawStmt *rawtree; @@ -678,8 +686,9 @@ RevalidateCachedQuery(CachedPlanSource *plansource, MemoryContextDelete(qcxt); } - /* Drop the generic plan reference if any */ - ReleaseGenericPlan(plansource); + /* Drop the generic plan reference, if any, and if requested */ + if (release_generic) + ReleaseGenericPlan(plansource); /* * Now re-do parse analysis and rewrite. This not incidentally acquires @@ -815,8 +824,11 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * Caller must have already called RevalidateCachedQuery to verify that the * querytree is up to date. * - * On a "true" return, we have acquired the locks needed to run the plan. - * (We must do this for the "true" result to be race-condition-free.) + * On a "true" return, we have acquired locks on the "unprunableRelids" set + * for all plans in plansource->gplan->stmt_list. However, the plans are not + * fully race-condition-free until the executor acquires locks on the + * prunable relations that survive initial runtime pruning during executor + * initialization. */ static bool CheckCachedPlan(CachedPlanSource *plansource) @@ -870,7 +882,11 @@ CheckCachedPlan(CachedPlanSource *plansource) */ if (plan->is_valid) { - /* Successfully revalidated and locked the query. */ + /* + * Successfully revalidated and locked the query. Set is_reused + * to true so that CachedPlanRequiresLocking() returns true. 
+ */ + plan->is_reused = true; return true; } @@ -895,12 +911,14 @@ CheckCachedPlan(CachedPlanSource *plansource) * To build a generic, parameter-value-independent plan, pass NULL for * boundParams. To build a custom plan, pass the actual parameter values via * boundParams. For best effect, the PARAM_FLAG_CONST flag should be set on - * each parameter value; otherwise the planner will treat the value as a - * hint rather than a hard constant. + * each parameter value; otherwise the planner will treat the value as a hint + * rather than a hard constant. * * Planning work is done in the caller's memory context. The finished plan * is in a child memory context, which typically should get reparented * (unless this is a one-shot plan, in which case we don't copy the plan). + * + * Note: When changing this, you should also look at UpdateCachedPlan(). */ static CachedPlan * BuildCachedPlan(CachedPlanSource *plansource, List *qlist, @@ -911,6 +929,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, bool snapshot_set; bool is_transient; MemoryContext plan_context; + MemoryContext stmt_context = NULL; MemoryContext oldcxt = CurrentMemoryContext; ListCell *lc; @@ -928,7 +947,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, * let's treat it as real and redo the RevalidateCachedQuery call. */ if (!plansource->is_valid) - qlist = RevalidateCachedQuery(plansource, queryEnv); + qlist = RevalidateCachedQuery(plansource, queryEnv, true); /* * If we don't already have a copy of the querytree list that can be @@ -967,10 +986,19 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, PopActiveSnapshot(); /* - * Normally we make a dedicated memory context for the CachedPlan and its - * subsidiary data. (It's probably not going to be large, but just in - * case, allow it to grow large. It's transient for the moment.) But for - * a one-shot plan, we just leave it in the caller's memory context. 
+ * Normally, we create a dedicated memory context for the CachedPlan and + * its subsidiary data. Although it's usually not very large, the context + * is designed to allow growth if necessary. + * + * The PlannedStmts are stored in a separate child context (stmt_context) + * of the CachedPlan's memory context. This separation allows + * UpdateCachedPlan() to free and replace the PlannedStmts without + * affecting the CachedPlan structure or its stmt_list List. + * + * For one-shot plans, we instead use the caller's memory context, as the + * CachedPlan will not persist. stmt_context will be set to NULL in this + * case, because UpdateCachedPlan() should never get called on a one-shot + * plan. */ if (!plansource->is_oneshot) { @@ -979,12 +1007,17 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ALLOCSET_START_SMALL_SIZES); MemoryContextCopyAndSetIdentifier(plan_context, plansource->query_string); - /* - * Copy plan into the new context. - */ - MemoryContextSwitchTo(plan_context); + stmt_context = AllocSetContextCreate(CurrentMemoryContext, + "CachedPlan PlannedStmts", + ALLOCSET_START_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(stmt_context, plansource->query_string); + MemoryContextSetParent(stmt_context, plan_context); + MemoryContextSwitchTo(stmt_context); plist = copyObject(plist); + + MemoryContextSwitchTo(plan_context); + plist = list_copy(plist); } else plan_context = CurrentMemoryContext; @@ -1025,8 +1058,10 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, plan->saved_xmin = InvalidTransactionId; plan->refcount = 0; plan->context = plan_context; + plan->stmt_context = stmt_context; plan->is_oneshot = plansource->is_oneshot; plan->is_saved = false; + plan->is_reused = false; plan->is_valid = true; /* assign generation number to new plan */ @@ -1153,8 +1188,11 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) * plan or a custom plan for the given parameters: the caller does not know * which it will get. 
* - * On return, the plan is valid and we have sufficient locks to begin - * execution. + * On return, the plan is valid, but not all locks are acquired if the + * returned plan is a reused generic plan. In such cases, locks on relations + * subject to initial runtime pruning are not taken by CheckCachedPlan() but + * deferred until the execution startup phase, specifically when + * ExecDoInitialPruning() performs initial pruning. * * On return, the refcount of the plan has been incremented; a later * ReleaseCachedPlan() call is expected. If "owner" is not NULL then @@ -1180,7 +1218,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, elog(ERROR, "cannot apply ResourceOwner to non-saved cached plan"); /* Make sure the querytree list is valid and we have parse-time locks */ - qlist = RevalidateCachedQuery(plansource, queryEnv); + qlist = RevalidateCachedQuery(plansource, queryEnv, true); /* Decide whether to use a custom plan */ customplan = choose_custom_plan(plansource, boundParams); @@ -1276,6 +1314,113 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, return plan; } +/* + * UpdateCachedPlan + * Create fresh plans for all queries in the CachedPlanSource, replacing + * those in the generic plan's stmt_list, and return the plan for the + * query_index'th query. + * + * This function is primarily used by ExecutorStartCachedPlan() to handle + * cases where the original generic CachedPlan becomes invalid. Such + * invalidation may occur when prunable relations in the old plan for the + * query_index'th query are locked in preparation for execution. + * + * Note that invalidations received during the execution of the query_index'th + * query can affect both the queries that have already finished execution + * (e.g., due to concurrent modifications on prunable relations that were not + * locked during their execution) and also the queries that have not yet been + * executed. 
As a result, this function updates all plans to ensure + * CachedPlan.is_valid is safely set to true. + * + * The old PlannedStmts in plansource->gplan->stmt_list are freed here, so + * the caller and any of its callers must not rely on them remaining accessible + * after this function is called. + */ +PlannedStmt * +UpdateCachedPlan(CachedPlanSource *plansource, int query_index, + QueryEnvironment *queryEnv) +{ + List *query_list = plansource->query_list, + *plan_list; + ListCell *l1, + *l2; + CachedPlan *plan = plansource->gplan; + MemoryContext oldcxt; + + Assert(ActiveSnapshotSet()); + + /* Sanity checks */ + if (plan == NULL) + elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan is NULL"); + else if (plan->is_valid) + elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan->is_valid is true"); + else if (plan->is_oneshot) + elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan->is_oneshot is true"); + + /* + * The plansource might have become invalid since GetCachedPlan() returned + * the CachedPlan. See the comment in BuildCachedPlan() for details on why + * this might happen. Although invalidation is likely a false positive as + * stated there, we make the plan valid to ensure the query list used for + * planning is up to date. + * + * The risk of catching an invalidation is higher here than when + * BuildCachedPlan() is called from GetCachedPlan(), because this function + * is normally called long after GetCachedPlan() returns the CachedPlan, so + * much more processing could have occurred including things that mark + * the CachedPlanSource invalid. + * + * Note: Do not release plansource->gplan, because the upstream callers + * (such as the callers of ExecutorStartCachedPlan()) would still be + * referencing it. 
+ */ + if (!plansource->is_valid) + query_list = RevalidateCachedQuery(plansource, queryEnv, false); + Assert(query_list != NIL); + + /* + * Build a new generic plan for all the queries after making a copy to be + * scribbled on by the planner. + */ + query_list = copyObject(query_list); + + /* + * Planning work is done in the caller's memory context. The resulting + * PlannedStmt is then copied into plan->stmt_context after throwing + * away the old ones. + */ + plan_list = pg_plan_queries(query_list, plansource->query_string, + plansource->cursor_options, NULL); + Assert(list_length(plan_list) == list_length(plan->stmt_list)); + + MemoryContextReset(plan->stmt_context); + oldcxt = MemoryContextSwitchTo(plan->stmt_context); + forboth (l1, plan_list, l2, plan->stmt_list) + { + PlannedStmt *plannedstmt = lfirst(l1); + + lfirst(l2) = copyObject(plannedstmt); + } + MemoryContextSwitchTo(oldcxt); + + /* + * XXX Should this also (re)set the properties of the CachedPlan that are + * set in BuildCachedPlan() after creating the fresh plans such as + * planRoleId, dependsOnRole, and save_xmin? + */ + + /* + * We've updated all the plans that might have been invalidated, so mark + * the CachedPlan as valid. + */ + plan->is_valid = true; + + /* Also update generic_cost because we just created a new generic plan. */ + plansource->generic_cost = cached_plan_cost(plan, false); + + return list_nth_node(PlannedStmt, plan->stmt_list, query_index); +} + /* * ReleaseCachedPlan: release active use of a cached plan. 
* @@ -1654,7 +1799,7 @@ CachedPlanGetTargetList(CachedPlanSource *plansource, return NIL; /* Make sure the querytree list is valid and we have parse-time locks */ - RevalidateCachedQuery(plansource, queryEnv); + RevalidateCachedQuery(plansource, queryEnv, true); /* Get the primary statement and find out what it returns */ pstmt = QueryListGetPrimaryStmt(plansource->query_list); @@ -1776,7 +1921,7 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) foreach(lc1, stmt_list) { PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); - ListCell *lc2; + int rtindex; if (plannedstmt->commandType == CMD_UTILITY) { @@ -1794,13 +1939,16 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) continue; } - foreach(lc2, plannedstmt->rtable) + rtindex = -1; + while ((rtindex = bms_next_member(plannedstmt->unprunableRelids, + rtindex)) >= 0) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + RangeTblEntry *rte = list_nth_node(RangeTblEntry, + plannedstmt->rtable, + rtindex - 1); - if (!(rte->rtekind == RTE_RELATION || - (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)))) - continue; + Assert(rte->rtekind == RTE_RELATION || + (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid))); /* * Acquire the appropriate type of lock on each relation OID. 
Note diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index 93137820ac..ef4791bf65 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -284,7 +284,8 @@ PortalDefineQuery(Portal portal, const char *sourceText, CommandTag commandTag, List *stmts, - CachedPlan *cplan) + CachedPlan *cplan, + CachedPlanSource *plansource) { Assert(PortalIsValid(portal)); Assert(portal->status == PORTAL_NEW); @@ -299,6 +300,7 @@ PortalDefineQuery(Portal portal, portal->commandTag = commandTag; portal->stmts = stmts; portal->cplan = cplan; + portal->plansource = plansource; portal->status = PORTAL_DEFINED; } diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index aa5872bc15..09c1b1367a 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -103,8 +103,10 @@ extern void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, ParseState *pstate, ParamListInfo params); -extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, +extern void ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, + CachedPlanSource *plansource, int plan_index, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage, diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 8a5a9fe642..db21561c8c 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -258,6 +258,7 @@ extern void ExecASTruncateTriggers(EState *estate, extern void AfterTriggerBeginXact(void); extern void AfterTriggerBeginQuery(void); extern void AfterTriggerEndQuery(EState *estate); +extern void AfterTriggerAbortQuery(void); extern void AfterTriggerFireDeferred(void); extern void AfterTriggerEndXact(bool isCommit); extern void AfterTriggerBeginSubXact(void); diff --git 
a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 0b34784922..a0843481f7 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -49,6 +49,8 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. + * leafpart_rti_map RT index by partition index, or 0 if not a leaf + * partition. * present_parts A Bitmapset of the partition indexes that we * have subplans or subparts for. * initial_pruning_steps List of PartitionPruneSteps used to @@ -69,6 +71,7 @@ typedef struct PartitionedRelPruningData int nparts; int *subplan_map; int *subpart_map; + int *leafpart_rti_map; Bitmapset *present_parts; List *initial_pruning_steps; List *exec_pruning_steps; @@ -140,6 +143,7 @@ extern PartitionPruneState *ExecInitPartitionExecPruning(PlanState *planstate, Bitmapset *relids, Bitmapset **initially_valid_subplans); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune); + bool initial_prune, + Bitmapset **validsubplan_rtis); #endif /* EXECPARTITION_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index 0a7274e26c..0e7245435d 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -35,6 +35,7 @@ typedef struct QueryDesc /* These fields are provided by CreateQueryDesc */ CmdType operation; /* CMD_SELECT, CMD_UPDATE, etc. 
*/ PlannedStmt *plannedstmt; /* planner's output (could be utility, too) */ + CachedPlan *cplan; /* CachedPlan that supplies the plannedstmt */ const char *sourceText; /* source text of the query */ Snapshot snapshot; /* snapshot to use for query */ Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */ @@ -57,6 +58,7 @@ typedef struct QueryDesc /* in pquery.c */ extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt, + CachedPlan *cplan, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 69c3ebff00..6d72f7d9d6 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -19,6 +19,7 @@ #include "nodes/lockoptions.h" #include "nodes/parsenodes.h" #include "utils/memutils.h" +#include "utils/plancache.h" /* @@ -198,6 +199,9 @@ ExecGetJunkAttribute(TupleTableSlot *slot, AttrNumber attno, bool *isNull) * prototypes from functions in execMain.c */ extern void ExecutorStart(QueryDesc *queryDesc, int eflags); +extern void ExecutorStartCachedPlan(QueryDesc *queryDesc, int eflags, + CachedPlanSource *plansource, + int query_index); extern void standard_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once); @@ -261,6 +265,30 @@ extern void ExecEndNode(PlanState *node); extern void ExecShutdownNode(PlanState *node); extern void ExecSetTupleBound(int64 tuples_needed, PlanState *child_node); +/* + * Is the CachedPlan in es_cachedplan still valid? + * + * Called from InitPlan() because invalidation messages that affect the plan + * might be received after locks have been taken on runtime-prunable relations. + * The caller should take appropriate action if the plan has become invalid. + */ +static inline bool +ExecPlanStillValid(EState *estate) +{ + return estate->es_cachedplan == NULL ? 
true : + CachedPlanValid(estate->es_cachedplan); +} + +/* + * Locks are needed only if running a cached plan that might contain unlocked + * relations, such as a reused generic plan. + */ +static inline bool +ExecShouldLockRelations(EState *estate) +{ + return estate->es_cachedplan == NULL ? false : + CachedPlanRequiresLocking(estate->es_cachedplan); +} /* ---------------------------------------------------------------- * ExecProcNode diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index f93061c7bf..9643a9d626 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -42,6 +42,7 @@ #include "storage/condition_variable.h" #include "utils/hsearch.h" #include "utils/queryenvironment.h" +#include "utils/plancache.h" #include "utils/reltrigger.h" #include "utils/sharedtuplestore.h" #include "utils/snapshot.h" @@ -639,9 +640,14 @@ typedef struct EState * ExecRowMarks, or NULL if none */ List *es_rteperminfos; /* List of RTEPermissionInfo */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + CachedPlan *es_cachedplan; /* CachedPlan providing the plan tree */ List *es_part_prune_infos; /* List of PartitionPruneInfo */ List *es_part_prune_states; /* List of PartitionPruneState */ List *es_part_prune_results; /* List of Bitmapset */ + Bitmapset *es_unpruned_relids; /* PlannedStmt.unprunableRelids + RT + * indexes of leaf partitions that + * survive initial pruning; see + * ExecDoInitialPruning() */ const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ @@ -687,6 +693,7 @@ typedef struct EState int es_top_eflags; /* eflags passed to ExecutorStart */ int es_instrument; /* OR of InstrumentOption flags */ bool es_finished; /* true when ExecutorFinish is done */ + bool es_aborted; /* true when execution was aborted */ List *es_exprcontexts; /* List of ExprContexts within EState */ @@ -1426,6 +1433,12 @@ typedef struct ModifyTableState double 
mt_merge_inserted; double mt_merge_updated; double mt_merge_deleted; + + /* + * List of valid updateColnosLists. Contains only those belonging to + * unpruned relations from ModifyTable.updateColnosLists. + */ + List *mt_updateColnosLists; } ModifyTableState; /* ---------------- diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index f8a4cd42c6..ef6156f30b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -116,6 +116,14 @@ typedef struct PlannerGlobal /* "flat" rangetable for executor */ List *finalrtable; + /* + * RT indexes of all relation RTEs in finalrtable (RTE_RELATION and + * RTE_SUBQUERY RTEs of views) and of those that are subject to runtime + * pruning at plan initialization time ("initial" pruning). + */ + Bitmapset *allRelids; + Bitmapset *prunableRelids; + /* "flat" list of RTEPermissionInfos */ List *finalrteperminfos; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index ef89927471..59699a1f86 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -74,6 +74,10 @@ typedef struct PlannedStmt List *rtable; /* list of RangeTblEntry nodes */ + Bitmapset *unprunableRelids; /* RT indexes of relations that are not + * subject to runtime pruning; set for + * AcquireExecutorLocks(). 
*/ + List *permInfos; /* list of RTEPermissionInfo nodes for rtable * entries needing one */ @@ -1476,6 +1480,9 @@ typedef struct PartitionedRelPruneInfo /* subpart index by partition index, or -1 */ int *subpart_map pg_node_attr(array_size(nparts)); + /* RT index by partition index, or 0 if not a leaf partition */ + int *leafpart_rti_map pg_node_attr(array_size(nparts)); + /* relation OID by partition index, or 0 */ Oid *relid_map pg_node_attr(array_size(nparts)); diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index a90dfdf906..72862f5e85 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -18,6 +18,8 @@ #include "access/tupdesc.h" #include "lib/ilist.h" #include "nodes/params.h" +#include "nodes/parsenodes.h" +#include "nodes/plannodes.h" #include "tcop/cmdtag.h" #include "utils/queryenvironment.h" #include "utils/resowner.h" @@ -139,10 +141,11 @@ typedef struct CachedPlanSource * The reference count includes both the link from the parent CachedPlanSource * (if any), and any active plan executions, so the plan can be discarded * exactly when refcount goes to zero. Both the struct itself and the - * subsidiary data live in the context denoted by the context field. - * This makes it easy to free a no-longer-needed cached plan. (However, - * if is_oneshot is true, the context does not belong solely to the CachedPlan - * so no freeing is possible.) + * subsidiary data, except the PlannedStmts in stmt_list, live in the context + * denoted by the context field; the PlannedStmts live in the context denoted + * by stmt_context. Separate contexts make it easy to free a no-longer-needed + * cached plan. (However, if is_oneshot is true, the context does not belong + * solely to the CachedPlan so no freeing is possible.) */ typedef struct CachedPlan { @@ -150,6 +153,7 @@ typedef struct CachedPlan List *stmt_list; /* list of PlannedStmts */ bool is_oneshot; /* is it a "oneshot" plan?
*/ bool is_saved; /* is CachedPlan in a long-lived context? */ + bool is_reused; /* is it a reused generic plan? */ bool is_valid; /* is the stmt_list currently valid? */ Oid planRoleId; /* Role ID the plan was created for */ bool dependsOnRole; /* is plan specific to that role? */ @@ -158,6 +162,10 @@ typedef struct CachedPlan int generation; /* parent's generation number for this plan */ int refcount; /* count of live references to this struct */ MemoryContext context; /* context containing this CachedPlan */ + MemoryContext stmt_context; /* context containing the PlannedStmts in + * stmt_list, but not the List itself which + * is in the above context; NULL if is_oneshot + * is true. */ } CachedPlan; /* @@ -223,6 +231,10 @@ extern CachedPlan *GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, ResourceOwner owner, QueryEnvironment *queryEnv); +extern PlannedStmt *UpdateCachedPlan(CachedPlanSource *plansource, + int query_index, + QueryEnvironment *queryEnv); + extern void ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner); extern bool CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, @@ -235,4 +247,34 @@ extern bool CachedPlanIsSimplyValid(CachedPlanSource *plansource, extern CachedExpression *GetCachedExpression(Node *expr); extern void FreeCachedExpression(CachedExpression *cexpr); +/* + * CachedPlanRequiresLocking: should the executor acquire additional locks? + * + * If the plan is a reused generic plan, the executor must acquire locks for + * relations that are not covered by AcquireExecutorLocks(), such as partitions + * that are subject to initial runtime pruning. + * + * Note: These locks are unnecessary if the plan is executed immediately after + * its creation, since the planner would have already acquired them. However, + * we do not optimize for that case.
+ */ +static inline bool +CachedPlanRequiresLocking(CachedPlan *cplan) +{ + return !cplan->is_oneshot && cplan->is_reused; +} + +/* + * CachedPlanValid + * Returns whether a cached generic plan is still valid. + * + * Invoked by the executor to check if the plan has not been invalidated after + * taking locks during the initialization of the plan. + */ +static inline bool +CachedPlanValid(CachedPlan *cplan) +{ + return cplan->is_valid; +} + #endif /* PLANCACHE_H */ diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index 29f49829f2..58c3828d2c 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -138,6 +138,7 @@ typedef struct PortalData QueryCompletion qc; /* command completion data for executed query */ List *stmts; /* list of PlannedStmts */ CachedPlan *cplan; /* CachedPlan, if stmts are from one */ + CachedPlanSource *plansource; /* CachedPlanSource, for cplan */ ParamListInfo portalParams; /* params to pass to query */ QueryEnvironment *queryEnv; /* environment for query */ @@ -241,7 +242,8 @@ extern void PortalDefineQuery(Portal portal, const char *sourceText, CommandTag commandTag, List *stmts, - CachedPlan *cplan); + CachedPlan *cplan, + CachedPlanSource *plansource); extern PlannedStmt *PortalGetPrimaryStmt(Portal portal); extern void PortalCreateHoldStore(Portal portal); extern void PortalHashTableDeleteAll(void); diff --git a/src/test/modules/delay_execution/Makefile b/src/test/modules/delay_execution/Makefile index 70f24e846d..3eeb097fde 100644 --- a/src/test/modules/delay_execution/Makefile +++ b/src/test/modules/delay_execution/Makefile @@ -8,7 +8,8 @@ OBJS = \ delay_execution.o ISOLATION = partition-addition \ - partition-removal-1 + partition-removal-1 \ + cached-plan-inval ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/src/test/modules/delay_execution/delay_execution.c b/src/test/modules/delay_execution/delay_execution.c index fa4693a3f5..44aa828fdf 100644 --- 
a/src/test/modules/delay_execution/delay_execution.c +++ b/src/test/modules/delay_execution/delay_execution.c @@ -1,14 +1,18 @@ /*------------------------------------------------------------------------- * * delay_execution.c - * Test module to allow delay between parsing and execution of a query. + * Test module to introduce delay at various points during execution of a + * query to test that execution proceeds safely in light of concurrent + * changes. * * The delay is implemented by taking and immediately releasing a specified * advisory lock. If another process has previously taken that lock, the * current process will be blocked until the lock is released; otherwise, * there's no effect. This allows an isolationtester script to reliably - * test behaviors where some specified action happens in another backend - * between parsing and execution of any desired query. + * test behaviors where some specified action happens in another backend in + * a couple of cases: 1) between parsing and execution of any desired query + * when using the planner_hook, 2) between RevalidateCachedQuery() and + * ExecutorStart() when using the ExecutorStart_hook. * * Copyright (c) 2020-2024, PostgreSQL Global Development Group * @@ -22,6 +26,7 @@ #include +#include "executor/executor.h" #include "optimizer/planner.h" #include "utils/fmgrprotos.h" #include "utils/guc.h" @@ -32,9 +37,11 @@ PG_MODULE_MAGIC; /* GUC: advisory lock ID to use. Zero disables the feature. 
*/ static int post_planning_lock_id = 0; +static int executor_start_lock_id = 0; -/* Save previous planner hook user to be a good citizen */ +/* Save previous hook users to be a good citizen */ static planner_hook_type prev_planner_hook = NULL; +static ExecutorStart_hook_type prev_ExecutorStart_hook = NULL; /* planner_hook function to provide the desired delay */ @@ -70,11 +77,42 @@ delay_execution_planner(Query *parse, const char *query_string, return result; } +/* ExecutorStart_hook function to provide the desired delay */ +static void +delay_execution_ExecutorStart(QueryDesc *queryDesc, int eflags) +{ + /* If enabled, delay by taking and releasing the specified lock */ + if (executor_start_lock_id != 0) + { + DirectFunctionCall1(pg_advisory_lock_int8, + Int64GetDatum((int64) executor_start_lock_id)); + DirectFunctionCall1(pg_advisory_unlock_int8, + Int64GetDatum((int64) executor_start_lock_id)); + + /* + * Ensure that we notice any pending invalidations, since the advisory + * lock functions don't do this. + */ + AcceptInvalidationMessages(); + } + + /* Now start the executor, possibly via a previous hook user */ + if (prev_ExecutorStart_hook) + prev_ExecutorStart_hook(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); + + /* queryDesc->cplan may be NULL when no CachedPlan backs this query */ + if (executor_start_lock_id != 0 && queryDesc->cplan != NULL) + elog(NOTICE, "Finished ExecutorStart(): CachedPlan is %s", + CachedPlanValid(queryDesc->cplan) ?
"valid" : "not valid"); +} + /* Module load function */ void _PG_init(void) { - /* Set up the GUC to control which lock is used */ + /* Set up GUCs to control which lock is used */ DefineCustomIntVariable("delay_execution.post_planning_lock_id", "Sets the advisory lock ID to be locked/unlocked after planning.", "Zero disables the delay.", @@ -86,10 +124,22 @@ _PG_init(void) NULL, NULL, NULL); - + DefineCustomIntVariable("delay_execution.executor_start_lock_id", + "Sets the advisory lock ID to be locked/unlocked before starting execution.", + "Zero disables the delay.", + &executor_start_lock_id, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); MarkGUCPrefixReserved("delay_execution"); - /* Install our hook */ + /* Install our hooks. */ prev_planner_hook = planner_hook; planner_hook = delay_execution_planner; + prev_ExecutorStart_hook = ExecutorStart_hook; + ExecutorStart_hook = delay_execution_ExecutorStart; } diff --git a/src/test/modules/delay_execution/expected/cached-plan-inval.out b/src/test/modules/delay_execution/expected/cached-plan-inval.out new file mode 100644 index 0000000000..5bfb2b33b3 --- /dev/null +++ b/src/test/modules/delay_execution/expected/cached-plan-inval.out @@ -0,0 +1,282 @@ +Parsed test spec with 2 sessions + +starting permutation: s1prep s2lock s1exec s2dropi s2unlock +step s1prep: SET plan_cache_mode = force_generic_plan; + PREPARE q AS SELECT * FROM foov WHERE a = $1 FOR UPDATE; + EXPLAIN (COSTS OFF) EXECUTE q (1); +QUERY PLAN +------------------------------------------------ +LockRows + -> Append + Subplans Removed: 2 + -> Bitmap Heap Scan on foo12_1 foo_1 + Recheck Cond: (a = $1) + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = $1) +(7 rows) + +step s2lock: SELECT pg_advisory_lock(12345); +pg_advisory_lock +---------------- + +(1 row) + +step s1exec: LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q (1); +step s2dropi: DROP INDEX foo12_1_a; +step
s2unlock: SELECT pg_advisory_unlock(12345); +pg_advisory_unlock +------------------ +t +(1 row) + +step s1exec: <... completed> +s1: NOTICE: Finished ExecutorStart(): CachedPlan is not valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +------------------------------------- +LockRows + -> Append + Subplans Removed: 2 + -> Seq Scan on foo12_1 foo_1 + Filter: (a = $1) +(5 rows) + + +starting permutation: s1prep2 s2lock s1exec2 s2dropi s2unlock +step s1prep2: SET plan_cache_mode = force_generic_plan; + PREPARE q2 AS SELECT * FROM foov WHERE a = one() or a = two(); + EXPLAIN (COSTS OFF) EXECUTE q2; +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +-------------------------------------------------- +Append + Subplans Removed: 1 + -> Bitmap Heap Scan on foo12_1 foo_1 + Recheck Cond: ((a = one()) OR (a = two())) + -> BitmapOr + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = one()) + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = two()) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) +(11 rows) + +step s2lock: SELECT pg_advisory_lock(12345); +pg_advisory_lock +---------------- + +(1 row) + +step s1exec2: LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q2; +step s2dropi: DROP INDEX foo12_1_a; +step s2unlock: SELECT pg_advisory_unlock(12345); +pg_advisory_unlock +------------------ +t +(1 row) + +step s1exec2: <... 
completed> +s1: NOTICE: Finished ExecutorStart(): CachedPlan is not valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +-------------------------------------------- +Append + Subplans Removed: 1 + -> Seq Scan on foo12_1 foo_1 + Filter: ((a = one()) OR (a = two())) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) +(6 rows) + + +starting permutation: s1prep3 s2lock s1exec3 s2dropi s2unlock +step s1prep3: SET plan_cache_mode = force_generic_plan; + PREPARE q3 AS UPDATE foov SET a = a WHERE a = one() or a = two(); + EXPLAIN (COSTS OFF) EXECUTE q3; +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +-------------------------------------------------------------- +Nested Loop + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on foo12_1 foo_1 + Recheck Cond: ((a = one()) OR (a = two())) + -> BitmapOr + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = one()) + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = two()) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) + -> Materialize + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on bar1 bar_1 + Recheck Cond: (a = one()) + -> Bitmap Index Scan on bar1_a_idx + Index Cond: (a = one()) + +Update on bar + Update on bar1 bar_1 + -> Nested Loop + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on foo12_1 foo_1 + Recheck Cond: ((a = one()) OR (a = two())) + -> BitmapOr + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = one()) + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = two()) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) + -> Materialize + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on bar1 bar_1 + Recheck Cond: (a = one()) + -> Bitmap Index Scan on bar1_a_idx + Index Cond: (a = one()) + +Update on foo + Update on foo12_1 foo_1 + Update on foo12_2 foo_2 + 
-> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on foo12_1 foo_1 + Recheck Cond: ((a = one()) OR (a = two())) + -> BitmapOr + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = one()) + -> Bitmap Index Scan on foo12_1_a + Index Cond: (a = two()) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) +(56 rows) + +step s2lock: SELECT pg_advisory_lock(12345); +pg_advisory_lock +---------------- + +(1 row) + +step s1exec3: LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q3; +step s2dropi: DROP INDEX foo12_1_a; +step s2unlock: SELECT pg_advisory_unlock(12345); +pg_advisory_unlock +------------------ +t +(1 row) + +step s1exec3: <... completed> +s1: NOTICE: Finished ExecutorStart(): CachedPlan is not valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +------------------------------------------------------------- +Nested Loop + -> Append + Subplans Removed: 1 + -> Seq Scan on foo12_1 foo_1 + Filter: ((a = one()) OR (a = two())) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) + -> Materialize + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on bar1 bar_1 + Recheck Cond: (a = one()) + -> Bitmap Index Scan on bar1_a_idx + Index Cond: (a = one()) + +Update on bar + Update on bar1 bar_1 + -> Nested Loop + -> Append + Subplans Removed: 1 + -> Seq Scan on foo12_1 foo_1 + Filter: ((a = one()) OR (a = two())) + -> Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) + -> Materialize + -> Append + Subplans Removed: 1 + -> Bitmap Heap Scan on bar1 bar_1 + Recheck Cond: (a = one()) + -> Bitmap Index Scan on bar1_a_idx + Index Cond: (a = one()) + +Update on foo + Update on foo12_1 foo_1 + Update on foo12_2 foo_2 + -> Append + Subplans Removed: 1 + -> Seq Scan on foo12_1 foo_1 + Filter: ((a = one()) OR (a = two())) + -> 
Seq Scan on foo12_2 foo_2 + Filter: ((a = one()) OR (a = two())) +(41 rows) + + +starting permutation: s1prep4 s2lock s1exec4 s2dropi s2unlock +step s1prep4: SET plan_cache_mode = force_generic_plan; + SET enable_seqscan TO off; + PREPARE q4 AS SELECT * FROM generate_series(1, 1) WHERE EXISTS (SELECT * FROM foov WHERE a = $1 FOR UPDATE); + EXPLAIN (COSTS OFF) EXECUTE q4 (1); +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +--------------------------------------------------------------- +Result + One-Time Filter: (InitPlan 1).col1 + InitPlan 1 + -> LockRows + -> Append + Subplans Removed: 2 + -> Index Scan using foo12_1_a on foo12_1 foo_1 + Index Cond: (a = $1) + -> Function Scan on generate_series +(9 rows) + +step s2lock: SELECT pg_advisory_lock(12345); +pg_advisory_lock +---------------- + +(1 row) + +step s1exec4: LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q4 (1); +step s2dropi: DROP INDEX foo12_1_a; +step s2unlock: SELECT pg_advisory_unlock(12345); +pg_advisory_unlock +------------------ +t +(1 row) + +step s1exec4: <... 
completed> +s1: NOTICE: Finished ExecutorStart(): CachedPlan is not valid +s1: NOTICE: Finished ExecutorStart(): CachedPlan is valid +QUERY PLAN +--------------------------------------------- +Result + One-Time Filter: (InitPlan 1).col1 + InitPlan 1 + -> LockRows + -> Append + Subplans Removed: 2 + -> Seq Scan on foo12_1 foo_1 + Disabled: true + Filter: (a = $1) + -> Function Scan on generate_series +(10 rows) + diff --git a/src/test/modules/delay_execution/meson.build b/src/test/modules/delay_execution/meson.build index 41f3ac0b89..5a70b183d0 100644 --- a/src/test/modules/delay_execution/meson.build +++ b/src/test/modules/delay_execution/meson.build @@ -24,6 +24,7 @@ tests += { 'specs': [ 'partition-addition', 'partition-removal-1', + 'cached-plan-inval', ], }, } diff --git a/src/test/modules/delay_execution/specs/cached-plan-inval.spec b/src/test/modules/delay_execution/specs/cached-plan-inval.spec new file mode 100644 index 0000000000..f27e8fb521 --- /dev/null +++ b/src/test/modules/delay_execution/specs/cached-plan-inval.spec @@ -0,0 +1,80 @@ +# Test to check that invalidation of cached generic plans during ExecutorStart +# correctly triggers replanning and re-execution. 
+ +setup +{ + CREATE TABLE foo (a int, b text) PARTITION BY LIST(a); + CREATE TABLE foo12 PARTITION OF foo FOR VALUES IN (1, 2) PARTITION BY LIST (a); + CREATE TABLE foo12_1 PARTITION OF foo12 FOR VALUES IN (1); + CREATE TABLE foo12_2 PARTITION OF foo12 FOR VALUES IN (2); + CREATE INDEX foo12_1_a ON foo12_1 (a); + CREATE TABLE foo3 PARTITION OF foo FOR VALUES IN (3); + CREATE VIEW foov AS SELECT * FROM foo; + CREATE FUNCTION one () RETURNS int AS $$ BEGIN RETURN 1; END; $$ LANGUAGE PLPGSQL STABLE; + CREATE FUNCTION two () RETURNS int AS $$ BEGIN RETURN 2; END; $$ LANGUAGE PLPGSQL STABLE; + CREATE TABLE bar (a int, b text) PARTITION BY LIST(a); + CREATE TABLE bar1 PARTITION OF bar FOR VALUES IN (1); + CREATE INDEX ON bar1(a); + CREATE TABLE bar2 PARTITION OF bar FOR VALUES IN (2); + CREATE RULE update_foo AS ON UPDATE TO foo DO ALSO UPDATE bar SET a = a WHERE a = one(); + CREATE RULE update_bar AS ON UPDATE TO bar DO ALSO SELECT 1; +} + +teardown +{ + DROP VIEW foov; + DROP RULE update_foo ON foo; + DROP TABLE foo, bar; + DROP FUNCTION one(), two(); +} + +session "s1" +# Append with run-time pruning +step "s1prep" { SET plan_cache_mode = force_generic_plan; + PREPARE q AS SELECT * FROM foov WHERE a = $1 FOR UPDATE; + EXPLAIN (COSTS OFF) EXECUTE q (1); } + +# Another case with Append with run-time pruning +step "s1prep2" { SET plan_cache_mode = force_generic_plan; + PREPARE q2 AS SELECT * FROM foov WHERE a = one() or a = two(); + EXPLAIN (COSTS OFF) EXECUTE q2; } + +# Case with a rule adding another query +step "s1prep3" { SET plan_cache_mode = force_generic_plan; + PREPARE q3 AS UPDATE foov SET a = a WHERE a = one() or a = two(); + EXPLAIN (COSTS OFF) EXECUTE q3; } + +# Another case with Append with run-time pruning in a subquery +step "s1prep4" { SET plan_cache_mode = force_generic_plan; + SET enable_seqscan TO off; + PREPARE q4 AS SELECT * FROM generate_series(1, 1) WHERE EXISTS (SELECT * FROM foov WHERE a = $1 FOR UPDATE); + EXPLAIN (COSTS OFF) EXECUTE q4 (1); } 
+ +# Execute the cached generic plans with executor_start_lock_id set to 12345 +step "s1exec" { LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q (1); } +step "s1exec2" { LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q2; } +step "s1exec3" { LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q3; } +step "s1exec4" { LOAD 'delay_execution'; + SET delay_execution.executor_start_lock_id = 12345; + EXPLAIN (COSTS OFF) EXECUTE q4 (1); } + +session "s2" +step "s2lock" { SELECT pg_advisory_lock(12345); } +step "s2unlock" { SELECT pg_advisory_unlock(12345); } +step "s2dropi" { DROP INDEX foo12_1_a; } + +# While "s1exec", etc. wait to acquire the advisory lock, "s2dropi" is able to +# drop the index being used in the cached plan. When "s1exec" is then +# unblocked and initializes the cached plan for execution, it detects the +# concurrent index drop and causes the cached plan to be discarded and +# recreated without the index. 
+permutation "s1prep" "s2lock" "s1exec" "s2dropi" "s2unlock" +permutation "s1prep2" "s2lock" "s1exec2" "s2dropi" "s2unlock" +permutation "s1prep3" "s2lock" "s1exec3" "s2dropi" "s2unlock" +permutation "s1prep4" "s2lock" "s1exec4" "s2dropi" "s2unlock" diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 7a03b4e360..705cd922fc 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -4440,3 +4440,47 @@ drop table hp_contradict_test; drop operator class part_test_int4_ops2 using hash; drop operator ===(int4, int4); drop function explain_analyze(text); +-- Runtime pruning on UPDATE using WITH CHECK OPTIONS and RETURNING +create table part_abc (a int, b text, c bool) partition by list (a); +create table part_abc_1 (b text, a int, c bool); +create table part_abc_2 (a int, c bool, b text); +alter table part_abc attach partition part_abc_1 for values in (1); +alter table part_abc attach partition part_abc_2 for values in (2); +insert into part_abc values (1, 'b', true); +insert into part_abc values (2, 'c', true); +create view part_abc_view as select * from part_abc where b <> 'a' with check option; +prepare update_part_abc_view as update part_abc_view set b = $2 where a = $1 returning *; +explain (costs off) execute update_part_abc_view (1, 'd'); + QUERY PLAN +------------------------------------------------------- + Update on part_abc + Update on part_abc_1 + -> Append + Subplans Removed: 1 + -> Seq Scan on part_abc_1 + Filter: ((b <> 'a'::text) AND (a = $1)) +(6 rows) + +execute update_part_abc_view (1, 'd'); + a | b | c +---+---+--- + 1 | d | t +(1 row) + +explain (costs off) execute update_part_abc_view (2, 'a'); + QUERY PLAN +------------------------------------------------------- + Update on part_abc + Update on part_abc_2 part_abc_1 + -> Append + Subplans Removed: 1 + -> Seq Scan on part_abc_2 part_abc_1 + Filter: ((b <> 'a'::text) AND (a = $1)) +(6 
rows) + +execute update_part_abc_view (2, 'a'); +ERROR: new row violates check option for view "part_abc_view" +DETAIL: Failing row contains (2, a, t). +deallocate update_part_abc_view; +drop view part_abc_view; +drop table part_abc; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 442428d937..af26ad2fb2 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -1339,3 +1339,21 @@ drop operator class part_test_int4_ops2 using hash; drop operator ===(int4, int4); drop function explain_analyze(text); + +-- Runtime pruning on UPDATE using WITH CHECK OPTIONS and RETURNING +create table part_abc (a int, b text, c bool) partition by list (a); +create table part_abc_1 (b text, a int, c bool); +create table part_abc_2 (a int, c bool, b text); +alter table part_abc attach partition part_abc_1 for values in (1); +alter table part_abc attach partition part_abc_2 for values in (2); +insert into part_abc values (1, 'b', true); +insert into part_abc values (2, 'c', true); +create view part_abc_view as select * from part_abc where b <> 'a' with check option; +prepare update_part_abc_view as update part_abc_view set b = $2 where a = $1 returning *; +explain (costs off) execute update_part_abc_view (1, 'd'); +execute update_part_abc_view (1, 'd'); +explain (costs off) execute update_part_abc_view (2, 'a'); +execute update_part_abc_view (2, 'a'); +deallocate update_part_abc_view; +drop view part_abc_view; +drop table part_abc; -- 2.43.0