From ad047f0bb7b703c0d2079464622588138e64b117 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Wed, 18 Sep 2024 12:00:41 +0900 Subject: [PATCH v55 4/5] Defer locking of runtime-prunable relations to executor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When preparing a cached plan for execution, plancache.c locks the relations in the plan's range table to ensure they are safe for execution. However, this approach, implemented in AcquireExecutorLocks(), results in unnecessarily locking relations that might be pruned during "initial" runtime pruning. To optimize this, locking is now deferred for relations subject to "initial" runtime pruning. The planner now provides a set of "unprunable" relations through the new PlannedStmt.unprunableRelids field. AcquireExecutorLocks() will only lock these unprunable relations. PlannedStmt.unprunableRelids is populated by subtracting the set of initially prunable relids from all RT indexes. The prunable relids are identified by examining all PartitionPruneInfos during set_plan_refs() and storing the RT indexes of partitions subject to "initial" pruning steps. While at it, some duplicated code in set_append_references() and set_mergeappend_references() that constructs the prunable relids set has been refactored into a common function. Deferred locks are taken, if necessary, after ExecDoInitialPruning() determines the set of unpruned partitions. To allow the executor to determine whether the plan tree it’s executing is cached and may contain unlocked relations, the CachedPlan is now made available via the QueryDesc. The executor can call CachedPlanRequiresLocking(), which returns true if the CachedPlan is a reusable generic plan that might contain unlocked relations. Plan nodes like Append have already been updated to consider only the set of unpruned relations. 
However, there are cases, such as child RowMarks and child result relations, where the code manipulating them does not directly receive information about unpruned partitions. Therefore, code handling child RowMarks and result relations has been modified to skip those that belong to pruned partitions. For this, the RT indexes of unpruned partitions are added in ExecDoInitialPruning() to es_unprunable_relids, which initially contains PlannedStmt.unprunableRelids. The corresponding code now processes only those child RowMarks and result relations whose owning relations are in this set. For result relations managed by a ModifyTable node, its resultRelations list is filtered in ExecInitModifyTable() so that only unpruned relations are considered, and the ResultRelInfo structs are created only for those. Finally, an Assert has also been added in ExecCheckPermissions() to ensure that all relations whose permissions are checked have been properly locked, helping to catch any accidental omission of relations from the unprunableRelids set that should have their permissions checked. This deferment introduces a window where prunable relations may be altered by concurrent DDL, potentially causing the plan to become invalid. Consequently, the executor might attempt to execute an invalid plan, leading to errors such as ExecInitIndexScan() failing to locate an index of an unpruned partition because the index was dropped concurrently (if it's partition-local, not inherited, for example). Future commits will introduce changes to enable the executor to check plan validity during ExecutorStart() and retry with a newly created plan if the original becomes invalid after taking deferred locks.
--- src/backend/commands/copyto.c | 2 +- src/backend/commands/createas.c | 2 +- src/backend/commands/explain.c | 7 +-- src/backend/commands/extension.c | 1 + src/backend/commands/matview.c | 2 +- src/backend/commands/prepare.c | 3 +- src/backend/executor/execMain.c | 75 ++++++++++++++++++++++++-- src/backend/executor/execParallel.c | 9 +++- src/backend/executor/execPartition.c | 36 ++++++++++--- src/backend/executor/functions.c | 1 + src/backend/executor/nodeAppend.c | 8 +-- src/backend/executor/nodeLockRows.c | 10 +++- src/backend/executor/nodeMergeAppend.c | 2 +- src/backend/executor/nodeModifyTable.c | 38 ++++++++++--- src/backend/executor/spi.c | 1 + src/backend/optimizer/plan/planner.c | 2 + src/backend/optimizer/plan/setrefs.c | 7 +++ src/backend/partitioning/partprune.c | 18 +++++++ src/backend/tcop/pquery.c | 10 +++- src/backend/utils/cache/plancache.c | 40 ++++++++------ src/include/commands/explain.h | 5 +- src/include/executor/execPartition.h | 5 +- src/include/executor/execdesc.h | 2 + src/include/nodes/execnodes.h | 6 +++ src/include/nodes/pathnodes.h | 6 +++ src/include/nodes/plannodes.h | 7 +++ src/include/utils/plancache.h | 10 ++++ 27 files changed, 263 insertions(+), 52 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 91de442f43..db976f928a 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -552,7 +552,7 @@ BeginCopyTo(ParseState *pstate, ((DR_copy *) dest)->cstate = cstate; /* Create a QueryDesc requesting no output */ - cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 0b629b1f79..57a3375cad 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -324,7 +324,7 @@ ExecCreateTableAs(ParseState *pstate, 
CreateTableAsStmt *stmt, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index aaec439892..49f7370734 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -509,7 +509,7 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, } /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + ExplainOnePlan(plan, NULL, into, es, queryString, params, queryEnv, &planduration, (es->buffers ? &bufusage : NULL), es->memory ? &mem_counters : NULL); } @@ -617,7 +617,8 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, * to call it. */ void -ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage, @@ -673,7 +674,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, dest = None_Receiver; /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, + queryDesc = CreateQueryDesc(plannedstmt, cplan, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, instrument_option); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index fab59ad5f6..bd169edeff 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -742,6 +742,7 @@ execute_sql_string(const char *sql) QueryDesc *qdesc; qdesc = CreateQueryDesc(stmt, + NULL, sql, GetActiveSnapshot(), NULL, dest, NULL, NULL, 0); diff 
--git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 010097873d..69be74b4bd 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -438,7 +438,7 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, queryString, + queryDesc = CreateQueryDesc(plan, NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 07257d4db9..311b9ebd5b 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -655,7 +655,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); if (pstmt->commandType != CMD_UTILITY) - ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv, + ExplainOnePlan(pstmt, cplan, into, es, query_string, paramLI, + queryEnv, &planduration, (es->buffers ? &bufusage : NULL), es->memory ? 
&mem_counters : NULL); else diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 1994112b2e..df1b5b2dc3 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -53,6 +53,7 @@ #include "miscadmin.h" #include "parser/parse_relation.h" #include "rewrite/rewriteHandler.h" +#include "storage/lmgr.h" #include "tcop/utility.h" #include "utils/acl.h" #include "utils/backend_status.h" @@ -90,6 +91,7 @@ static bool ExecCheckPermissionsModified(Oid relOid, Oid userid, AclMode requiredPerms); static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt); static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree); +static inline bool ExecShouldLockRelations(EState *estate); /* end of local decls */ @@ -600,6 +602,21 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos, (rte->rtekind == RTE_SUBQUERY && rte->relkind == RELKIND_VIEW)); + /* + * Ensure that we have at least an AccessShareLock on relations + * whose permissions need to be checked. + * + * Skip this check in a parallel worker because locks won't be + * taken until ExecInitNode() performs plan initialization. + * + * XXX: ExecCheckPermissions() in a parallel worker may be + * redundant with the checks done in the leader process, so this + * should be reviewed to ensure it’s necessary. + */ + Assert(IsParallelWorker() || + CheckRelationOidLockedByMe(rte->relid, AccessShareLock, + true)); + (void) getRTEPermissionInfo(rteperminfos, rte); /* Many-to-one mapping not allowed */ Assert(!bms_is_member(rte->perminfoindex, indexset)); @@ -862,12 +879,46 @@ ExecDoInitialPruning(EState *estate) * result. 
*/ if (prunestate->do_initial_prune) - validsubplans = ExecFindMatchingSubPlans(prunestate, true); + { + Bitmapset *validsubplan_rtis = NULL; + + validsubplans = ExecFindMatchingSubPlans(prunestate, true, + &validsubplan_rtis); + if (ExecShouldLockRelations(estate)) + { + int rtindex = -1; + + rtindex = -1; + while ((rtindex = bms_next_member(validsubplan_rtis, + rtindex)) >= 0) + { + RangeTblEntry *rte = exec_rt_fetch(rtindex, estate); + + Assert(rte->rtekind == RTE_RELATION && + rte->rellockmode != NoLock); + LockRelationOid(rte->relid, rte->rellockmode); + } + } + estate->es_unprunable_relids = bms_add_members(estate->es_unprunable_relids, + validsubplan_rtis); + } + estate->es_part_prune_results = lappend(estate->es_part_prune_results, validsubplans); } } +/* + * Locks might be needed only if running a cached plan that might contain + * unlocked relations, such as reused generic plans. + */ +static inline bool +ExecShouldLockRelations(EState *estate) +{ + return estate->es_cachedplan == NULL ? false : + CachedPlanRequiresLocking(estate->es_cachedplan); +} + /* ---------------------------------------------------------------- * InitPlan * @@ -880,6 +931,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; + CachedPlan *cachedplan = queryDesc->cplan; Plan *plan = plannedstmt->planTree; List *rangeTable = plannedstmt->rtable; EState *estate = queryDesc->estate; @@ -899,10 +951,13 @@ InitPlan(QueryDesc *queryDesc, int eflags) ExecInitRangeTable(estate, rangeTable, plannedstmt->permInfos); estate->es_plannedstmt = plannedstmt; + estate->es_cachedplan = cachedplan; + estate->es_unprunable_relids = bms_copy(plannedstmt->unprunableRelids); /* * Perform runtime "initial" pruning to determine the plan nodes that will - * not be executed. + * not be executed. This will also add the RT indexes of surviving leaf + * partitions to es_unprunable_relids. 
*/ estate->es_part_prune_infos = plannedstmt->partPruneInfos; ExecDoInitialPruning(estate); @@ -921,8 +976,13 @@ InitPlan(QueryDesc *queryDesc, int eflags) Relation relation; ExecRowMark *erm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). + */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unprunable_relids)) continue; /* get relation's OID (will produce InvalidOid if subquery) */ @@ -2959,6 +3019,13 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree) } } + /* + * Copy es_unprunable_relids so that RowMarks of pruned relations are + * ignored in ExecInitLockRows() and ExecInitModifyTable() when + * initializing the plan trees below. + */ + rcestate->es_unprunable_relids = parentestate->es_unprunable_relids; + /* * Initialize private state information for each SubPlan. We must do this * before running ExecInitNode on the main query tree, since diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index b01a2fdfdd..7519c9a860 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1257,8 +1257,15 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false); paramLI = RestoreParamList(&paramspace); - /* Create a QueryDesc for the query. */ + /* + * Create a QueryDesc for the query. We pass NULL for cachedplan, because + * we don't have a pointer to the CachedPlan in the leader's process. It's + * fine because the only reason the executor needs to see it is to decide + * if it should take locks on certain relations, but parallel workers + * always take locks anyway.
+ */ return CreateQueryDesc(pstmt, + NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, receiver, paramLI, NULL, instrument_options); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index d9fa593785..551e0ce9b2 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -26,6 +26,7 @@ #include "partitioning/partdesc.h" #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" #include "utils/acl.h" #include "utils/lsyscache.h" #include "utils/partcache.h" @@ -194,7 +195,8 @@ static void find_matching_subplans_recurse(PlanState *parent_plan, PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans); + Bitmapset **validsubplans, + Bitmapset **validsubplan_rtis); /* @@ -1978,8 +1980,8 @@ ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) * The set of partitions that exist now might not be the same that * existed when the plan was made. The normal case is that it is; * optimize for that case with a quick comparison, and just copy - * the subplan_map and make subpart_map point to the one in - * PruneInfo. + * the subplan_map and make subpart_map, rti_map point to the + * ones in PruneInfo. * * For the case where they aren't identical, we could have more * partitions on either side; or even exactly the same number of @@ -1999,6 +2001,7 @@ ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) sizeof(int) * partdesc->nparts) == 0) { pprune->subpart_map = pinfo->subpart_map; + pprune->rti_map = pinfo->rti_map; memcpy(pprune->subplan_map, pinfo->subplan_map, sizeof(int) * pinfo->nparts); } @@ -2019,6 +2022,7 @@ ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) * mismatches. 
*/ pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts); + pprune->rti_map = palloc(sizeof(int) * partdesc->nparts); for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++) { @@ -2036,6 +2040,8 @@ ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) pinfo->subplan_map[pd_idx]; pprune->subpart_map[pp_idx] = pinfo->subpart_map[pd_idx]; + pprune->rti_map[pp_idx] = + pinfo->rti_map[pd_idx]; pd_idx++; continue; } @@ -2073,6 +2079,7 @@ ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo) pprune->subpart_map[pp_idx] = -1; pprune->subplan_map[pp_idx] = -1; + pprune->rti_map[pp_idx] = 0; } } @@ -2339,10 +2346,13 @@ PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This * differentiates the initial executor-time pruning step from later * runtime pruning. + * + * validsubplan_rtis must be non-NULL if initial_prune is true. */ Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune) + bool initial_prune, + Bitmapset **validsubplan_rtis) { Bitmapset *result = NULL; MemoryContext oldcontext; @@ -2378,7 +2388,7 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, pprune = &prunedata->partrelprunedata[0]; find_matching_subplans_recurse(prunestate->parent_plan, prunedata, pprune, initial_prune, - &result); + &result, validsubplan_rtis); /* Expression eval may have used space in ExprContext too */ if (pprune->exec_context.is_valid) @@ -2395,6 +2405,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + if (validsubplan_rtis) + *validsubplan_rtis = bms_copy(*validsubplan_rtis); MemoryContextReset(prunestate->prune_context); @@ -2405,14 +2417,16 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * find_matching_subplans_recurse * Recursive worker function for ExecFindMatchingSubPlans * - * Adds valid
(non-prunable) subplan IDs to *validsubplans + * Adds valid (non-prunable) subplan IDs to *validsubplans and the RT indexes + * of their owning leaf partitions to *validsubplan_rtis if it's non-NULL. */ static void find_matching_subplans_recurse(PlanState *parent_plan, PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans) + Bitmapset **validsubplans, + Bitmapset **validsubplan_rtis) { Bitmapset *partset; int i; @@ -2464,8 +2478,13 @@ find_matching_subplans_recurse(PlanState *parent_plan, while ((i = bms_next_member(partset, i)) >= 0) { if (pprune->subplan_map[i] >= 0) + { *validsubplans = bms_add_member(*validsubplans, pprune->subplan_map[i]); + if (validsubplan_rtis) + *validsubplan_rtis = bms_add_member(*validsubplan_rtis, + pprune->rti_map[i]); + } else { int partidx = pprune->subpart_map[i]; @@ -2474,7 +2493,8 @@ find_matching_subplans_recurse(PlanState *parent_plan, find_matching_subplans_recurse(parent_plan, prunedata, &prunedata->partrelprunedata[partidx], - initial_prune, validsubplans); + initial_prune, validsubplans, + validsubplan_rtis); else { /* diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 692854e2b3..6f6f45e0ad 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -840,6 +840,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache) dest = None_Receiver; es->qd = CreateQueryDesc(es->stmt, + NULL, fcache->src, GetActiveSnapshot(), InvalidSnapshot, diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index de7ebab5c2..006bdafaea 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -581,7 +581,7 @@ choose_next_subplan_locally(AppendState *node) else if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); 
node->as_valid_subplans_identified = true; } @@ -648,7 +648,7 @@ choose_next_subplan_for_leader(AppendState *node) if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; /* @@ -724,7 +724,7 @@ choose_next_subplan_for_worker(AppendState *node) else if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; mark_invalid_subplans_as_finished(node); @@ -877,7 +877,7 @@ ExecAppendAsyncBegin(AppendState *node) if (!node->as_valid_subplans_identified) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); node->as_valid_subplans_identified = true; classify_matching_subplans(node); diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 41754ddfea..b5b2cd53c5 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -28,6 +28,7 @@ #include "foreign/fdwapi.h" #include "miscadmin.h" #include "utils/rel.h" +#include "utils/lsyscache.h" /* ---------------------------------------------------------------- @@ -347,8 +348,13 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags) ExecRowMark *erm; ExecAuxRowMark *aerm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). 
+ */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unprunable_relids)) continue; /* find ExecRowMark and build ExecAuxRowMark */ diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 3ed91808dd..f7821aa178 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -219,7 +219,7 @@ ExecMergeAppend(PlanState *pstate) */ if (node->ms_valid_subplans == NULL) node->ms_valid_subplans = - ExecFindMatchingSubPlans(node->ms_prune_state, false); + ExecFindMatchingSubPlans(node->ms_prune_state, false, NULL); /* * First time through: pull the first tuple from each valid subplan, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 8bf4c80d4a..3c02782445 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4176,12 +4176,17 @@ ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, hash_search(node->mt_resultOidHash, &resultoid, HASH_FIND, NULL); if (mtlookup) { + ResultRelInfo *resultRelInfo; + if (update_cache) { node->mt_lastResultOid = resultoid; node->mt_lastResultIndex = mtlookup->relationIndex; } - return node->resultRelInfo + mtlookup->relationIndex; + + resultRelInfo = node->resultRelInfo + mtlookup->relationIndex; + + return resultRelInfo; } } else @@ -4218,7 +4223,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ModifyTableState *mtstate; Plan *subplan = outerPlan(node); CmdType operation = node->operation; - int nrels = list_length(node->resultRelations); + int nrels; + List *resultRelations = NIL; ResultRelInfo *resultRelInfo; List *arowmarks; ListCell *l; @@ -4228,6 +4234,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + /* + * Only consider unpruned relations. 
In the future, it might be more + * efficient to store resultRelations as a bitmapset, which would make + * this operation cheaper. + */ + foreach(l, node->resultRelations) + { + Index rti = lfirst_int(l); + + if (bms_is_member(rti, estate->es_unprunable_relids)) + resultRelations = lappend_int(resultRelations, rti); + } + nrels = list_length(resultRelations); + /* * create state structure */ @@ -4265,6 +4285,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (node->rootRelation > 0) { + Assert(bms_is_member(node->rootRelation, estate->es_unprunable_relids)); mtstate->rootResultRelInfo = makeNode(ResultRelInfo); ExecInitResultRelation(estate, mtstate->rootResultRelInfo, node->rootRelation); @@ -4279,7 +4300,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* set up epqstate with dummy subplan data for the moment */ EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, - node->epqParam, node->resultRelations); + node->epqParam, resultRelations); mtstate->fireBSTriggers = true; /* @@ -4297,7 +4318,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ resultRelInfo = mtstate->resultRelInfo; i = 0; - foreach(l, node->resultRelations) + foreach(l, resultRelations) { Index resultRelation = lfirst_int(l); List *mergeActions = NIL; @@ -4589,8 +4610,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ExecRowMark *erm; ExecAuxRowMark *aerm; - /* ignore "parent" rowmarks; they are irrelevant at runtime */ - if (rc->isParent) + /* + * Ignore "parent" rowmarks, because they are irrelevant at + * runtime. Also ignore the rowmarks belonging to child tables + * that have been pruned in ExecDoInitialPruning(). 
+ */ + if (rc->isParent || + !bms_is_member(rc->rti, estate->es_unprunable_relids)) continue; /* Find ExecRowMark and build ExecAuxRowMark */ diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 90d9834576..659bd6dcd9 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -2684,6 +2684,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, snap = InvalidSnapshot; qdesc = CreateQueryDesc(stmt, + cplan, plansource->query_string, snap, crosscheck_snapshot, dest, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1b9071c774..9e47a7fd50 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -549,6 +549,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->planTree = top_plan; result->partPruneInfos = glob->partPruneInfos; result->rtable = glob->finalrtable; + result->unprunableRelids = bms_difference(bms_add_range(NULL, 1, list_length(result->rtable)), + glob->prunableRelids); result->permInfos = glob->finalrteperminfos; result->resultRelations = glob->resultRelations; result->appendRelations = glob->appendRelations; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index e2ea406c4e..283a61a972 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1764,8 +1764,15 @@ register_partpruneinfo(PlannerInfo *root, int part_prune_index, int rtoffset) foreach(l2, prune_infos) { PartitionedRelPruneInfo *prelinfo = lfirst(l2); + int i; prelinfo->rtindex += rtoffset; + for (i = 0; i < prelinfo->nparts; i++) + { + prelinfo->rti_map[i] += rtoffset; + glob->prunableRelids = bms_add_member(glob->prunableRelids, + prelinfo->rti_map[i]); + } } } diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 60fabb1734..85894c87af 100644 --- a/src/backend/partitioning/partprune.c +++ 
b/src/backend/partitioning/partprune.c @@ -645,6 +645,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *subplan_map; int *subpart_map; Oid *relid_map; + int *rti_map; /* * Construct the subplan and subpart maps for this partitioning level. @@ -657,6 +658,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subpart_map = (int *) palloc(nparts * sizeof(int)); memset(subpart_map, -1, nparts * sizeof(int)); relid_map = (Oid *) palloc0(nparts * sizeof(Oid)); + rti_map = (int *) palloc0(nparts * sizeof(int)); present_parts = NULL; i = -1; @@ -671,9 +673,24 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subplan_map[i] = subplanidx = relid_subplan_map[partrel->relid] - 1; subpart_map[i] = subpartidx = relid_subpart_map[partrel->relid] - 1; relid_map[i] = planner_rt_fetch(partrel->relid, root)->relid; + + /* + * Track the RT indexes of partitions to ensure they are included + * in the prunableRelids set of relations that are locked during + * execution. This ensures that if the plan is cached, these + * partitions are locked when the plan is reused. + * + * Partitions without a subplan and sub-partitioned partitions + * where none of the sub-partitions have a subplan due to + * constraint exclusion are not included in this set. Instead, + * they are added to the unprunableRelids set, and the relations + * in this set are locked by AcquireExecutorLocks() before + * executing a cached plan. 
+ */ if (subplanidx >= 0) { present_parts = bms_add_member(present_parts, i); + rti_map[i] = (int) partrel->relid; /* Record finding this subplan */ subplansfound = bms_add_member(subplansfound, subplanidx); @@ -695,6 +712,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->subplan_map = subplan_map; pinfo->subpart_map = subpart_map; pinfo->relid_map = relid_map; + pinfo->rti_map = rti_map; } pfree(relid_subpart_map); diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index a1f8d03db1..6e8f6b1b8f 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -36,6 +36,7 @@ Portal ActivePortal = NULL; static void ProcessQuery(PlannedStmt *plan, + CachedPlan *cplan, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -65,6 +66,7 @@ static void DoPortalRewind(Portal portal); */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, + CachedPlan *cplan, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, @@ -77,6 +79,7 @@ CreateQueryDesc(PlannedStmt *plannedstmt, qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ + qd->cplan = cplan; /* CachedPlan supplying the plannedstmt */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check snapshot */ @@ -122,6 +125,7 @@ FreeQueryDesc(QueryDesc *qdesc) * PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal * * plan: the plan tree for the query + * cplan: CachedPlan supplying the plan * sourceText: the source text of the query * params: any parameters needed * dest: where to send results @@ -134,6 +138,7 @@ FreeQueryDesc(QueryDesc *qdesc) */ static void ProcessQuery(PlannedStmt *plan, + CachedPlan *cplan, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -145,7 +150,7 @@ ProcessQuery(PlannedStmt *plan, /* * Create the QueryDesc object */ - queryDesc = CreateQueryDesc(plan, sourceText, + 
queryDesc = CreateQueryDesc(plan, cplan, sourceText, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); @@ -493,6 +498,7 @@ PortalStart(Portal portal, ParamListInfo params, * the destination to DestNone. */ queryDesc = CreateQueryDesc(linitial_node(PlannedStmt, portal->stmts), + portal->cplan, portal->sourceText, GetActiveSnapshot(), InvalidSnapshot, @@ -1276,6 +1282,7 @@ PortalRunMulti(Portal portal, { /* statement can set tag string */ ProcessQuery(pstmt, + portal->cplan, portal->sourceText, portal->portalParams, portal->queryEnv, @@ -1285,6 +1292,7 @@ PortalRunMulti(Portal portal, { /* stmt added by rewrite cannot set tag */ ProcessQuery(pstmt, + portal->cplan, portal->sourceText, portal->portalParams, portal->queryEnv, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 5af1a168ec..5b75dadf13 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -104,7 +104,8 @@ static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv); static bool CheckCachedPlan(CachedPlanSource *plansource); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, - ParamListInfo boundParams, QueryEnvironment *queryEnv); + ParamListInfo boundParams, QueryEnvironment *queryEnv, + bool generic); static bool choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams); static double cached_plan_cost(CachedPlan *plan, bool include_planner); @@ -815,8 +816,11 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * Caller must have already called RevalidateCachedQuery to verify that the * querytree is up to date. * - * On a "true" return, we have acquired the locks needed to run the plan. - * (We must do this for the "true" result to be race-condition-free.) + * On a "true" return, we have acquired locks on the "unprunableRelids" set + * for all plans in plansource->stmt_list. 
The plans are not completely + * race-condition-free until the executor takes locks on the set of prunable + * relations that survive initial runtime pruning during executor + * initialization. */ static bool CheckCachedPlan(CachedPlanSource *plansource) @@ -893,10 +897,10 @@ CheckCachedPlan(CachedPlanSource *plansource) * or it can be set to NIL if we need to re-copy the plansource's query_list. * * To build a generic, parameter-value-independent plan, pass NULL for - * boundParams. To build a custom plan, pass the actual parameter values via - * boundParams. For best effect, the PARAM_FLAG_CONST flag should be set on - * each parameter value; otherwise the planner will treat the value as a - * hint rather than a hard constant. + * boundParams, and true for generic. To build a custom plan, pass the actual + * parameter values via boundParams, and false for generic. For best effect, + * the PARAM_FLAG_CONST flag should be set on each parameter value; otherwise + * the planner will treat the value as a hint rather than a hard constant. * * Planning work is done in the caller's memory context. 
The finished plan * is in a child memory context, which typically should get reparented @@ -904,7 +908,8 @@ CheckCachedPlan(CachedPlanSource *plansource) */ static CachedPlan * BuildCachedPlan(CachedPlanSource *plansource, List *qlist, - ParamListInfo boundParams, QueryEnvironment *queryEnv) + ParamListInfo boundParams, QueryEnvironment *queryEnv, + bool generic) { CachedPlan *plan; List *plist; @@ -1026,6 +1031,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, plan->refcount = 0; plan->context = plan_context; plan->is_oneshot = plansource->is_oneshot; + plan->is_generic = generic; plan->is_saved = false; plan->is_valid = true; @@ -1196,7 +1202,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, else { /* Build a new generic plan */ - plan = BuildCachedPlan(plansource, qlist, NULL, queryEnv); + plan = BuildCachedPlan(plansource, qlist, NULL, queryEnv, true); /* Just make real sure plansource->gplan is clear */ ReleaseGenericPlan(plansource); /* Link the new generic plan into the plansource */ @@ -1241,7 +1247,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, if (customplan) { /* Build a custom plan */ - plan = BuildCachedPlan(plansource, qlist, boundParams, queryEnv); + plan = BuildCachedPlan(plansource, qlist, boundParams, queryEnv, false); /* Accumulate total costs of custom plans */ plansource->total_custom_cost += cached_plan_cost(plan, true); @@ -1387,8 +1393,8 @@ CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, } /* - * Reject if AcquireExecutorLocks would have anything to do. This is - * probably unnecessary given the previous check, but let's be safe. + * Reject if there are any lockable relations. This is probably + * unnecessary given the previous check, but let's be safe. 
*/ foreach(lc, plan->stmt_list) { @@ -1776,7 +1782,7 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) foreach(lc1, stmt_list) { PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); - ListCell *lc2; + int rtindex; if (plannedstmt->commandType == CMD_UTILITY) { @@ -1794,9 +1800,13 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) continue; } - foreach(lc2, plannedstmt->rtable) + rtindex = -1; + while ((rtindex = bms_next_member(plannedstmt->unprunableRelids, + rtindex)) >= 0) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + RangeTblEntry *rte = list_nth_node(RangeTblEntry, + plannedstmt->rtable, + rtindex - 1); if (!(rte->rtekind == RTE_RELATION || (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)))) diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 3ab0aae78f..21c71e0d53 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -103,8 +103,9 @@ extern void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv); -extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, +extern void ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, + IntoClause *into, ExplainState *es, + const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage, diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index ef6d8b2d48..7f2592e3b0 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -48,6 +48,7 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. + * rti_map RT index by partition index, or 0. 
* present_parts A Bitmapset of the partition indexes that we * have subplans or subparts for. * initial_pruning_steps List of PartitionPruneSteps used to @@ -65,6 +66,7 @@ typedef struct PartitionedRelPruningData int nparts; int *subplan_map; int *subpart_map; + int *rti_map pg_node_attr(array_size(nparts)); Bitmapset *present_parts; List *initial_pruning_steps; List *exec_pruning_steps; @@ -132,7 +134,8 @@ extern PartitionPruneState *ExecInitPartitionPruning(PlanState *planstate, Bitmapset *root_parent_relids, Bitmapset **initially_valid_subplans); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune); + bool initial_prune, + Bitmapset **validsubplan_rtis); extern PartitionPruneState *ExecCreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo); #endif /* EXECPARTITION_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index 0a7274e26c..0e7245435d 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -35,6 +35,7 @@ typedef struct QueryDesc /* These fields are provided by CreateQueryDesc */ CmdType operation; /* CMD_SELECT, CMD_UPDATE, etc. 
*/ PlannedStmt *plannedstmt; /* planner's output (could be utility, too) */ + CachedPlan *cplan; /* CachedPlan that supplies the plannedstmt */ const char *sourceText; /* source text of the query */ Snapshot snapshot; /* snapshot to use for query */ Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */ @@ -57,6 +58,7 @@ typedef struct QueryDesc /* in pquery.c */ extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt, + CachedPlan *cplan, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 518a9fcd15..57170818c0 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -42,6 +42,7 @@ #include "storage/condition_variable.h" #include "utils/hsearch.h" #include "utils/queryenvironment.h" +#include "utils/plancache.h" #include "utils/reltrigger.h" #include "utils/sharedtuplestore.h" #include "utils/snapshot.h" @@ -636,9 +637,14 @@ typedef struct EState * ExecRowMarks, or NULL if none */ List *es_rteperminfos; /* List of RTEPermissionInfo */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + CachedPlan *es_cachedplan; List *es_part_prune_infos; /* PlannedStmt.partPruneInfos */ List *es_part_prune_states; /* List of PartitionPruneState */ List *es_part_prune_results; /* List of Bitmapset */ + Bitmapset *es_unprunable_relids; /* PlannedStmt.unprunableRelids + RT + * indexes of leaf partitions that + * survive initial pruning; see + * ExecDoInitialPruning() */ const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 8d30b6e896..cc2190ea63 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -116,6 +116,12 @@ typedef struct PlannerGlobal /* "flat" rangetable for executor */ List *finalrtable; + /* + * RT indexes of relations subject to removal from 
the plan due to runtime + * pruning at plan initialization time + */ + Bitmapset *prunableRelids; + /* "flat" list of RTEPermissionInfos */ List *finalrteperminfos; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 39d0281c23..318e30fe2f 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -74,6 +74,10 @@ typedef struct PlannedStmt List *rtable; /* list of RangeTblEntry nodes */ + Bitmapset *unprunableRelids; /* RT indexes of relations that are not + * subject to runtime pruning; for + * AcquireExecutorLocks() */ + List *permInfos; /* list of RTEPermissionInfo nodes for rtable * entries needing one */ @@ -1474,6 +1478,9 @@ typedef struct PartitionedRelPruneInfo /* subpart index by partition index, or -1 */ int *subpart_map pg_node_attr(array_size(nparts)); + /* RT index by partition index, or 0 */ + int *rti_map pg_node_attr(array_size(nparts)); + /* relation OID by partition index, or 0 */ Oid *relid_map pg_node_attr(array_size(nparts)); diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index a90dfdf906..0b5ee007ca 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -149,6 +149,7 @@ typedef struct CachedPlan int magic; /* should equal CACHEDPLAN_MAGIC */ List *stmt_list; /* list of PlannedStmts */ bool is_oneshot; /* is it a "oneshot" plan? */ + bool is_generic; /* is it a reusable generic plan? */ bool is_saved; /* is CachedPlan in a long-lived context? */ bool is_valid; /* is the stmt_list currently valid? */ Oid planRoleId; /* Role ID the plan was created for */ @@ -235,4 +236,13 @@ extern bool CachedPlanIsSimplyValid(CachedPlanSource *plansource, extern CachedExpression *GetCachedExpression(Node *expr); extern void FreeCachedExpression(CachedExpression *cexpr); +/* + * CachedPlanRequiresLocking: should the executor acquire locks? 
+ */ +static inline bool +CachedPlanRequiresLocking(CachedPlan *cplan) +{ + return cplan->is_generic; +} + #endif /* PLANCACHE_H */ -- 2.43.0