From f1ad8bea2621c5a7074742516b248c9efe9dd17e Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Wed, 6 Sep 2023 17:54:02 +0900 Subject: [PATCH v50 2/6] Add field to store parent relids to Append/MergeAppend There's no way currently in the executor to tell if the child subplans of Append/MergeAppend are scanning partitions, and if they indeed are, what the RT indexes of their parent/ancestor tables are. The executor doesn't need to see those RT indexes except for run-time pruning, in which case they can be found in the PartitionPruneInfo. An upcoming commit will create a need for them to be available for the purpose of locking those parent/ancestor tables when executing a cached plan, so add a field called allpartrelids to Append/MergeAppend to store those RT indexes. In the cases where an Append/MergeAppend node containing parent RT indexes is eligible for elision in set_{append|mergeappend}_references(), those RT indexes are now transferred into PlannedStmt.elidedAppendPartRels. The code to look up partitioned parent relids for a given list of partition scan subpaths of an Append/MergeAppend is already present in make_partition_pruneinfo() but it's local to partprune.c. This commit refactors that code into its own function called add_append_subpath_partrelids() defined in appendinfo.c and generalizes it to consider child join and aggregation paths. To facilitate looking up of parent rels of child grouping rels in add_append_subpath_partrelids(), parent links are now also set in the RelOptInfos of child grouping rels, like they are in those of child base and join rels. 
Discussion: https://postgr.es/m/CA+HiwqFGkMSge6TgC9KQzde0ohpAycLQuV7ooitEEpbKB0O_mg@mail.gmail.com --- src/backend/executor/execParallel.c | 1 + src/backend/optimizer/plan/createplan.c | 41 ++++++-- src/backend/optimizer/plan/planner.c | 5 + src/backend/optimizer/plan/setrefs.c | 22 ++++ src/backend/optimizer/util/appendinfo.c | 127 ++++++++++++++++++++++++ src/backend/partitioning/partprune.c | 124 +++-------------------- src/include/nodes/pathnodes.h | 3 + src/include/nodes/plannodes.h | 17 ++++ src/include/optimizer/appendinfo.h | 3 + src/include/partitioning/partprune.h | 3 +- 10 files changed, 223 insertions(+), 123 deletions(-) diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index bfb3419efb..f995714d7f 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -185,6 +185,7 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->permInfos = estate->es_rteperminfos; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; + pstmt->elidedAppendPartRels = NIL; /* * Transfer only parallel-safe subplans, leaving a NULL "hole" in the list diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 28addc1129..49c193c237 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -25,6 +25,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" @@ -1232,6 +1233,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) Oid *nodeCollations = NULL; bool *nodeNullsFirst = NULL; bool consider_async = false; + List *allpartrelids = NIL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1373,15 +1375,23 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) ++nasyncplans; } + /* + * Find 
partitioned parent rel(s) of the subpath's rel(s). + */ + allpartrelids = add_append_subpath_partrelids(root, subpath, rel, + allpartrelids); + subplans = lappend(subplans, subplan); } + plan->allpartrelids = allpartrelids; + /* - * If any quals exist, they may be useful to perform further partition - * pruning during execution. Gather information needed by the executor to - * do partition pruning. + * If scanning partitions, check if there are quals that may be useful to + * perform further partition pruning during execution. Gather information + * needed by the executor to do partition pruning. */ - if (enable_partition_pruning) + if (enable_partition_pruning && allpartrelids != NIL) { List *prunequal; @@ -1402,7 +1412,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - prunequal); + prunequal, + allpartrelids); } plan->appendplans = subplans; @@ -1448,6 +1459,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + List *allpartrelids = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1537,15 +1549,23 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, subplan = (Plan *) sort; } + /* + * Find partitioned parent rel(s) of the subpath's rel(s). + */ + allpartrelids = add_append_subpath_partrelids(root, subpath, rel, + allpartrelids); + subplans = lappend(subplans, subplan); } + node->allpartrelids = allpartrelids; + /* - * If any quals exist, they may be useful to perform further partition - * pruning during execution. Gather information needed by the executor to - * do partition pruning. + * If scanning partitions, check if there are quals that may be useful to + * perform further partition pruning during execution. Gather information + * needed by the executor to do partition pruning. 
*/ - if (enable_partition_pruning) + if (enable_partition_pruning && allpartrelids != NIL) { List *prunequal; @@ -1557,7 +1577,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, if (prunequal != NIL) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - prunequal); + prunequal, + allpartrelids); } node->mergeplans = subplans; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 948afd9094..2c2e38f589 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -522,6 +522,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, Assert(glob->finalrowmarks == NIL); Assert(glob->resultRelations == NIL); Assert(glob->appendRelations == NIL); + Assert(glob->elidedAppendPartRels == NIL); top_plan = set_plan_references(root, top_plan); /* ... and the subplans (both regular subplans and initplans) */ Assert(list_length(glob->subplans) == list_length(glob->subroots)); @@ -549,6 +550,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->permInfos = glob->finalrteperminfos; result->resultRelations = glob->resultRelations; result->appendRelations = glob->appendRelations; + result->elidedAppendPartRels = glob->elidedAppendPartRels; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; result->rowMarks = glob->finalrowmarks; @@ -7941,8 +7943,11 @@ create_partitionwise_grouping_paths(PlannerInfo *root, agg_costs, gd, &child_extra, &child_partially_grouped_rel); + /* Mark as child of grouped_rel. 
*/ + child_grouped_rel->parent = grouped_rel; if (child_partially_grouped_rel) { + child_partially_grouped_rel->parent = grouped_rel; partially_grouped_live_children = lappend(partially_grouped_live_children, child_partially_grouped_rel); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 7aed84584c..4fc5ed15aa 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1757,6 +1757,10 @@ set_append_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* Do this before possibly removing the Append node below. */ + foreach(l, aplan->allpartrelids) + lfirst(l) = offset_relid_set((Relids) lfirst(l), rtoffset); + /* * See if it's safe to get rid of the Append entirely. For this to be * safe, there must be only one child plan and that child plan's parallel @@ -1770,7 +1774,14 @@ set_append_references(PlannerInfo *root, Plan *p = (Plan *) linitial(aplan->appendplans); if (p->parallel_aware == aplan->plan.parallel_aware) + { + if (aplan->allpartrelids) + root->glob->elidedAppendPartRels = + list_concat(root->glob->elidedAppendPartRels, + aplan->allpartrelids); + return clean_up_removed_plan_level((Plan *) aplan, p); + } } /* @@ -1832,6 +1843,10 @@ set_mergeappend_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* Do this before possibly removing the MergeAppend node below. */ + foreach(l, mplan->allpartrelids) + lfirst(l) = offset_relid_set((Relids) lfirst(l), rtoffset); + /* * See if it's safe to get rid of the MergeAppend entirely. 
For this to * be safe, there must be only one child plan and that child plan's @@ -1846,7 +1861,14 @@ set_mergeappend_references(PlannerInfo *root, Plan *p = (Plan *) linitial(mplan->mergeplans); if (p->parallel_aware == mplan->plan.parallel_aware) + { + if (mplan->allpartrelids) + root->glob->elidedAppendPartRels = + list_concat(root->glob->elidedAppendPartRels, + mplan->allpartrelids); + return clean_up_removed_plan_level((Plan *) mplan, p); + } } /* diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 4989722637..0569cd00a5 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -1038,3 +1038,130 @@ distribute_row_identity_vars(PlannerInfo *root) } } } + +/* + * add_append_subpath_partrelids + * Look up a child subpath's rel's partitioned parent relids up to + * parentrel and add the bitmapset containing those into + * 'allpartrelids' + */ +List * +add_append_subpath_partrelids(PlannerInfo *root, Path *subpath, + RelOptInfo *parentrel, + List *allpartrelids) +{ + RelOptInfo *pathrel = subpath->parent; + Relids partrelids = NULL; + Index top_parent; + ListCell *lc; + + /* Nothing to do if there's no parent to begin with. */ + if (!IS_OTHER_REL(pathrel)) + return allpartrelids; + + /* + * Traverse up to the pathrel's topmost partitioned parent, collecting + * parent relids as we go; but stop if we reach parentrel. (Normally, a + * pathrel's topmost partitioned parent is either parentrel or a UNION ALL + * appendrel child of parentrel. But when handling partitionwise joins of + * multi-level partitioning trees, we can see an append path whose + * parentrel is an intermediate partitioned table.) + */ + do + { + Relids parent_relids = NULL; + + /* + * For simple child rels, we can simply set the parent_relids to + * pathrel->parent->relids. 
But for partitionwise join and aggregate + * child rels, while we can use pathrel->parent to move up the tree, + * parent_relids must be found the hard way through AppendRelInfos, + * because 1) a joinrel's relids may point to RTE_JOIN entries, + * 2) topmost parent grouping rel's relids field is NULL. + */ + if (IS_SIMPLE_REL(pathrel)) + { + pathrel = pathrel->parent; + /* Stop if we've reached a non-partitioned parent. */ + if (!IS_PARTITIONED_REL(pathrel)) + break; + parent_relids = bms_add_members(parent_relids, pathrel->relids); + } + else + { + AppendRelInfo **appinfos; + int nappinfos, + i; + + appinfos = find_appinfos_by_relids(root, pathrel->relids, + &nappinfos); + for (i = 0; i < nappinfos; i++) + { + AppendRelInfo *appinfo = appinfos[i]; + + parent_relids = bms_add_member(parent_relids, + appinfo->parent_relid); + } + pfree(appinfos); + pathrel = pathrel->parent; + } + /* accept this level as an interesting parent */ + partrelids = bms_add_members(partrelids, parent_relids); + if (pathrel == parentrel) + break; /* don't traverse above parentrel */ + } while (IS_OTHER_REL(pathrel)); + + if (partrelids == NULL) + return allpartrelids; + + /* + * Append the 'partrelids' RT index bitmapset to 'allpartrelids' or + * merge the RT indexes into an appropriate bitmapset already present + * in the list. + * + * Within 'allpartrelids', there is one Bitmapset for each topmost parent + * partitioned rel mentioned in the query whose children's subpaths have + * been passed to add_append_subpath_partrelids. Each Bitmapset contains + * the RT indexes of the topmost parent as well as its relevant non-leaf + * child partitions. Since (by construction of the rangetable list) parent + * partitions must have lower RT indexes than their children, we can + * distinguish the topmost parent as being the lowest set bit in the + * Bitmapset. 
+ * + * Note that the list contains only RT indexes of partitioned tables that + * are parents of some scan-level relation appearing in the 'subpaths' that + * add_append_subpath_partrelids() is dealing with. Also, "topmost" + * parents are not allowed to be higher than the 'parentrel' associated + * with the append path. In this way, we avoid expending cycles on + * partitioned rels that can't contribute useful pruning information for + * the problem at hand. + * + * (It is possible for 'parentrel' to be a child partitioned table, and it + * is also possible for scan-level relations to be child partitioned tables + * rather than leaf partitions. Hence we must construct this relation set + * with reference to the particular append path we're dealing with, rather + * than looking at the full partitioning structure represented in the + * RelOptInfos.) + */ + + /* We can easily get the lowest set bit this way: */ + top_parent = bms_next_member(partrelids, -1); + Assert(top_parent > 0); + + /* Look for a matching topmost parent */ + foreach(lc, allpartrelids) + { + Bitmapset *currpartrelids = (Bitmapset *) lfirst(lc); + Index currtarget = bms_next_member(currpartrelids, -1); + + if (top_parent == currtarget) + { + /* Found a match, so add any new RT indexes to this hierarchy */ + currpartrelids = bms_add_members(currpartrelids, partrelids); + lfirst(lc) = currpartrelids; + return allpartrelids; + } + } + /* No match, so add the new partition hierarchy to the list */ + return lappend(allpartrelids, partrelids); +} diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 9a1a7faac7..2afc10c40b 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -137,7 +137,6 @@ typedef struct PruneStepResult } PruneStepResult; -static List *add_part_relids(List *allpartrelids, Bitmapset *partrelids); static List *make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *prunequal, @@ 
-215,33 +214,32 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, * of scan paths for its child rels. * 'prunequal' is a list of potential pruning quals (i.e., restriction * clauses that are applicable to the appendrel). + * 'allpartrelids' contains Bitmapsets of RT indexes of partitioned parents + * whose partitions' Paths are in 'subpaths'; there's one Bitmapset for every + * partition tree involved. */ PartitionPruneInfo * make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *subpaths, - List *prunequal) + List *prunequal, + List *allpartrelids) { PartitionPruneInfo *pruneinfo; Bitmapset *allmatchedsubplans = NULL; - List *allpartrelids; List *prunerelinfos; int *relid_subplan_map; ListCell *lc; int i; + Assert(list_length(allpartrelids) > 0); + /* - * Scan the subpaths to see which ones are scans of partition child - * relations, and identify their parent partitioned rels. (Note: we must - * restrict the parent partitioned rels to be parentrel or children of - * parentrel, otherwise we couldn't translate prunequal to match.) - * - * Also construct a temporary array to map from partition-child-relation - * relid to the index in 'subpaths' of the scan plan for that partition. + * Construct a temporary array to map from partition-child-relation relid + * to the index in 'subpaths' of the scan plan for that partition. * (Use of "subplan" rather than "subpath" is a bit of a misnomer, but * we'll let it stand.) For convenience, we use 1-based indexes here, so * that zero can represent an un-filled array entry. 
*/ - allpartrelids = NIL; relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); i = 1; @@ -250,50 +248,9 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, Path *path = (Path *) lfirst(lc); RelOptInfo *pathrel = path->parent; - /* We don't consider partitioned joins here */ - if (pathrel->reloptkind == RELOPT_OTHER_MEMBER_REL) - { - RelOptInfo *prel = pathrel; - Bitmapset *partrelids = NULL; - - /* - * Traverse up to the pathrel's topmost partitioned parent, - * collecting parent relids as we go; but stop if we reach - * parentrel. (Normally, a pathrel's topmost partitioned parent - * is either parentrel or a UNION ALL appendrel child of - * parentrel. But when handling partitionwise joins of - * multi-level partitioning trees, we can see an append path whose - * parentrel is an intermediate partitioned table.) - */ - do - { - AppendRelInfo *appinfo; - - Assert(prel->relid < root->simple_rel_array_size); - appinfo = root->append_rel_array[prel->relid]; - prel = find_base_rel(root, appinfo->parent_relid); - if (!IS_PARTITIONED_REL(prel)) - break; /* reached a non-partitioned parent */ - /* accept this level as an interesting parent */ - partrelids = bms_add_member(partrelids, prel->relid); - if (prel == parentrel) - break; /* don't traverse above parentrel */ - } while (prel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - if (partrelids) - { - /* - * Found some relevant parent partitions, which may or may not - * overlap with partition trees we already found. Add new - * information to the allpartrelids list. 
- */ - allpartrelids = add_part_relids(allpartrelids, partrelids); - /* Also record the subplan in relid_subplan_map[] */ - /* No duplicates please */ - Assert(relid_subplan_map[pathrel->relid] == 0); - relid_subplan_map[pathrel->relid] = i; - } - } + /* No duplicates please */ + Assert(relid_subplan_map[pathrel->relid] == 0); + relid_subplan_map[pathrel->relid] = i; i++; } @@ -359,63 +316,6 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, return pruneinfo; } -/* - * add_part_relids - * Add new info to a list of Bitmapsets of partitioned relids. - * - * Within 'allpartrelids', there is one Bitmapset for each topmost parent - * partitioned rel. Each Bitmapset contains the RT indexes of the topmost - * parent as well as its relevant non-leaf child partitions. Since (by - * construction of the rangetable list) parent partitions must have lower - * RT indexes than their children, we can distinguish the topmost parent - * as being the lowest set bit in the Bitmapset. - * - * 'partrelids' contains the RT indexes of a parent partitioned rel, and - * possibly some non-leaf children, that are newly identified as parents of - * some subpath rel passed to make_partition_pruneinfo(). These are added - * to an appropriate member of 'allpartrelids'. - * - * Note that the list contains only RT indexes of partitioned tables that - * are parents of some scan-level relation appearing in the 'subpaths' that - * make_partition_pruneinfo() is dealing with. Also, "topmost" parents are - * not allowed to be higher than the 'parentrel' associated with the append - * path. In this way, we avoid expending cycles on partitioned rels that - * can't contribute useful pruning information for the problem at hand. - * (It is possible for 'parentrel' to be a child partitioned table, and it - * is also possible for scan-level relations to be child partitioned tables - * rather than leaf partitions. 
Hence we must construct this relation set - * with reference to the particular append path we're dealing with, rather - * than looking at the full partitioning structure represented in the - * RelOptInfos.) - */ -static List * -add_part_relids(List *allpartrelids, Bitmapset *partrelids) -{ - Index targetpart; - ListCell *lc; - - /* We can easily get the lowest set bit this way: */ - targetpart = bms_next_member(partrelids, -1); - Assert(targetpart > 0); - - /* Look for a matching topmost parent */ - foreach(lc, allpartrelids) - { - Bitmapset *currpartrelids = (Bitmapset *) lfirst(lc); - Index currtarget = bms_next_member(currpartrelids, -1); - - if (targetpart == currtarget) - { - /* Found a match, so add any new RT indexes to this hierarchy */ - currpartrelids = bms_add_members(currpartrelids, partrelids); - lfirst(lc) = currpartrelids; - return allpartrelids; - } - } - /* No match, so add the new partition hierarchy to the list */ - return lappend(allpartrelids, partrelids); -} - /* * make_partitionedrel_pruneinfo * Build a List of PartitionedRelPruneInfos, one for each interesting diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 14ccfc1ac1..73c2a70028 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -128,6 +128,9 @@ typedef struct PlannerGlobal /* "flat" list of AppendRelInfos */ List *appendRelations; + /* "flat" list of Bitmapsets of RT indexes */ + List *elidedAppendPartRels; + /* OIDs of relations the plan depends on */ List *relationOids; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 1aeeaec95e..634c1908ca 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -79,6 +79,11 @@ typedef struct PlannedStmt List *appendRelations; /* list of AppendRelInfo nodes */ + List *elidedAppendPartRels; /* list of Bitmapsets of RT indexes of + * partitioned tables from Append/ + * MergeAppend nodes that were elided + * in setrefs.c */ + List 
*subplans; /* Plan trees for SubPlan expressions; note * that some could be NULL */ @@ -269,6 +274,15 @@ typedef struct Append List *appendplans; int nasyncplans; /* # of asynchronous plans */ + /* + * List of bitmapsets containing RT indexes of all partitioned tables + * scanned by this Append, with one bitmapset for every partitioned + * table appearing in the query. Each bitmapset contains the RT indexes + * of all non-pruned non-leaf partitions in the tree with a given + * partitioned table as root. + */ + List *allpartrelids; + /* * All 'appendplans' preceding this index are non-partial plans. All * 'appendplans' from this index onwards are partial plans. @@ -293,6 +307,9 @@ typedef struct MergeAppend List *mergeplans; + /* See the description in Append's definition. */ + List *allpartrelids; + /* these fields are just like the sort-key info in struct Sort: */ /* number of sort-key columns */ diff --git a/src/include/optimizer/appendinfo.h b/src/include/optimizer/appendinfo.h index cc12c9c743..8e3d61c708 100644 --- a/src/include/optimizer/appendinfo.h +++ b/src/include/optimizer/appendinfo.h @@ -46,5 +46,8 @@ extern void add_row_identity_columns(PlannerInfo *root, Index rtindex, RangeTblEntry *target_rte, Relation target_relation); extern void distribute_row_identity_vars(PlannerInfo *root); +extern List *add_append_subpath_partrelids(PlannerInfo *root, Path *subpath, + RelOptInfo *parentrel, + List *allpartrelids); #endif /* APPENDINFO_H */ diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index bd490d154f..1587298812 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -73,7 +73,8 @@ typedef struct PartitionPruneContext extern PartitionPruneInfo *make_partition_pruneinfo(struct PlannerInfo *root, struct RelOptInfo *parentrel, List *subpaths, - List *prunequal); + List *prunequal, + List *allpartrelids); extern Bitmapset *prune_append_rel_partitions(struct RelOptInfo *rel); 
extern Bitmapset *get_matching_partitions(PartitionPruneContext *context, List *pruning_steps); -- 2.43.0