From 7b92e4fc46a1737ad1d7669a5b3dc52eed98e293 Mon Sep 17 00:00:00 2001 From: jian he Date: Tue, 9 Jun 2026 16:50:20 +0800 Subject: [PATCH v47 3/4] v47 refactor many functions in rpr.c In rpr.c, we have many newly constructed list: Each function built a new list and returned it, requiring the caller to assign the result back to pattern->children. Replace these with in-place variants that mutate the existing list directly using list_delete_nth_cell(), avoiding unnecessary list allocations: mergeConsecutiveVars -> mergeConsecutiveVars1 mergeConsecutiveGroups -> mergeConsecutiveGroups1 mergeConsecutiveAlts -> mergeConsecutiveAlts1 mergeGroupPrefixSuffix -> mergeGroupPrefixSuffix1 flattenAltChildren -> flattenAltChildren1 removeDuplicateAlts -> removeDuplicateAlternatives1 I left the original functions intact and tagged them with pg_attribute_unused() to help reviewers easily compare the two approaches. Also simplify optimizeGroupPattern to update lfirst(lc) in place rather than accumulating a separate newChildren list. In tryMultiplyQuantifiers, replace the int64 cast-and-compare overflow guard with pg_mul_s32_overflow(), consistent with how pg_add_s32_overflow() is used elsewhere in the same file. We can also optimize flattenSeqChildren using list_insert_nth, but list_insert_nth have comments like: /* * Insert the given datum at position 'pos' (measured from 0) in the list. * 'pos' must be valid, ie, 0 <= pos <= list's length. * * Note that this takes time proportional to the distance to the end of the * list, since the following entries must be moved. */ --- src/backend/optimizer/plan/rpr.c | 452 +++++++++++++++++++++++-- src/test/regress/expected/rpr_base.out | 36 ++ src/test/regress/sql/rpr_base.sql | 16 + 3 files changed, 478 insertions(+), 26 deletions(-) diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 664c942e57..3d9a4afe37 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -39,6 +39,7 @@ #include +#include "common/int.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -57,10 +58,18 @@ static List *mergeConsecutiveAlts(List *children); static List *mergeGroupPrefixSuffix(List *children); static RPRPatternNode *optimizeSeqPattern(RPRPatternNode *pattern); +static void mergeConsecutiveVars1(List *children); +static void mergeConsecutiveGroups1(List *children); +static void mergeConsecutiveAlts1(List **children); +static void mergeGroupPrefixSuffix1(List *children); + static List *flattenAltChildren(List *children); static List *removeDuplicateAlternatives(List *children); static RPRPatternNode *optimizeAltPattern(RPRPatternNode *pattern); +static void flattenAltChildren1(List **children); +static void removeDuplicateAlternatives1(List *children); + static RPRPatternNode *tryMultiplyQuantifiers(RPRPatternNode *pattern); static RPRPatternNode *tryUnwrapGroup(RPRPatternNode *pattern); static RPRPatternNode *optimizeGroupPattern(RPRPatternNode *pattern); @@ -214,6 +223,62 @@ flattenSeqChildren(List *children) return newChildren; } +static void +mergeConsecutiveVars1(List *children) +{ + for (int outerpos = 0; outerpos < list_length(children); outerpos++) + { + RPRPatternNode *rprpattern = list_nth_node(RPRPatternNode, children, outerpos); + + if (rprpattern->nodeType != RPR_PATTERN_VAR || + rprpattern->reluctant) + continue; + + for (int restpos = outerpos + 1; restpos < list_length(children);) + { + RPRPatternNode *other; + int newmin; + int newmax = 0; + + other = list_nth_node(RPRPatternNode, children, restpos); + + if (other->nodeType != RPR_PATTERN_VAR) + break; + + if (strcmp(rprpattern->varName, other->varName) != 0) + break; + + if (rprpattern->max == RPR_QUANTITY_INF || + other->max == RPR_QUANTITY_INF) + newmax = RPR_QUANTITY_INF; + + if (pg_add_s32_overflow(rprpattern->min, other->min, &newmin)) + break; + + if (newmax != RPR_QUANTITY_INF && + pg_add_s32_overflow(rprpattern->max, other->max, &newmax)) + break; + + if (newmin < RPR_QUANTITY_INF && + newmax <= RPR_QUANTITY_INF) + { + rprpattern->min = newmin; + rprpattern->max = newmax; + + /* + * Allow combining a non-reluctant preceding VAR with a + * reluctant current VAR. The resulting combination takes its + * reluctance property from the current VAR. + */ + rprpattern->reluctant = other->reluctant; + + children = list_delete_nth_cell(children, restpos); + } + else + break; + } + } +} /* * mergeConsecutiveVars @@ -224,6 +289,7 @@ flattenSeqChildren(List *children) * * Only merges non-reluctant VAR nodes with the same variable name. */ +pg_attribute_unused() static List * mergeConsecutiveVars(List *children) { @@ -289,6 +355,63 @@ mergeConsecutiveVars(List *children) return mergedChildren; } +static void +mergeConsecutiveGroups1(List *children) +{ + for (int outerpos = 0; outerpos < list_length(children); outerpos++) + { + RPRPatternNode *rprpattern = list_nth_node(RPRPatternNode, children, outerpos); + + if (rprpattern->nodeType != RPR_PATTERN_GROUP || + rprpattern->reluctant) + continue;; + + for (int restpos = outerpos + 1; restpos < list_length(children);) + { + RPRPatternNode *other; + int newmin; + int newmax = 0; + + other = list_nth_node(RPRPatternNode, children, restpos); + + if (other->nodeType != RPR_PATTERN_GROUP) + break; + + if (!rprPatternChildrenEqual(rprpattern->children, other->children)) + break; + + if (rprpattern->max == RPR_QUANTITY_INF || + other->max == RPR_QUANTITY_INF) + newmax = RPR_QUANTITY_INF; + + if (pg_add_s32_overflow(rprpattern->min, other->min, &newmin)) + break; + + if (newmax != RPR_QUANTITY_INF && + pg_add_s32_overflow(rprpattern->max, other->max, &newmax)) + break; + + if (newmin < RPR_QUANTITY_INF && + newmax <= RPR_QUANTITY_INF) + { + rprpattern->min = newmin; + rprpattern->max = newmax; + + /* + * Allow combining a non-reluctant preceding GROUP with a + * reluctant current GROUP. The resulting combination takes + * its reluctance property from the current GROUP. + */ + rprpattern->reluctant = other->reluctant; + + children = list_delete_nth_cell(children, restpos); + } + else + break; + } + } +} + /* * mergeConsecutiveGroups * Merge consecutive identical GROUP nodes. @@ -298,6 +421,7 @@ mergeConsecutiveVars(List *children) * * Only merges non-reluctant GROUP nodes with identical children. */ +pg_attribute_unused() static List * mergeConsecutiveGroups(List *children) { @@ -362,6 +486,56 @@ mergeConsecutiveGroups(List *children) return mergedChildren; } +static void +mergeConsecutiveAlts1(List **children) +{ + int count = 1; + + for (int outerpos = 0; outerpos < list_length(*children); outerpos++) + { + RPRPatternNode *rprpattern; + + rprpattern = list_nth_node(RPRPatternNode, *children, outerpos); + + if (rprpattern->nodeType != RPR_PATTERN_ALT || + rprpattern->reluctant) + continue; + + for (int restpos = outerpos + 1; restpos < list_length(*children);) + { + RPRPatternNode *other; + + other = list_nth_node(RPRPatternNode, *children, restpos); + + if (other->nodeType != RPR_PATTERN_ALT || other->reluctant) + break; + + if (!rprPatternChildrenEqual(rprpattern->children, + other->children)) + break; + + *children = list_delete_nth_cell(*children, restpos); + count++; + } + + if (count > 1) + { + /* Wrap it into GROUP {count, count}(ALT) */ + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = false; + group->reluctant_location = -1; + group->location = -1; + group->children = list_make1(copyObject(rprpattern)); + *children = list_delete_nth_cell(*children, outerpos); + *children = lcons(group, *children); + } + count = 1; /* reset */ + } +} /* * mergeConsecutiveAlts @@ -374,6 +548,8 @@ mergeConsecutiveGroups(List *children) * in the SEQ. This step detects consecutive identical ALT nodes and wraps * them in a GROUP with the appropriate quantifier. */ +pg_attribute_unused() + static List * mergeConsecutiveAlts(List *children) { @@ -489,6 +665,7 @@ mergeConsecutiveAlts(List *children) * (A B)+ A B -> (A B){2,} * A B (A B)+ A B -> (A B){3,} */ +pg_attribute_unused() static List * mergeGroupPrefixSuffix(List *children) { @@ -644,6 +821,167 @@ mergeGroupPrefixSuffix(List *children) return result; } +static void +mergeGroupPrefixSuffix1(List *children) +{ + int group_len; + List *prefixElements = NIL; + List *groupContent = NIL; + List *suffixElements = NIL; + + for (int outerpos = 0; outerpos < list_length(children); outerpos++) + { + RPRPatternNode *rprpattern = list_nth_node(RPRPatternNode, children, + outerpos); + + if (rprpattern->nodeType != RPR_PATTERN_GROUP || + rprpattern->reluctant) + continue; + + groupContent = rprpattern->children; + group_len = list_length(rprpattern->children); + + /* + * If GROUP has single SEQ child, compare with SEQ's children. e.g., + * (A B)+ internally contains sequence A B; compare against that. + */ + if (group_len == 1) + { + RPRPatternNode *inner = (RPRPatternNode *) linitial(rprpattern->children); + + if (inner->nodeType == RPR_PATTERN_SEQ) + { + group_len = list_length(inner->children); + groupContent = inner->children; + } + } + + if (group_len > outerpos) + continue; + + /* + * PATTERN (A B A B (A B)+) AS val > 50); + * + * PATTERN (A B C D (C D)+) + */ + for (int restpos = outerpos % group_len; + restpos + group_len < list_length(children);) + { + int i; + int newmax; + int newmin; + bool matched; + + for (i = restpos; i < restpos + group_len; i++) + prefixElements = lappend(prefixElements, + list_nth(children, i)); + + Assert(list_length(prefixElements) == group_len); + + matched = rprPatternChildrenEqual(prefixElements, groupContent); + list_free(prefixElements); + prefixElements = NIL; + + if (!matched) + { + restpos = restpos + group_len; + continue; + } + + if (pg_add_s32_overflow(rprpattern->min, 1, &newmin)) + { + restpos = restpos + group_len; + continue; + } + + if (rprpattern->max == RPR_QUANTITY_INF) + newmax = RPR_QUANTITY_INF; + else if (pg_add_s32_overflow(rprpattern->max, 1, &newmax)) + { + restpos = restpos + group_len; + continue; + } + + rprpattern->max = newmax; + rprpattern->min = newmin; + + for (; i > restpos; i--) + children = list_delete_nth_cell(children, i - 1); + + /* + * No need advance restpos because we in-place delete list + * element, we need begin in the same position + */ + } + } + + for (int outerpos = 0; outerpos < list_length(children); outerpos++) + { + RPRPatternNode *rprpattern = list_nth_node(RPRPatternNode, children, + outerpos); + + if (rprpattern->nodeType != RPR_PATTERN_GROUP || + rprpattern->reluctant) + continue; + + group_len = list_length(rprpattern->children); + groupContent = rprpattern->children; + + /* + * If GROUP has single SEQ child, compare with SEQ's children. e.g., + * (A B)+ internally contains sequence A B; compare against that. + */ + if (group_len == 1) + { + RPRPatternNode *inner = (RPRPatternNode *) linitial(rprpattern->children); + + if (inner->nodeType == RPR_PATTERN_SEQ) + { + group_len = list_length(inner->children); + groupContent = inner->children; + } + } + + if (outerpos + group_len >= list_length(children)) + continue; + + for (int restpos = outerpos + 1; restpos + group_len <= list_length(children);) + { + int newmax; + int newmin; + bool matched; + + for (int j = restpos; j < restpos + group_len; j++) + suffixElements = lappend(suffixElements, + list_nth(children, j)); + + Assert(list_length(suffixElements) == group_len); + + matched = rprPatternChildrenEqual(suffixElements, groupContent); + list_free(suffixElements); + suffixElements = NIL; + + if (!matched) + break; + + if (pg_add_s32_overflow(rprpattern->min, 1, &newmin)) + break; + + if (rprpattern->max == RPR_QUANTITY_INF) + newmax = RPR_QUANTITY_INF; + else if (pg_add_s32_overflow(rprpattern->max, 1, &newmax)) + break; + + rprpattern->max = newmax; + rprpattern->min = newmin; + + for (int j = restpos + group_len - 1; j >= restpos; j--) + children = list_delete_nth_cell(children, j); + } + } +} + + /* * optimizeSeqPattern * Optimize SEQ pattern node. @@ -663,16 +1001,18 @@ optimizeSeqPattern(RPRPatternNode *pattern) pattern->children = flattenSeqChildren(pattern->children); /* Merge consecutive identical VAR nodes */ - pattern->children = mergeConsecutiveVars(pattern->children); + /* pattern->children = mergeConsecutiveVars(pattern->children); */ + mergeConsecutiveVars1(pattern->children); /* Merge consecutive identical GROUP nodes */ - pattern->children = mergeConsecutiveGroups(pattern->children); + mergeConsecutiveGroups1(pattern->children); /* Merge consecutive identical ALT nodes into GROUP */ - pattern->children = mergeConsecutiveAlts(pattern->children); + mergeConsecutiveAlts1(&pattern->children); /* Merge prefix/suffix into GROUP with matching children */ - pattern->children = mergeGroupPrefixSuffix(pattern->children); + /* pattern->children = mergeGroupPrefixSuffix(pattern->children); */ + mergeGroupPrefixSuffix1(pattern->children); /* Unwrap single-item SEQ */ return tryUnwrapSingleChild(pattern); @@ -688,6 +1028,7 @@ optimizeSeqPattern(RPRPatternNode *pattern) * Returns a new list with optimized children, with nested ALT children * flattened into the parent list. */ +pg_attribute_unused() static List * flattenAltChildren(List *children) { @@ -705,6 +1046,41 @@ flattenAltChildren(List *children) return newChildren; } +static void +flattenAltChildren1(List **children) +{ + for (int outerpos = 0; outerpos < list_length(*children);) + { + ListCell *lc = list_nth_cell(*children, outerpos); + + RPRPatternNode *rprpattern = list_nth_node(RPRPatternNode, *children, + outerpos); + + RPRPatternNode *optimized = optimizeRPRPattern(rprpattern); + + if (optimized->nodeType == RPR_PATTERN_ALT) + { + ListCell *lc1; + List *optimized_list; + + optimized_list = list_copy(optimized->children); + + *children = list_delete_nth_cell(*children, outerpos); + + foreach(lc1, optimized_list) + { + *children = lappend(*children, lfirst(lc1)); + } + + outerpos = outerpos + list_length(optimized->children); + } + else + { + lfirst(lc) = optimized; + outerpos++; + } + } +} /* * removeDuplicateAlternatives @@ -716,6 +1092,7 @@ flattenAltChildren(List *children) * * Returns a new list with only unique children (first occurrence kept). */ +pg_attribute_unused() static List * removeDuplicateAlternatives(List *children) { @@ -741,6 +1118,29 @@ removeDuplicateAlternatives(List *children) return uniqueChildren; } +static void +removeDuplicateAlternatives1(List *children) +{ + for (int outerpos = 0; outerpos < list_length(children); outerpos++) + { + RPRPatternNode *rprpattern; + + rprpattern = list_nth_node(RPRPatternNode, children, outerpos); + + for (int restpos = outerpos + 1; restpos < list_length(children);) + { + RPRPatternNode *other; + + other = list_nth_node(RPRPatternNode, children, restpos); + + if (rprPatternEqual(rprpattern, other)) + children = list_delete_nth_cell(children, restpos); + else + restpos++; + } + } +} + /* * optimizeAltPattern * Optimize ALT pattern node. @@ -754,10 +1154,11 @@ static RPRPatternNode * optimizeAltPattern(RPRPatternNode *pattern) { /* Recursively optimize children and flatten nested ALT */ - pattern->children = flattenAltChildren(pattern->children); + /* pattern->children = flattenAltChildren(pattern->children); */ + flattenAltChildren1(&pattern->children); /* Remove duplicate alternatives */ - pattern->children = removeDuplicateAlternatives(pattern->children); + removeDuplicateAlternatives1(pattern->children); /* Unwrap single-item ALT */ return tryUnwrapSingleChild(pattern); @@ -793,8 +1194,8 @@ tryMultiplyQuantifiers(RPRPatternNode *pattern) { RPRPatternNode *child; bool safe; - int64 new_min_64; - int64 new_max_64; + int32 newmin; + int32 newmax; /* Parser always creates GROUP with exactly one child */ Assert(list_length(pattern->children) == 1); @@ -846,22 +1247,19 @@ tryMultiplyQuantifiers(RPRPatternNode *pattern) if (!safe) return pattern; - /* Flatten the child quantifier, guarding against overflow. */ - new_min_64 = (int64) pattern->min * child->min; - if (new_min_64 >= RPR_QUANTITY_INF) - return pattern; /* overflow, skip optimization */ + /* + * Flatten the child quantifier, guarding against overflow. + */ + if (pg_mul_s32_overflow(pattern->min, child->min, &newmin)) + return pattern; if (pattern->max == RPR_QUANTITY_INF || child->max == RPR_QUANTITY_INF) - new_max_64 = RPR_QUANTITY_INF; - else - { - new_max_64 = (int64) pattern->max * child->max; - if (new_max_64 >= RPR_QUANTITY_INF) - return pattern; - } + newmax = RPR_QUANTITY_INF; + else if (pg_mul_s32_overflow(pattern->max, child->max, &newmax)) + return pattern; - child->min = (int) new_min_64; - child->max = (int) new_max_64; + child->min = newmin; + child->max = newmax; return child; } @@ -924,16 +1322,18 @@ tryUnwrapGroup(RPRPatternNode *pattern) static RPRPatternNode * optimizeGroupPattern(RPRPatternNode *pattern) { - List *newChildren; + ListCell *lc; RPRPatternNode *result; /* Recursively optimize children */ - newChildren = NIL; - foreach_node(RPRPatternNode, child, pattern->children) + foreach(lc, pattern->children) { - newChildren = lappend(newChildren, optimizeRPRPattern(child)); + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + RPRPatternNode *optimized = optimizeRPRPattern(child); + + lfirst(lc) = optimized; } - pattern->children = newChildren; /* Try quantifier multiplication */ result = tryMultiplyQuantifiers(pattern); diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index 099ed6e644..428ae0ca6a 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -4497,6 +4497,25 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (7 rows) +-- cannot merge, prefix is different +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +PATTERN (A B C D C (C D) +) +DEFINE A AS val <= 25, B AS val > 25, + C AS val > 50, D AS val > 75); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c d c (c d)+ + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + -- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -4802,6 +4821,23 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (7 rows) +-- PREFIX+SUFFIX merge (5-way): B A B (A B)+ A B A B -> b (a b){4,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: b (a b){4,} + Nav Mark Lookback: 0 + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(7 rows) + -- Unwrap single-item ALT after dedup: (A | A)+ -> a+ -- ALT dedup reduces to single-item, then GROUP unwrap EXPLAIN (COSTS OFF) diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index e324d77da6..d26e2ec706 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -2773,6 +2773,15 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING DEFINE A AS val <= 25, B AS val > 25 AND val <= 50, C AS val > 50 AND val <= 75, D AS val > 75); +-- cannot merge, prefix is different +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w +FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +PATTERN (A B C D C (C D) +) +DEFINE A AS val <= 25, B AS val > 25, + C AS val > 50, D AS val > 75); + -- PREFIX merge with quantifiers: A B* (A B*)+ -> (a b*){2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2895,6 +2904,13 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A B A B (A B)+ A B A B) DEFINE A AS val <= 50, B AS val > 50); +-- PREFIX+SUFFIX merge (5-way): B A B (A B)+ A B A B -> b (a b){4,} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (B A B (A B)+ A B A B) + DEFINE A AS val <= 50, B AS val > 50); + -- Unwrap single-item ALT after dedup: (A | A)+ -> a+ -- ALT dedup reduces to single-item, then GROUP unwrap EXPLAIN (COSTS OFF) -- 2.34.1