From 176f477a820f40ab31cb34fcf4493de60b004579 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Mon, 1 Jun 2026 21:36:38 +0900 Subject: [PATCH 37/68] Fix count slot leak in row pattern recognition absorption When a leaf variable in an absorbable group reaches its maximum count, the match-phase inline advance walks it through the enclosing END elements without clearing the variable's own counts[] slot, unlike the regular exit in nfa_advance_var. Sibling elements at the same depth share a slot, so the stale count leaked into a following sibling group and tripped the max-count Assert -- or, in a non-assert build, produced a wrong match count. PATTERN ((A B)+ (C D)+) is the smallest reproducer. Clear the leaf variable's count on the inline exit. Assert the resulting contract -- each element zeroes its own slot on exit, so a variable or group is entered with a zero count -- at the entry points, and drop the now-redundant loop-back body reset. Per a report from a static analysis tool. --- src/backend/executor/execRPR.c | 55 +++++++++++++++++++++------ src/test/regress/expected/rpr_nfa.out | 42 ++++++++++++++++++++ src/test/regress/sql/rpr_nfa.sql | 31 +++++++++++++++ 3 files changed, 117 insertions(+), 11 deletions(-) diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c index 16a0f4ae375..9e45920de9c 100644 --- a/src/backend/executor/execRPR.c +++ b/src/backend/executor/execRPR.c @@ -315,8 +315,9 @@ nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2) * The +1 is the slot arithmetic: comparing through depth N requires * counts[0..N], i.e., N+1 entries. Deeper slots (counts[d] with d > * elem->depth) are excluded because they hold scratch state from inner - * groups that gets zeroed on re-entry (see END loop-back in - * nfa_advance_end), and so must not participate in equivalence judgment. + * groups. Per the count-clear policy such a slot is zeroed when its + * owning element exits (see nfa_advance_var and the inline fast path in + * nfa_match), so it must not participate in equivalence judgment. */ elem = &pattern->elements[s1->elemIdx]; compareDepth = elem->depth + 1; @@ -858,6 +859,16 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) state->elemIdx = elem->next; state->counts[endDepth] = endCount; + /* + * Leaf VAR exited (reached max): clear its own count so + * the next occupant enters with zero, as nfa_advance_var + * does on exit (this inline path replaces that exit). + * depth > endDepth, so this leaves the group count just + * written intact. + */ + Assert(endDepth < depth); + state->counts[depth] = 0; + /* * Chain through END elements within the absorbable region * (ABSORBABLE_BRANCH) until reaching the judgment point @@ -873,7 +884,13 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) int outerDepth = outerEnd->depth; int32 outerCount = state->counts[outerDepth]; - /* Reset exited group's count */ + /* + * Exit this intermediate group: clear its own count + * (count-clear policy). It sits below the absorbable + * judgment point, so it is excluded from the + * dominance comparison; the judgment point where the + * chain stops keeps its count. + */ state->counts[endDepth] = 0; /* Increment outer group count */ @@ -925,6 +942,15 @@ nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, { RPRNFAState *skipState = NULL; + /* + * Entry-side check of the count-clear policy: a VAR is always routed + * to with a clean slot. Each element zeroes its own count on exit, + * so a nonzero count here would be a leak from an earlier element + * (see nfa_advance_var / nfa_advance_end exit handling and the inline + * fast path in nfa_match). + */ + Assert(state->counts[nextElem->depth] == 0); + /* Create skip state before add_unique, which may free state */ if (RPRElemCanSkip(nextElem)) skipState = nfa_state_clone(winstate, nextElem->next, @@ -989,9 +1015,10 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, * nfa_advance_begin * * Handle BEGIN element: group entry logic. - * BEGIN is only visited at initial group entry (count is always 0). + * BEGIN is only visited at initial group entry; loop-back from END goes + * directly to first child, bypassing BEGIN. Per the count-clear policy the + * group's own count slot is therefore already zero on entry (asserted below). * If min=0, creates a skip path past the group. - * Loop-back from END goes directly to first child, bypassing BEGIN. */ static void nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, @@ -1002,7 +1029,12 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, RPRPatternElement *elements = pattern->elements; RPRNFAState *skipState = NULL; - state->counts[elem->depth] = 0; + /* + * Entry-side check of the count-clear policy: the group's own count slot + * is already zero here. BEGIN is only visited at initial group entry, + * and the previous occupant of this depth slot cleared it on exit. + */ + Assert(state->counts[elem->depth] == 0); /* Optional group: create skip path (but don't route yet) */ if (elem->min == 0) @@ -1094,8 +1126,6 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, state->counts, state->isAbsorbable); /* Primary path: loop back for real matches */ - for (int d = depth + 1; d < pattern->maxDepth; d++) - state->counts[d] = 0; state->elemIdx = elem->jump; jumpElem = &elements[state->elemIdx]; nfa_route_to_elem(winstate, ctx, state, jumpElem, @@ -1112,6 +1142,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, { RPRPatternElement *nextElem; + /* Exit the group: clear its own count (count-clear policy) */ ffState->counts[depth] = 0; ffState->elemIdx = elem->next; nextElem = &elements[ffState->elemIdx]; @@ -1130,6 +1161,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, /* Must exit: reached max iterations. */ RPRPatternElement *nextElem; + /* Exit: clear the group's own count (count-clear policy) */ state->counts[depth] = 0; state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; @@ -1161,6 +1193,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, */ exitState = nfa_state_clone(winstate, elem->next, state->counts, state->isAbsorbable); + /* Exit branch: clear the group's own count (count-clear policy) */ exitState->counts[depth] = 0; nextElem = &elements[exitState->elemIdx]; @@ -1169,8 +1202,6 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, exitState->counts[nextElem->depth]++; /* Prepare loop state */ - for (int d = depth + 1; d < pattern->maxDepth; d++) - state->counts[d] = 0; state->elemIdx = elem->jump; jumpElem = &elements[state->elemIdx]; @@ -1253,6 +1284,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, /* Clone for exit, original stays for loop */ cloneState = nfa_state_clone(winstate, elem->next, state->counts, state->isAbsorbable); + /* Exit: clear the VAR's own count (count-clear policy) */ cloneState->counts[depth] = 0; nextElem = &elements[cloneState->elemIdx]; @@ -1289,7 +1321,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, /* Loop first (preferred for greedy) */ nfa_add_state_unique(winstate, ctx, cloneState); - /* Exit second */ + /* Exit second: clear the VAR's own count (count-clear policy) */ state->counts[depth] = 0; state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; @@ -1326,6 +1358,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, /* Exit only: advance to next element */ RPRPatternElement *nextElem; + /* Exit: clear the VAR's own count (count-clear policy) */ state->counts[depth] = 0; state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 59b91ff9aa4..2a0d4f11e74 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -447,6 +447,48 @@ WINDOW w AS ( 7 | {X} | | (7 rows) +-- Two consecutive unbounded groups: (A B)+ (C D)+ +-- The leading group (A B)+ is absorbable (unbounded multi-element); (C D)+ is +-- a distinct sibling group that does not merge with it. When the leading group +-- exits into the sibling, its body leaf-VAR count must be cleared so it does +-- not leak into the sibling's shared depth slot. +WITH test_absorb_two_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_two_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ (C D)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {C} | | + 8 | {D} | | +(8 rows) + -- Fixed-length group absorption: (A B{2})+ C -- B{2} has min == max, equivalent to unrolling to (A B B)+ C WITH test_absorb_fixedlen AS ( diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index febf834565d..6362c69f385 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -346,6 +346,37 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Two consecutive unbounded groups: (A B)+ (C D)+ +-- The leading group (A B)+ is absorbable (unbounded multi-element); (C D)+ is +-- a distinct sibling group that does not merge with it. When the leading group +-- exits into the sibling, its body leaf-VAR count must be cleared so it does +-- not leak into the sibling's shared depth slot. +WITH test_absorb_two_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_two_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ (C D)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + -- Fixed-length group absorption: (A B{2})+ C -- B{2} has min == max, equivalent to unrolling to (A B B)+ C WITH test_absorb_fixedlen AS ( -- 2.50.1 (Apple Git-155)