From 426451992a4a865203f4fed65a5d38839bc1402b Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Sun, 7 Jun 2026 19:38:16 +0900 Subject: [PATCH 57/68] Fix shortest match for reluctant nullable quantifiers in row pattern recognition When a reluctant outer quantifier wrapped a nullable reluctant body, such as (A??)+?, the match consumed rows instead of producing the required shortest (empty) match. nfa_advance_end decides a group's repeat-or-exit by comparing count with the quantifier's min and max. The count < min branch always routed the loop-back (real match) before the fast-forward exit and never suppressed it, so a longer match could replace the shortest one. The sibling min <= count < max branch already handles this correctly for reluctant groups, leaving the two branches asymmetric. Split the count < min branch into reluctant and greedy cases, mirroring the sibling: a reluctant group takes the fast-forward exit first and, if it reaches FIN, frees the loop-back state so a longer match cannot replace the shortest one. Greedy and non-nullable groups keep the existing loop-first behaviour. --- src/backend/executor/execRPR.c | 72 +++++++++++++++++---------- src/test/regress/expected/rpr_nfa.out | 33 ++++++++++++ src/test/regress/sql/rpr_nfa.sql | 26 ++++++++++ 3 files changed, 106 insertions(+), 25 deletions(-) diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c index 69e3603adef..b7e3c4a8274 100644 --- a/src/backend/executor/execRPR.c +++ b/src/backend/executor/execRPR.c @@ -1139,43 +1139,32 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, { RPRPatternElement *jumpElem; RPRNFAState *ffState = NULL; + RPRPatternElement *nextElem = NULL; /*---------- - * Two paths are explored in parallel when the group body is nullable + * Two paths are explored when the group body is nullable * (RPR_ELEM_EMPTY_LOOP): * - * 1. Primary path: loop back and attempt real matches in the - * next iteration (state, modified below). + * 1. Loop-back path: attempt real matches in the next iteration + * (state, modified below). * - * 2. Fast-forward path: skip directly to after the group, - * treating all remaining required iterations as empty - * matches (ffState, handled after the primary path). + * 2. Fast-forward path: skip directly to after the group, treating + * all remaining required iterations as empty matches (ffState). + * Route to elem->next (not nfa_advance_end) to avoid creating + * competing greedy/reluctant loop states. * - * The snapshot must be taken BEFORE modifying state for the loop-back, - * since both paths diverge from the same point. + * Greedy prefers the loop-back first (more iterations); reluctant + * prefers the fast-forward (exit) first and, if it reaches FIN, drops + * the loop-back so a longer match cannot replace the shortest one -- + * mirroring the min<=countelemIdx, state->counts, state->isAbsorbable); - /* Primary path: loop back for real matches */ - state->elemIdx = elem->jump; - jumpElem = &elements[state->elemIdx]; - nfa_route_to_elem(winstate, ctx, state, jumpElem, - currentPos); - - /* - * Fast-forward path for nullable bodies. E.g. (A?){2,3} when A - * doesn't match: the primary loop-back produces empty iterations that - * cycle detection would kill. Instead, exit directly with count - * satisfied. Route to elem->next (not nfa_advance_end) to avoid - * creating competing greedy/reluctant loop states. - */ - if (ffState != NULL) - { - RPRPatternElement *nextElem; - /* Exit the group: clear its own count (count-clear policy) */ ffState->counts[depth] = 0; ffState->elemIdx = elem->next; @@ -1192,9 +1181,42 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, if (RPRElemIsEnd(nextElem) && ffState->counts[nextElem->depth] < RPR_COUNT_MAX) ffState->counts[nextElem->depth]++; + } + + /* Prepare the loop-back state */ + state->elemIdx = elem->jump; + jumpElem = &elements[state->elemIdx]; + if (ffState != NULL && RPRElemIsReluctant(elem)) + { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Reluctant: take the fast-forward (exit) first */ nfa_route_to_elem(winstate, ctx, ffState, nextElem, currentPos); + + /* + * If the exit reached FIN, the shortest match is found. Skip the + * loop-back to prevent longer matches from replacing it. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + /* Loop-back second */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos); + } + else + { + /* Greedy (or non-nullable): loop-back first, fast-forward second */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos); + if (ffState != NULL) + nfa_route_to_elem(winstate, ctx, ffState, nextElem, + currentPos); } } else if (elem->max != RPR_QUANTITY_INF && count >= elem->max) diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 02a5e517b0e..3b9975a83df 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -1255,6 +1255,39 @@ WINDOW w AS ( 3 | {C} | | (3 rows) +-- Reluctant outer quantifier over a nullable reluctant body: SQL/RPR +-- semantics call for the shortest (empty) match. The count=2 boundary and single-quantifier controls localize the behaviour: only +-- the all-reluctant case (rr) should differ. +WITH t(id, isa) AS (VALUES (1, true), (2, true), (3, true), (4, false)) +SELECT id, + count(*) OVER gg AS gg, -- (A?)+ greedy / greedy + count(*) OVER gr AS gr, -- (A??)+ greedy / reluctant + count(*) OVER rg AS rg, -- (A?)+? reluctant / greedy + count(*) OVER rr AS rr, -- (A??)+? reluctant / reluctant + count(*) OVER rr2 AS rr2, -- (A??){2,}? reluctant, min>=2 boundary + count(*) OVER ca AS ca, -- A?? single reluctant control + count(*) OVER cs AS cs -- A*? single reluctant control +FROM t +WINDOW gg AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A?)+) DEFINE A AS isa), + gr AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??)+) DEFINE A AS isa), + rg AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A?)+?) DEFINE A AS isa), + rr AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??)+?) DEFINE A AS isa), + rr2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??){2,}?) DEFINE A AS isa), + ca AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A??) DEFINE A AS isa), + cs AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A*?) DEFINE A AS isa) +ORDER BY id; + id | gg | gr | rg | rr | rr2 | ca | cs +----+----+----+----+----+-----+----+---- + 1 | 3 | 3 | 1 | 0 | 0 | 0 | 0 + 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 + 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 + 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(4 rows) + -- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) -- Like the VAR case above but a multi-element group; it goes through the -- begin path (nfa_advance_begin), which already honors reluctant ordering. diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index 213385f143b..61072d1d6f1 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -887,6 +887,32 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); +-- Reluctant outer quantifier over a nullable reluctant body: SQL/RPR +-- semantics call for the shortest (empty) match. The count=2 boundary and single-quantifier controls localize the behaviour: only +-- the all-reluctant case (rr) should differ. +WITH t(id, isa) AS (VALUES (1, true), (2, true), (3, true), (4, false)) +SELECT id, + count(*) OVER gg AS gg, -- (A?)+ greedy / greedy + count(*) OVER gr AS gr, -- (A??)+ greedy / reluctant + count(*) OVER rg AS rg, -- (A?)+? reluctant / greedy + count(*) OVER rr AS rr, -- (A??)+? reluctant / reluctant + count(*) OVER rr2 AS rr2, -- (A??){2,}? reluctant, min>=2 boundary + count(*) OVER ca AS ca, -- A?? single reluctant control + count(*) OVER cs AS cs -- A*? single reluctant control +FROM t +WINDOW gg AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A?)+) DEFINE A AS isa), + gr AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??)+) DEFINE A AS isa), + rg AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A?)+?) DEFINE A AS isa), + rr AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??)+?) DEFINE A AS isa), + rr2 AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A??){2,}?) DEFINE A AS isa), + ca AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A??) DEFINE A AS isa), + cs AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A*?) DEFINE A AS isa) +ORDER BY id; + -- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) -- Like the VAR case above but a multi-element group; it goes through the -- begin path (nfa_advance_begin), which already honors reluctant ordering. -- 2.50.1 (Apple Git-155)