From 6bfd39fc7cf8b758a3dd910d89c0893d57425407 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Mon, 1 Jun 2026 16:35:45 +0900 Subject: [PATCH 35/68] Fix unsafe (A{n,})* quantifier flattening in row pattern recognition The Phase-1 pattern optimizer's tryMultiplyQuantifiers() collapsed a nested quantifier (A{k,})* to A* whenever both the inner and outer bounds were unbounded. That is only correct when the inner minimum is 0 or 1. For k >= 2 the reachable repetition counts are {0} together with [k, INF), so the counts 1..k-1 are unreachable; A* admits them. As a result an isolated single A matched as length 1 where the pattern must instead produce an empty match (length 0), widening the matching language. Skip the multiplication when the outer quantifier is skippable (minimum 0) and the inner minimum is at least 2. The existing safe cases are unaffected: an inner minimum of 0 or 1 stays contiguous, and a non-skippable outer such as (A{2,})+ still folds to A{2,}. Per a report from a static analysis tool. --- src/backend/executor/README.rpr | 1 + src/backend/optimizer/plan/rpr.c | 18 +++++- src/test/regress/expected/rpr_explain.out | 41 +++++++++++++ src/test/regress/expected/rpr_nfa.out | 72 +++++++++++++++++++++++ src/test/regress/sql/rpr_explain.sql | 23 ++++++++ src/test/regress/sql/rpr_nfa.sql | 53 +++++++++++++++++ 6 files changed, 207 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/README.rpr b/src/backend/executor/README.rpr index 08418588114..3a215f2566b 100644 --- a/src/backend/executor/README.rpr +++ b/src/backend/executor/README.rpr @@ -232,6 +232,7 @@ applied: (g) Quantifier multiplication: Collapse nested quantifiers when safe (A+)+ -> A+ (A{2,3}){5} -> A{10,15} + (A{2,})* stays as-is (count 1 unreachable; A* would be wrong) (h) Single-child unwrap SEQ(A) -> A, (A){1,1} -> A diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 2a1d665c7ee..2a98977b288 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -781,7 +781,8 @@ optimizeAltPattern(RPRPatternNode *pattern) * Try to multiply quantifiers. * * Multiplication is SAFE when: - * 1. Both unbounded: (A*)* -> A*, (A+)+ -> A+ + * 1. Both unbounded, with skipless outer or child->min <= 1: + * (A*)* -> A*, (A+)+ -> A+, (A+)* -> A*, (A{2,})+ -> A{2,} * 2. Outer exact: (A{m,n}){k} -> A{m*k, n*k} * 3. Outer range + child {1,1}: (A){2,} -> A{2,} * @@ -789,6 +790,9 @@ optimizeAltPattern(RPRPatternNode *pattern) * - Only child unbounded: (A+){3} has different semantics * - Outer range + child not {1,1}: gaps possible * e.g., (A{2}){2,3} yields 4,6 only (not 4,5,6) + * - Skippable outer (min 0) + child->min >= 2: (A{2,})* reaches + * {0} UNION [child->min, INF), so 1..child->min-1 are unreachable + * and A* would wrongly admit them * * Returns the child node with multiplied quantifiers if successful, * otherwise returns the original pattern unchanged. @@ -816,6 +820,18 @@ tryMultiplyQuantifiers(RPRPatternNode *pattern) /* Case 1: Both unbounded - (A*)* -> A*, (A+)+ -> A+ */ if (child->max == RPR_QUANTITY_INF && pattern->max == RPR_QUANTITY_INF) { + /* + * A skippable outer (min 0) over a child with min >= 2 reaches + * repetition counts {0} UNION [child->min, INF): the counts + * 1..child->min-1 are unreachable, and no single quantifier can + * express that gap. Flattening to A{0,INF} = A* would wrongly admit + * them, e.g. (A{2,})* would match a single A. Multiplication is safe + * here only when child->min <= 1 (the reachable set is then + * contiguous from 0); otherwise leave the pattern unflattened. + */ + if (pattern->min == 0 && child->min >= 2) + return pattern; + new_min_64 = (int64) child->min * pattern->min; if (new_min_64 >= RPR_QUANTITY_INF) return pattern; /* overflow, skip optimization */ diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index 77079d5e8c9..9ba302b11ae 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -782,6 +782,47 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (10 rows) +-- (A{2,})* must NOT flatten to a* (H-1): counts {0} UNION [2, INF) leave 1 +-- unreachable. The planner keeps it as (a{2,})*, not a*. +CREATE VIEW rpr_ev_nested_quant_no_flatten AS +SELECT count(*) OVER w +FROM generate_series(1, 6) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS v % 3 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nested_quant_no_flatten'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN ((a{2,})*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 6) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS v % 3 <> 0 +);'); + rpr_explain_filter +--------------------------------------------------------------------- + WindowAgg (actual rows=6.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2,}")* + Nav Mark Lookback: 0 + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 18 total, 0 merged + NFA Contexts: 3 peak, 7 total, 2 pruned + NFA: 2 matched (len 2/2/2.0), 0 mismatched + NFA: 2 absorbed (len 1/1/1.0), 0 skipped + -> Function Scan on generate_series s (actual rows=6.00 loops=1) +(10 rows) + -- ============================================================ -- Context Statistics Tests (peak, total, pruned + absorbed/skipped) -- ============================================================ diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 72dbf080a37..59b91ff9aa4 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -2275,6 +2275,78 @@ WINDOW w AS ( 6 | {B} | | (6 rows) +-- Nested quantifier flattening must not widen the matching language (H-1). +-- (A{k,})* with k >= 2 reaches repetition counts {0} UNION [k, INF); the gap +-- 1..k-1 is unreachable, so it must NOT collapse to A*. An isolated single A +-- must yield an EMPTY match (count 0), not a length-1 match. +WITH test_nested_quant_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- isolated A: (A{2,})* matches empty here, not 1 + (2, ARRAY['_']), + (3, ARRAY['A']), + (4, ARRAY['A']), -- run of 2: matched + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end, + count(*) OVER w AS match_count +FROM test_nested_quant_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end | match_count +----+-------+-------------+-----------+------------- + 1 | {A} | | | 0 + 2 | {_} | | | 0 + 3 | {A} | 3 | 4 | 2 + 4 | {A} | | | 0 + 5 | {_} | | | 0 +(5 rows) + +-- Same for a GROUP child: ((A B){2,})* must not collapse to (A B)*. +-- An isolated single (A B) pair must yield an EMPTY match (count 0). +WITH test_nested_quant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- isolated (A B) pair: matches empty here + (2, ARRAY['B']), + (3, ARRAY['_']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['A']), + (7, ARRAY['B']), -- run of 2 pairs: matched + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end, + count(*) OVER w AS match_count +FROM test_nested_quant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2,})*) + DEFINE A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end | match_count +----+-------+-------------+-----------+------------- + 1 | {A} | | | 0 + 2 | {B} | | | 0 + 3 | {_} | | | 0 + 4 | {A} | 4 | 7 | 4 + 5 | {B} | | | 0 + 6 | {A} | | | 0 + 7 | {B} | | | 0 + 8 | {_} | | | 0 +(8 rows) + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index a527615849a..c8b159e30e6 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -499,6 +499,29 @@ WINDOW w AS ( DEFINE A AS v % 3 = 1, B AS v % 3 = 2 );'); +-- (A{2,})* must NOT flatten to a* (H-1): counts {0} UNION [2, INF) leave 1 +-- unreachable. The planner keeps it as (a{2,})*, not a*. +CREATE VIEW rpr_ev_nested_quant_no_flatten AS +SELECT count(*) OVER w +FROM generate_series(1, 6) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS v % 3 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev_nested_quant_no_flatten'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 6) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS v % 3 <> 0 +);'); + -- ============================================================ -- Context Statistics Tests (peak, total, pruned + absorbed/skipped) -- ============================================================ diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index 128476aa1d1..febf834565d 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -1583,6 +1583,59 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Nested quantifier flattening must not widen the matching language (H-1). +-- (A{k,})* with k >= 2 reaches repetition counts {0} UNION [k, INF); the gap +-- 1..k-1 is unreachable, so it must NOT collapse to A*. An isolated single A +-- must yield an EMPTY match (count 0), not a length-1 match. +WITH test_nested_quant_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- isolated A: (A{2,})* matches empty here, not 1 + (2, ARRAY['_']), + (3, ARRAY['A']), + (4, ARRAY['A']), -- run of 2: matched + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end, + count(*) OVER w AS match_count +FROM test_nested_quant_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A{2,})*) + DEFINE A AS 'A' = ANY(flags) +); + +-- Same for a GROUP child: ((A B){2,})* must not collapse to (A B)*. +-- An isolated single (A B) pair must yield an EMPTY match (count 0). +WITH test_nested_quant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- isolated (A B) pair: matches empty here + (2, ARRAY['B']), + (3, ARRAY['_']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['A']), + (7, ARRAY['B']), -- run of 2 pairs: matched + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end, + count(*) OVER w AS match_count +FROM test_nested_quant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2,})*) + DEFINE A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) +); + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ -- 2.50.1 (Apple Git-155)